2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
/*
 * File-scope state for the checker: running byte totals, pending work lists
 * and option flags.  no_holes suppresses the file-extent gap test in
 * maybe_free_inode_rec().
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
/* Identifies which checker implementation is in use. */
78 enum btrfs_check_mode {
/* The default is an alias for the original mode. */
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
/* Mode in effect for this run; starts out as the default. */
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Common header shared by every back reference.  It is embedded as the
 * `node` member of struct data_backref and struct tree_backref; is_data
 * tells the two apart.  The remaining one-bit flags record what has been
 * established about the reference.
 */
87 struct extent_backref {
88 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
/* Return the extent_backref whose `list` member is @entry. */
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
/* Return the extent_backref whose `node` member (an rb_node) is @node. */
102 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
104 return rb_entry(node, struct extent_backref, node);
/*
 * Back reference for a data extent.  The common header is embedded as
 * `node`, which lets to_data_backref() recover the full structure.
 */
107 struct data_backref {
108 struct extent_backref node;
/*
 * Single-bit error flags.  Every flag owns its own bit, so several can be
 * combined with bitwise OR and still be tested individually.
 */
122 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
123 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
124 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
125 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
126 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
127 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
128 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
129 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
130 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
131 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
132 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
133 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
134 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
135 #define NO_INODE_ITEM (1<<14) /* no inode_item */
136 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
137 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
138 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
/* Return the data_backref that embeds @back as its `node` member. */
140 static inline struct data_backref* to_data_backref(struct extent_backref *back)
142 return container_of(back, struct data_backref, node);
/*
 * Ordering callback for two data backrefs stored as rb-tree nodes.
 *
 * Keys are examined in this order: parent (which shares storage with root),
 * then owner, then offset, and finally disk_bytenr and bytes when both
 * sides have found_ref set.  A full backref on the first node is tested
 * right after the parent comparison.  Both nodes are expected to be data
 * backrefs; WARN_ON fires otherwise.
 */
145 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
147 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
148 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
149 struct data_backref *back1 = to_data_backref(ext1);
150 struct data_backref *back2 = to_data_backref(ext2);
152 WARN_ON(!ext1->is_data);
153 WARN_ON(!ext2->is_data);
155 /* parent and root are a union, so this covers both */
156 if (back1->parent > back2->parent)
158 if (back1->parent < back2->parent)
161 /* This is a full backref and the parents match. */
162 if (back1->node.full_backref)
165 if (back1->owner > back2->owner)
167 if (back1->owner < back2->owner)
170 if (back1->offset > back2->offset)
172 if (back1->offset < back2->offset)
/* disk_bytenr and bytes only take part once both references were found. */
175 if (back1->found_ref && back2->found_ref) {
176 if (back1->disk_bytenr > back2->disk_bytenr)
178 if (back1->disk_bytenr < back2->disk_bytenr)
181 if (back1->bytes > back2->bytes)
183 if (back1->bytes < back2->bytes)
191 * Much like data_backref, but with the undetermined members removed
192 * and changed to use list_head.
193 * During extent scan, it is stored in root->orphan_data_extent.
194 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
/* List-linked record of a data extent; print_orphan_data_extents() reports these. */
196 struct orphan_data_extent {
197 struct list_head list;
/*
 * Back reference for a non-data (tree block) extent.  The common header is
 * embedded as `node`, which lets to_tree_backref() recover the structure.
 */
205 struct tree_backref {
206 struct extent_backref node;
/* Return the tree_backref that embeds @back as its `node` member. */
213 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
215 return container_of(back, struct tree_backref, node);
/*
 * Ordering callback for two tree backrefs stored as rb-tree nodes.  The only
 * key is parent, which shares storage with root.  Neither node may be a data
 * backref; WARN_ON fires otherwise.
 */
218 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
220 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
221 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
222 struct tree_backref *back1 = to_tree_backref(ext1);
223 struct tree_backref *back2 = to_tree_backref(ext2);
225 WARN_ON(ext1->is_data);
226 WARN_ON(ext2->is_data);
228 /* parent and root are a union, so this covers both */
229 if (back1->parent > back2->parent)
231 if (back1->parent < back2->parent)
/*
 * Ordering callback for backrefs of either kind.  Nodes are ordered first by
 * is_data, then by full_backref; nodes that agree on both are handed to
 * compare_data_backref() or compare_tree_backref().
 */
237 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
239 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
240 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
242 if (ext1->is_data > ext2->is_data)
245 if (ext1->is_data < ext2->is_data)
248 if (ext1->full_backref > ext2->full_backref)
250 if (ext1->full_backref < ext2->full_backref)
254 return compare_data_backref(node1, node2);
256 return compare_tree_backref(node1, node2);
259 /* Explicit initialization for extent_record::flag_block_full_backref */
/* Value 2 needs the two-bit width that flag_block_full_backref is declared with. */
260 enum { FLAG_UNSET = 2 };
/*
 * Everything recorded about one extent: its back references (kept both on a
 * list and in an rb-tree), duplicates, reference counts and a set of status
 * bits.  The record is indexed through the embedded cache_extent.
 */
262 struct extent_record {
263 struct list_head backrefs;
264 struct list_head dups;
265 struct rb_root backref_tree;
266 struct list_head list;
267 struct cache_extent cache;
268 struct btrfs_disk_key parent_key;
273 u64 extent_item_refs;
275 u64 parent_generation;
/* Two bits wide so that it can hold FLAG_UNSET (2) as well as 0 and 1. */
279 unsigned int flag_block_full_backref:2;
280 unsigned int found_rec:1;
281 unsigned int content_checked:1;
282 unsigned int owner_ref_checked:1;
283 unsigned int is_root:1;
284 unsigned int metadata:1;
285 unsigned int bad_full_backref:1;
286 unsigned int crossing_stripes:1;
287 unsigned int wrong_chunk_type:1;
/* Return the extent_record whose `list` member is @entry. */
290 static inline struct extent_record* to_extent_record(struct list_head *entry)
292 return container_of(entry, struct extent_record, list);
/*
 * One name linking an inode to a directory.  Each found_* bit is set by
 * add_inode_backref() when the corresponding item type has been seen.
 */
295 struct inode_backref {
296 struct list_head list;
297 unsigned int found_dir_item:1;
298 unsigned int found_dir_index:1;
299 unsigned int found_inode_ref:1;
/* Return the inode_backref whose `list` member is @entry. */
309 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
311 return list_entry(entry, struct inode_backref, list);
/* List-linked record describing one root item, including its drop key. */
314 struct root_item_record {
315 struct list_head list;
321 struct btrfs_key drop_key;
/*
 * Error bits accumulated in the `errors` field of a backref and decoded into
 * text by print_ref_error().  Every flag owns a distinct bit.
 */
324 #define REF_ERR_NO_DIR_ITEM (1 << 0)
325 #define REF_ERR_NO_DIR_INDEX (1 << 1)
326 #define REF_ERR_NO_INODE_REF (1 << 2)
327 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
328 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
329 #define REF_ERR_DUP_INODE_REF (1 << 5)
330 #define REF_ERR_INDEX_UNMATCH (1 << 6)
331 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
332 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
333 #define REF_ERR_NO_ROOT_REF (1 << 9)
334 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
335 #define REF_ERR_DUP_ROOT_REF (1 << 11)
336 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/*
 * One hole in a file's extent map, kept in an rb-tree.  The hole helpers
 * below read its `start`, `len` and `node` members.
 */
338 struct file_extent_hole {
/*
 * Everything collected about one inode while walking a tree: the names that
 * reference it (backrefs), status bits for the item types seen, the holes
 * found between its file extents and any orphan data extents.
 */
344 struct inode_record {
345 struct list_head backrefs;
346 unsigned int checked:1;
347 unsigned int merging:1;
348 unsigned int found_inode_item:1;
349 unsigned int found_dir_item:1;
350 unsigned int found_file_extent:1;
351 unsigned int found_csum_item:1;
352 unsigned int some_csum_missing:1;
353 unsigned int nodatasum:1;
/* rb-tree of struct file_extent_hole, maintained by the *_file_extent_hole helpers. */
366 struct rb_root holes;
367 struct list_head orphan_extents;
/*
 * Error bits accumulated in inode_record `errors` and decoded into text by
 * print_inode_error().  Every flag owns a distinct bit.
 */
372 #define I_ERR_NO_INODE_ITEM (1 << 0)
373 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
374 #define I_ERR_DUP_INODE_ITEM (1 << 2)
375 #define I_ERR_DUP_DIR_INDEX (1 << 3)
376 #define I_ERR_ODD_DIR_ITEM (1 << 4)
377 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
378 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
379 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
380 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
381 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
382 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
383 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
384 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
385 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
386 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/* One reference to a root; the one-bit flags record which items were seen for it. */
388 struct root_backref {
389 struct list_head list;
390 unsigned int found_dir_item:1;
391 unsigned int found_dir_index:1;
392 unsigned int found_back_ref:1;
393 unsigned int found_forward_ref:1;
394 unsigned int reachable:1;
/* Return the root_backref whose `list` member is @entry. */
403 static inline struct root_backref* to_root_backref(struct list_head *entry)
405 return list_entry(entry, struct root_backref, list);
409 struct list_head backrefs;
410 struct cache_extent cache;
411 unsigned int found_root_item:1;
417 struct cache_extent cache;
422 struct cache_extent cache;
423 struct cache_tree root_cache;
424 struct cache_tree inode_cache;
425 struct inode_record *current;
/*
 * State of a tree walk: the cache of shared nodes seen so far and, per tree
 * level, the shared node currently associated with that level.
 */
434 struct walk_control {
435 struct cache_tree shared;
436 struct shared_node *nodes[BTRFS_MAX_LEVEL];
442 struct btrfs_key key;
444 struct list_head list;
/* List-linked entry describing an extent. */
447 struct extent_entry {
452 struct list_head list;
/* Per-root information indexed through the embedded cache_extent. */
455 struct root_item_info {
456 /* level of the root */
458 /* number of nodes at this level, must be 1 for a root */
462 struct cache_extent cache_extent;
466 * Error bit for low memory mode check.
468 * Currently no caller cares about it yet. Just internal use for error
/*
 * Every flag owns a distinct bit so that several errors can be OR-ed into a
 * single value and still be told apart afterwards.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
/*
 * Used to be (1 << 4), which made it indistinguishable from
 * REFERENCER_MISMATCH; moved to the first unused bit.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * Progress reporter; @p is the struct task_ctx describing the current phase.
 *
 * With a period of 1000 (1s) it prints the label for priv->tp followed by a
 * work indicator that cycles through ". o O o".  The line ends in '\r' so
 * that the next update overwrites it.
 */
482 static void *print_status_check(void *p)
484 struct task_ctx *priv = p;
485 const char work_indicator[] = { '.', 'o', 'O', 'o' };
/* Labels are indexed by enum task_position. */
487 static char *task_position_string[] = {
489 "checking free space cache",
493 task_period_start(priv->info, 1000 /* 1s */);
495 if (priv->tp == TASK_NOTHING)
499 printf("%s [%c]\r", task_position_string[priv->tp],
500 work_indicator[count % 4]);
503 task_period_wait(priv->info);
508 static int print_status_return(void *p)
/*
 * Translate a mode name into its enum value.
 *
 * "lowmem" selects CHECK_MODE_LOWMEM; "orig" and "original" both select
 * CHECK_MODE_ORIGINAL; any other string yields CHECK_MODE_UNKNOWN.  The
 * match is exact (strcmp), hence case-sensitive, and @str is passed to
 * strcmp() unchecked, so it must not be NULL.
 */
516 static enum btrfs_check_mode parse_check_mode(const char *str)
518 if (strcmp(str, "lowmem") == 0)
519 return CHECK_MODE_LOWMEM;
520 if (strcmp(str, "orig") == 0)
521 return CHECK_MODE_ORIGINAL;
522 if (strcmp(str, "original") == 0)
523 return CHECK_MODE_ORIGINAL;
525 return CHECK_MODE_UNKNOWN;
528 /* Compatibility helper to allow reuse of old code */
/*
 * Look up the first (lowest-ordered) hole in @holes.  An empty tree is
 * handled before the lookup, so rb_first() is only used on a non-empty tree.
 */
529 static u64 first_extent_gap(struct rb_root *holes)
531 struct file_extent_hole *hole;
533 if (RB_EMPTY_ROOT(holes))
536 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * Insert-time ordering callback for the hole rb-tree.  Holes are ordered by
 * start; when the starts are equal the lengths decide which of the two
 * becomes the merge center.
 */
540 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
542 struct file_extent_hole *hole1;
543 struct file_extent_hole *hole2;
545 hole1 = rb_entry(node1, struct file_extent_hole, node);
546 hole2 = rb_entry(node2, struct file_extent_hole, node);
548 if (hole1->start > hole2->start)
550 if (hole1->start < hole2->start)
552 /* Now hole1->start == hole2->start */
553 if (hole1->len >= hole2->len)
555 * Hole 1 will be merge center
556 * Same hole will be merged later
559 /* Hole 2 will be merge center */
564 * Add a hole to the record
566 * This will do hole merge for copy_file_extent_holes(),
567 * which will ensure there won't be contiguous holes.
/*
 * Insert a newly allocated hole into @holes and coalesce it with its
 * neighbours, so that the tree never holds touching or overlapping holes.
 *
 * The new node first absorbs the previous hole if that one reaches or passes
 * its start, then absorbs each following hole that begins at or before its
 * end.
 */
569 static int add_file_extent_hole(struct rb_root *holes,
572 struct file_extent_hole *hole;
573 struct file_extent_hole *prev = NULL;
574 struct file_extent_hole *next = NULL;
576 hole = malloc(sizeof(*hole));
581 /* Since compare will not return 0, no -EEXIST will happen */
582 rb_insert(holes, &hole->node, compare_hole);
584 /* simple merge with previous hole */
585 if (rb_prev(&hole->node))
586 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/* Grow the new hole backwards to prev->start, then drop prev from the tree. */
588 if (prev && prev->start + prev->len >= hole->start) {
589 hole->len = hole->start + hole->len - prev->start;
590 hole->start = prev->start;
591 rb_erase(&prev->node, holes);
596 /* iterate merge with next holes */
598 if (!rb_next(&hole->node))
600 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
/* The length is only extended when `next` ends at or beyond the current end. */
602 if (hole->start + hole->len >= next->start) {
603 if (hole->start + hole->len <= next->start + next->len)
604 hole->len = next->start + next->len -
606 rb_erase(&next->node, holes);
/*
 * Search callback for rb_search().  @data is the key, passed as a struct
 * file_extent_hole, and @node is the tree entry under test.  The key start
 * is checked against the half-open range [hole->start, hole->start + hole->len).
 */
615 static int compare_hole_range(struct rb_node *node, void *data)
617 struct file_extent_hole *hole;
620 hole = (struct file_extent_hole *)data;
623 hole = rb_entry(node, struct file_extent_hole, node);
624 if (start < hole->start)
626 if (start >= hole->start && start < hole->start + hole->len)
632 * Delete a hole in the record
634 * This will do the hole split and is much stricter than add.
/*
 * Remove a byte range from the hole that contains it.
 *
 * The range is located with compare_hole_range() and must not extend past
 * the end of the hole found.  That hole is erased, and whatever remains of
 * it before and after the range is inserted again through
 * add_file_extent_hole().
 */
636 static int del_file_extent_hole(struct rb_root *holes,
639 struct file_extent_hole *hole;
640 struct file_extent_hole tmp;
645 struct rb_node *node;
652 node = rb_search(holes, &tmp, compare_hole_range, NULL);
655 hole = rb_entry(node, struct file_extent_hole, node);
656 if (start + len > hole->start + hole->len)
660 * Now there will be no overlap, delete the hole and re-add the
661 * split(s) if they exists.
663 if (start > hole->start) {
664 prev_start = hole->start;
665 prev_len = start - hole->start;
668 if (hole->start + hole->len > start + len) {
669 next_start = start + len;
670 next_len = hole->start + hole->len - start - len;
673 rb_erase(node, holes);
676 ret = add_file_extent_hole(holes, prev_start, prev_len);
681 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole of the source tree to @dst, walking the source in order.
 * Each hole goes through add_file_extent_hole(), so holes already in @dst
 * are merged with the copies rather than duplicated.
 */
688 static int copy_file_extent_holes(struct rb_root *dst,
691 struct file_extent_hole *hole;
692 struct rb_node *node;
695 node = rb_first(src);
697 hole = rb_entry(node, struct file_extent_hole, node);
698 ret = add_file_extent_hole(dst, hole->start, hole->len);
701 node = rb_next(node);
/*
 * Empty the hole tree.  The first node is erased and rb_first() is called
 * again each time, so no iterator is held across an erase.
 */
706 static void free_file_extent_holes(struct rb_root *holes)
708 struct rb_node *node;
709 struct file_extent_hole *hole;
711 node = rb_first(holes);
713 hole = rb_entry(node, struct file_extent_hole, node);
714 rb_erase(node, holes);
716 node = rb_first(holes);
720 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Attach @root to transaction @trans the first time the transaction touches
 * it (root->last_trans differs from trans->transid): mark the root dirty,
 * record the transaction id, and remember the current node as commit_root
 * while taking an extra reference on it.
 */
722 static void record_root_in_trans(struct btrfs_trans_handle *trans,
723 struct btrfs_root *root)
725 if (root->last_trans != trans->transid) {
726 root->track_dirty = 1;
727 root->last_trans = trans->transid;
728 root->commit_root = root->node;
729 extent_buffer_get(root->node);
/*
 * Map the file-type bits of an inode mode to the matching BTRFS_FT_* value.
 * Type bits without an entry in the table map to 0, because the static
 * table is zero-filled wherever no initializer is given.
 *
 * NOTE(review): the table has S_IFMT >> S_SHIFT elements, but the index
 * (imode & S_IFMT) >> S_SHIFT can itself reach S_IFMT >> S_SHIFT, which is
 * one past the last element.  A mode with every type bit set (possible on a
 * damaged filesystem) would read out of bounds - confirm, and consider
 * sizing the table with "+ 1".
 */
733 static u8 imode_to_type(u32 imode)
736 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
737 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
738 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
739 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
740 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
741 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
742 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
743 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
746 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/* rb-tree ordering callback for device records; the key is devid. */
750 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
752 struct device_record *rec1;
753 struct device_record *rec2;
755 rec1 = rb_entry(node1, struct device_record, node);
756 rec2 = rb_entry(node2, struct device_record, node);
757 if (rec1->devid > rec2->devid)
759 else if (rec1->devid < rec2->devid)
/*
 * Make a private deep copy of @orig_rec.
 *
 * The record is copied with memcpy(); its backref list, orphan-extent list
 * and hole tree are then re-initialised and rebuilt from fresh allocations,
 * so the clone shares no list or tree node with the original.  Returns
 * ERR_PTR(-ENOMEM) when the record itself cannot be allocated.  The tail of
 * the function unwinds a partially built clone: holes first, then backrefs,
 * then orphan extents.
 */
765 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
767 struct inode_record *rec;
768 struct inode_backref *backref;
769 struct inode_backref *orig;
770 struct inode_backref *tmp;
771 struct orphan_data_extent *src_orphan;
772 struct orphan_data_extent *dst_orphan;
777 rec = malloc(sizeof(*rec));
779 return ERR_PTR(-ENOMEM);
780 memcpy(rec, orig_rec, sizeof(*rec));
/* The memcpy() copied the original's list heads and tree root; reset them. */
782 INIT_LIST_HEAD(&rec->backrefs);
783 INIT_LIST_HEAD(&rec->orphan_extents);
784 rec->holes = RB_ROOT;
786 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* A backref is followed by its name and a terminating NUL. */
787 size = sizeof(*orig) + orig->namelen + 1;
788 backref = malloc(size);
793 memcpy(backref, orig, size);
794 list_add_tail(&backref->list, &rec->backrefs);
796 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
797 dst_orphan = malloc(sizeof(*dst_orphan));
802 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
803 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
805 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
812 rb = rb_first(&rec->holes);
814 struct file_extent_hole *hole;
816 hole = rb_entry(rb, struct file_extent_hole, node);
822 if (!list_empty(&rec->backrefs))
823 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
824 list_del(&orig->list);
/*
 * NOTE(review): this loop walks a list of struct orphan_data_extent with
 * `orig`/`tmp`, which are struct inode_backref pointers.  It only works
 * because `list` is the first member of both structures - confirm, and
 * consider dedicated orphan_data_extent iterators.
 */
828 if (!list_empty(&rec->orphan_extents))
829 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
830 list_del(&orig->list);
/*
 * Print, on stdout, one line per entry of @orphan_extents, preceded by a
 * heading that names the tree.  The list is checked for emptiness before
 * anything is printed.
 */
839 static void print_orphan_data_extents(struct list_head *orphan_extents,
842 struct orphan_data_extent *orphan;
844 if (list_empty(orphan_extents))
846 printf("The following data extent is lost in tree %llu:\n",
848 list_for_each_entry(orphan, orphan_extents, list) {
849 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
850 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Report the errors recorded for inode @rec of @root on stderr.
 *
 * One line is written: the root and inode numbers, the raw error mask in
 * hex, and a ", <description>" fragment for each I_ERR_* bit that is set.
 * Orphan data extents and file extent holes are listed afterwards when the
 * corresponding bits are set.
 */
855 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
857 u64 root_objectid = root->root_key.objectid;
858 int errors = rec->errors;
862 /* reloc root errors, we print its corresponding fs root objectid*/
/*
 * NOTE(review): "reloc" is written without a trailing space, so the
 * output reads "relocroot <id> ..." - confirm this is intended.
 */
863 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
864 root_objectid = root->root_key.offset;
865 fprintf(stderr, "reloc");
867 fprintf(stderr, "root %llu inode %llu errors %x",
868 (unsigned long long) root_objectid,
869 (unsigned long long) rec->ino, rec->errors);
871 if (errors & I_ERR_NO_INODE_ITEM)
872 fprintf(stderr, ", no inode item");
873 if (errors & I_ERR_NO_ORPHAN_ITEM)
874 fprintf(stderr, ", no orphan item");
875 if (errors & I_ERR_DUP_INODE_ITEM)
876 fprintf(stderr, ", dup inode item");
877 if (errors & I_ERR_DUP_DIR_INDEX)
878 fprintf(stderr, ", dup dir index");
879 if (errors & I_ERR_ODD_DIR_ITEM)
880 fprintf(stderr, ", odd dir item");
881 if (errors & I_ERR_ODD_FILE_EXTENT)
882 fprintf(stderr, ", odd file extent");
883 if (errors & I_ERR_BAD_FILE_EXTENT)
884 fprintf(stderr, ", bad file extent");
885 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
886 fprintf(stderr, ", file extent overlap");
887 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
888 fprintf(stderr, ", file extent discount");
889 if (errors & I_ERR_DIR_ISIZE_WRONG)
890 fprintf(stderr, ", dir isize wrong");
891 if (errors & I_ERR_FILE_NBYTES_WRONG)
892 fprintf(stderr, ", nbytes wrong");
893 if (errors & I_ERR_ODD_CSUM_ITEM)
894 fprintf(stderr, ", odd csum item");
895 if (errors & I_ERR_SOME_CSUM_MISSING)
896 fprintf(stderr, ", some csum missing");
897 if (errors & I_ERR_LINK_COUNT_WRONG)
898 fprintf(stderr, ", link count wrong");
899 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
900 fprintf(stderr, ", orphan file extent");
901 fprintf(stderr, "\n");
902 /* Print the orphan extents if needed */
/*
 * NOTE(review): this passes root->objectid, not the root_objectid computed
 * above (which is remapped for reloc roots) - confirm which id is wanted.
 */
903 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
904 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
906 /* Print the holes if needed */
907 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
908 struct file_extent_hole *hole;
909 struct rb_node *node;
912 node = rb_first(&rec->holes);
913 fprintf(stderr, "Found file extent holes:\n");
916 hole = rb_entry(node, struct file_extent_hole, node);
917 fprintf(stderr, "\tstart: %llu, len: %llu\n",
918 hole->start, hole->len);
919 node = rb_next(node);
922 fprintf(stderr, "\tstart: 0, len: %llu\n",
924 root->fs_info->sectorsize));
/*
 * Write a ", <description>" fragment to stderr for every REF_ERR_* bit set
 * in @errors, then end the line.  The caller is expected to have printed the
 * start of the line already, since every fragment begins with a comma.
 */
928 static void print_ref_error(int errors)
930 if (errors & REF_ERR_NO_DIR_ITEM)
931 fprintf(stderr, ", no dir item");
932 if (errors & REF_ERR_NO_DIR_INDEX)
933 fprintf(stderr, ", no dir index");
934 if (errors & REF_ERR_NO_INODE_REF)
935 fprintf(stderr, ", no inode ref");
936 if (errors & REF_ERR_DUP_DIR_ITEM)
937 fprintf(stderr, ", dup dir item");
938 if (errors & REF_ERR_DUP_DIR_INDEX)
939 fprintf(stderr, ", dup dir index");
940 if (errors & REF_ERR_DUP_INODE_REF)
941 fprintf(stderr, ", dup inode ref");
942 if (errors & REF_ERR_INDEX_UNMATCH)
943 fprintf(stderr, ", index mismatch");
944 if (errors & REF_ERR_FILETYPE_UNMATCH)
945 fprintf(stderr, ", filetype mismatch");
946 if (errors & REF_ERR_NAME_TOO_LONG)
947 fprintf(stderr, ", name too long");
948 if (errors & REF_ERR_NO_ROOT_REF)
949 fprintf(stderr, ", no root ref");
950 if (errors & REF_ERR_NO_ROOT_BACKREF)
951 fprintf(stderr, ", no root backref");
952 if (errors & REF_ERR_DUP_ROOT_REF)
953 fprintf(stderr, ", dup root ref");
954 if (errors & REF_ERR_DUP_ROOT_BACKREF)
955 fprintf(stderr, ", dup root backref");
956 fprintf(stderr, "\n");
/*
 * Find the record for inode @ino in @inode_cache, creating it if needed.
 *
 * When an existing record is shared (refs > 1) and the caller intends to
 * modify it (@mod), the cache entry is switched to a private clone made by
 * clone_inode_rec().  On the creation path a zeroed record is set up with
 * extent_start = (u64)-1, empty lists and an empty hole tree, and is
 * inserted under a ptr_node covering exactly @ino.
 *
 * Failures are returned as ERR_PTR(): -ENOMEM when an allocation fails and
 * -EEXIST when the cache insertion fails.
 */
959 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
962 struct ptr_node *node;
963 struct cache_extent *cache;
964 struct inode_record *rec = NULL;
967 cache = lookup_cache_extent(inode_cache, ino, 1);
969 node = container_of(cache, struct ptr_node, cache);
971 if (mod && rec->refs > 1) {
972 node->data = clone_inode_rec(rec);
973 if (IS_ERR(node->data))
979 rec = calloc(1, sizeof(*rec));
981 return ERR_PTR(-ENOMEM);
/* (u64)-1 is the "no extent seen yet" marker tested by merge_inode_recs(). */
983 rec->extent_start = (u64)-1;
985 INIT_LIST_HEAD(&rec->backrefs);
986 INIT_LIST_HEAD(&rec->orphan_extents);
987 rec->holes = RB_ROOT;
989 node = malloc(sizeof(*node));
992 return ERR_PTR(-ENOMEM);
994 node->cache.start = ino;
995 node->cache.size = 1;
998 if (ino == BTRFS_FREE_INO_OBJECTID)
1001 ret = insert_cache_extent(inode_cache, &node->cache);
1003 return ERR_PTR(-EEXIST);
/* Unlink entries from the head of @orphan_extents until the list is empty. */
1008 static void free_orphan_data_extents(struct list_head *orphan_extents)
1010 struct orphan_data_extent *orphan;
1012 while (!list_empty(orphan_extents)) {
1013 orphan = list_entry(orphan_extents->next,
1014 struct orphan_data_extent, list);
1015 list_del(&orphan->list);
/*
 * Drop one reference on @rec.  The reference count is decremented first and
 * tested for remaining users; the teardown below it empties the backref
 * list, the orphan-extent list and the hole tree.
 */
1020 static void free_inode_rec(struct inode_record *rec)
1022 struct inode_backref *backref;
1024 if (--rec->refs > 0)
1027 while (!list_empty(&rec->backrefs)) {
1028 backref = to_inode_backref(rec->backrefs.next);
1029 list_del(&backref->list);
1032 free_orphan_data_extents(&rec->orphan_extents);
1033 free_file_extent_holes(&rec->holes);
/*
 * Test whether @rec is fully verified: no errors, checked, inode item seen,
 * link count equal to the links found, and no backrefs left on the list.
 */
1037 static int can_free_inode_rec(struct inode_record *rec)
1039 if (!rec->errors && rec->checked && rec->found_inode_item &&
1040 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finish validating @rec and release it if nothing is wrong with it.
 *
 * Nothing can be validated before the inode item has been seen.  After
 * that:
 *  - backrefs that have both a dir item and a dir index get their file type
 *    checked against the inode mode, and are unlinked from the list once
 *    they are error-free, have an inode ref, and the link counts agree;
 *  - once the record is checked and not being merged, size, extent and
 *    checksum consistency checks add I_ERR_* bits;
 *  - if can_free_inode_rec() then holds, the record is removed from
 *    @inode_cache and released.
 */
1045 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1046 struct inode_record *rec)
1048 struct cache_extent *cache;
1049 struct inode_backref *tmp, *backref;
1050 struct ptr_node *node;
1053 if (!rec->found_inode_item)
1056 filetype = imode_to_type(rec->imode);
1057 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1058 if (backref->found_dir_item && backref->found_dir_index) {
1059 if (backref->filetype != filetype)
1060 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1061 if (!backref->errors && backref->found_inode_ref &&
1062 rec->nlink == rec->found_link) {
1063 list_del(&backref->list);
1069 if (!rec->checked || rec->merging)
1072 if (S_ISDIR(rec->imode)) {
1073 if (rec->found_size != rec->isize)
1074 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1075 if (rec->found_file_extent)
1076 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1077 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1078 if (rec->found_dir_item)
1079 rec->errors |= I_ERR_ODD_DIR_ITEM;
1080 if (rec->found_size != rec->nbytes)
1081 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* The gap test is skipped for unlinked inodes and when no_holes is set. */
1082 if (rec->nlink > 0 && !no_holes &&
1083 (rec->extent_end < rec->isize ||
1084 first_extent_gap(&rec->holes) < rec->isize))
1085 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1088 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1089 if (rec->found_csum_item && rec->nodatasum)
1090 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1091 if (rec->some_csum_missing && !rec->nodatasum)
1092 rec->errors |= I_ERR_SOME_CSUM_MISSING;
/* Only an unshared record may reach this point. */
1095 BUG_ON(rec->refs != 1);
1096 if (can_free_inode_rec(rec)) {
1097 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1098 node = container_of(cache, struct ptr_node, cache);
1099 BUG_ON(node->data != rec);
1100 remove_cache_extent(inode_cache, &node->cache);
1102 free_inode_rec(rec);
/*
 * Search @root for an orphan item: the key uses BTRFS_ORPHAN_OBJECTID as
 * objectid and BTRFS_ORPHAN_ITEM_KEY as type.  The search is read-only (no
 * transaction handle, ins_len and cow both 0) and the path is released
 * straight after it.
 */
1106 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1108 struct btrfs_path path;
1109 struct btrfs_key key;
1112 key.objectid = BTRFS_ORPHAN_OBJECTID;
1113 key.type = BTRFS_ORPHAN_ITEM_KEY;
1116 btrfs_init_path(&path);
1117 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1118 btrfs_release_path(&path);
/*
 * Record the INODE_ITEM found at @slot of @eb in the current inode record of
 * @active_node.
 *
 * The current record must belong to key->objectid and must not be shared.
 * A second inode item for the same record is flagged with
 * I_ERR_DUP_INODE_ITEM.  Otherwise nlink, size, nbytes and mode are copied
 * from the item, the NODATASUM inode flag is tested, and an inode with
 * nlink == 0 is provisionally flagged with I_ERR_NO_ORPHAN_ITEM.  The record
 * is then passed to maybe_free_inode_rec().
 */
1124 static int process_inode_item(struct extent_buffer *eb,
1125 int slot, struct btrfs_key *key,
1126 struct shared_node *active_node)
1128 struct inode_record *rec;
1129 struct btrfs_inode_item *item;
1131 rec = active_node->current;
1132 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1133 if (rec->found_inode_item) {
1134 rec->errors |= I_ERR_DUP_INODE_ITEM;
1137 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1138 rec->nlink = btrfs_inode_nlink(eb, item);
1139 rec->isize = btrfs_inode_size(eb, item);
1140 rec->nbytes = btrfs_inode_nbytes(eb, item);
1141 rec->imode = btrfs_inode_mode(eb, item);
1142 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1144 rec->found_inode_item = 1;
1145 if (rec->nlink == 0)
1146 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1147 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of @rec that matches directory @dir and the given name,
 * or create one.
 *
 * An existing entry matches when dir, namelen and the name bytes are all
 * equal.  A new entry is allocated with room for the name plus a
 * terminating NUL; only its fixed part is zeroed before the name is copied
 * in.  New entries go to the tail of rec->backrefs.
 */
1151 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1153 int namelen, u64 dir)
1155 struct inode_backref *backref;
1157 list_for_each_entry(backref, &rec->backrefs, list) {
1158 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1160 if (backref->dir != dir || backref->namelen != namelen)
1162 if (memcmp(name, backref->name, namelen))
1167 backref = malloc(sizeof(*backref) + namelen + 1);
1170 memset(backref, 0, sizeof(*backref));
1172 backref->namelen = namelen;
1173 memcpy(backref->name, name, namelen);
1174 backref->name[namelen] = '\0';
1175 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record that an item of type @itemtype names inode @ino as @name in
 * directory @dir.
 *
 * The inode record and the matching backref are looked up (and created if
 * necessary), @errors is OR-ed into the backref, and the backref is updated
 * according to the item type:
 *  - DIR_INDEX: flags duplicates, an index differing from an inode ref seen
 *    earlier, and a file type differing from a dir item seen earlier;
 *  - DIR_ITEM: flags duplicates and a file type differing from a dir index
 *    seen earlier;
 *  - INODE_REF / INODE_EXTREF: flags duplicates and an index differing from
 *    a dir index seen earlier.
 * The record is then passed to maybe_free_inode_rec().  A failed record
 * lookup is fatal (BUG_ON).
 */
1179 static int add_inode_backref(struct cache_tree *inode_cache,
1180 u64 ino, u64 dir, u64 index,
1181 const char *name, int namelen,
1182 u8 filetype, u8 itemtype, int errors)
1184 struct inode_record *rec;
1185 struct inode_backref *backref;
1187 rec = get_inode_rec(inode_cache, ino, 1);
1188 BUG_ON(IS_ERR(rec));
1189 backref = get_inode_backref(rec, name, namelen, dir);
1192 backref->errors |= errors;
1193 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1194 if (backref->found_dir_index)
1195 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1196 if (backref->found_inode_ref && backref->index != index)
1197 backref->errors |= REF_ERR_INDEX_UNMATCH;
1198 if (backref->found_dir_item && backref->filetype != filetype)
1199 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1201 backref->index = index;
1202 backref->filetype = filetype;
1203 backref->found_dir_index = 1;
1204 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1206 if (backref->found_dir_item)
1207 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1208 if (backref->found_dir_index && backref->filetype != filetype)
1209 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1211 backref->filetype = filetype;
1212 backref->found_dir_item = 1;
1213 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1214 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1215 if (backref->found_inode_ref)
1216 backref->errors |= REF_ERR_DUP_INODE_REF;
1217 if (backref->found_dir_index && backref->index != index)
1218 backref->errors |= REF_ERR_INDEX_UNMATCH;
1220 backref->index = index;
/* Remember whether the name came from an INODE_REF or an INODE_EXTREF. */
1222 backref->ref_type = itemtype;
1223 backref->found_inode_ref = 1;
1228 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold everything known about @src into @dst, which lives in @dst_cache.
 *
 * Every backref of @src is replayed into @dst through add_inode_backref(),
 * once for each item type it was seen as.  Status bits, link and size
 * totals, the extent range and the error mask are then merged, and the
 * inode item fields are copied if @dst has none yet.  If both records have
 * an inode item, I_ERR_DUP_INODE_ITEM is raised.
 */
1232 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1233 struct cache_tree *dst_cache)
1235 struct inode_backref *backref;
1240 list_for_each_entry(backref, &src->backrefs, list) {
1241 if (backref->found_dir_index) {
1242 add_inode_backref(dst_cache, dst->ino, backref->dir,
1243 backref->index, backref->name,
1244 backref->namelen, backref->filetype,
1245 BTRFS_DIR_INDEX_KEY, backref->errors);
1247 if (backref->found_dir_item) {
1249 add_inode_backref(dst_cache, dst->ino,
1250 backref->dir, 0, backref->name,
1251 backref->namelen, backref->filetype,
1252 BTRFS_DIR_ITEM_KEY, backref->errors);
1254 if (backref->found_inode_ref) {
1255 add_inode_backref(dst_cache, dst->ino,
1256 backref->dir, backref->index,
1257 backref->name, backref->namelen, 0,
1258 backref->ref_type, backref->errors);
1262 if (src->found_dir_item)
1263 dst->found_dir_item = 1;
1264 if (src->found_file_extent)
1265 dst->found_file_extent = 1;
1266 if (src->found_csum_item)
1267 dst->found_csum_item = 1;
1268 if (src->some_csum_missing)
1269 dst->some_csum_missing = 1;
/* Holes are only copied when the first gap of @src comes before that of @dst. */
1270 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1271 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1276 BUG_ON(src->found_link < dir_count);
1277 dst->found_link += src->found_link - dir_count;
1278 dst->found_size += src->found_size;
/* (u64)-1 in extent_start means that record has not seen any extent yet. */
1279 if (src->extent_start != (u64)-1) {
1280 if (dst->extent_start == (u64)-1) {
1281 dst->extent_start = src->extent_start;
1282 dst->extent_end = src->extent_end;
1284 if (dst->extent_end > src->extent_start)
1285 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1286 else if (dst->extent_end < src->extent_start) {
1287 ret = add_file_extent_hole(&dst->holes,
1289 src->extent_start - dst->extent_end);
1291 if (dst->extent_end < src->extent_end)
1292 dst->extent_end = src->extent_end;
1296 dst->errors |= src->errors;
1297 if (src->found_inode_item) {
1298 if (!dst->found_inode_item) {
1299 dst->nlink = src->nlink;
1300 dst->isize = src->isize;
1301 dst->nbytes = src->nbytes;
1302 dst->imode = src->imode;
1303 dst->nodatasum = src->nodatasum;
1304 dst->found_inode_item = 1;
1306 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move the inode records collected under @src_node into @dst_node.
 *
 * One reference is dropped on @src_node first.  Its root_cache is processed
 * before its inode_cache.  Each entry is re-inserted into the matching
 * destination cache; when the destination already has a record for that
 * inode (-EEXIST) the two are merged with merge_inode_recs() and the source
 * record is released.  Finally the "current" inode of the destination is
 * advanced if the source had moved on to a higher inode number.
 */
1314 static int splice_shared_node(struct shared_node *src_node,
1315 struct shared_node *dst_node)
1317 struct cache_extent *cache;
1318 struct ptr_node *node, *ins;
1319 struct cache_tree *src, *dst;
1320 struct inode_record *rec, *conflict;
1321 u64 current_ino = 0;
1325 if (--src_node->refs == 0)
1327 if (src_node->current)
1328 current_ino = src_node->current->ino;
1330 src = &src_node->root_cache;
1331 dst = &dst_node->root_cache;
1333 cache = search_cache_extent(src, 0);
1335 node = container_of(cache, struct ptr_node, cache);
/* Step to the next entry before the current one may be removed from @src. */
1337 cache = next_cache_extent(cache);
1340 remove_cache_extent(src, &node->cache);
1343 ins = malloc(sizeof(*ins));
1345 ins->cache.start = node->cache.start;
1346 ins->cache.size = node->cache.size;
1350 ret = insert_cache_extent(dst, &ins->cache);
1351 if (ret == -EEXIST) {
1352 conflict = get_inode_rec(dst, rec->ino, 1);
1353 BUG_ON(IS_ERR(conflict));
1354 merge_inode_recs(rec, conflict, dst);
1356 conflict->checked = 1;
1357 if (dst_node->current == conflict)
1358 dst_node->current = NULL;
1360 maybe_free_inode_rec(dst, conflict);
1361 free_inode_rec(rec);
/* Second pass: repeat the walk for the inode caches. */
1368 if (src == &src_node->root_cache) {
1369 src = &src_node->inode_cache;
1370 dst = &dst_node->inode_cache;
1374 if (current_ino > 0 && (!dst_node->current ||
1375 current_ino > dst_node->current->ino)) {
1376 if (dst_node->current) {
1377 dst_node->current->checked = 1;
1378 maybe_free_inode_rec(dst, dst_node->current);
1380 dst_node->current = get_inode_rec(dst, current_ino, 1);
1381 BUG_ON(IS_ERR(dst_node->current));
/*
 * Release the inode record attached to one cache extent: the extent is
 * converted back to its containing ptr_node and the record is handed to
 * free_inode_rec().
 */
1386 static void free_inode_ptr(struct cache_extent *cache)
1388 struct ptr_node *node;
1389 struct inode_record *rec;
1391 node = container_of(cache, struct ptr_node, cache);
1393 free_inode_rec(rec);
/*
 * Instantiates the "inode_recs" tree helper with free_inode_ptr as its
 * per-extent routine (free_inode_recs_tree() is called elsewhere in this
 * file; presumably it is what this macro generates - confirm in the macro).
 */
1397 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the entry recorded in @shared for @bytenr (lookup size 1) and
 * convert the cache extent back to its containing shared_node.
 */
1399 static struct shared_node *find_shared_node(struct cache_tree *shared,
1402 struct cache_extent *cache;
1403 struct shared_node *node;
1405 cache = lookup_cache_extent(shared, bytenr, 1);
1407 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zero-filled shared_node keyed by @bytenr (extent size 1),
 * initialise its root and inode cache trees and insert it into @shared.
 */
1413 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1416 struct shared_node *node;
1418 node = calloc(1, sizeof(*node));
1421 node->cache.start = bytenr;
1422 node->cache.size = 1;
1423 cache_tree_init(&node->root_cache);
1424 cache_tree_init(&node->inode_cache);
1427 ret = insert_cache_extent(shared, &node->cache);
/*
 * Bookkeeping for reaching the tree block at @bytenr on @level during the
 * downward walk.  The visible paths are: register a new shared_node and make
 * it the active node for @level; drop a reference on an already known node
 * when the active node sits at the root level of a root whose root item has
 * zero refs; or splice the known node's records into the active node.
 */
1432 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1433 struct walk_control *wc, int level)
1435 struct shared_node *node;
1436 struct shared_node *dest;
1439 if (level == wc->active_node)
/* From here on the active node must be above @level. */
1442 BUG_ON(wc->active_node <= level);
1443 node = find_shared_node(&wc->shared, bytenr);
1445 ret = add_shared_node(&wc->shared, bytenr, refs);
1447 node = find_shared_node(&wc->shared, bytenr);
1448 wc->nodes[level] = node;
1449 wc->active_node = level;
/*
 * Active node is at the root level and the root item has zero refs: only
 * drop a reference, freeing both caches and the node's entry once the
 * count reaches zero.
 */
1453 if (wc->root_level == wc->active_node &&
1454 btrfs_root_refs(&root->root_item) == 0) {
1455 if (--node->refs == 0) {
1456 free_inode_recs_tree(&node->root_cache);
1457 free_inode_recs_tree(&node->inode_cache);
1458 remove_cache_extent(&wc->shared, &node->cache);
/* Merge the known node's records into the currently active node. */
1464 dest = wc->nodes[wc->active_node];
1465 splice_shared_node(node, dest);
1466 if (node->refs == 0) {
1467 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Leave the shared node that is active at @level: the active slot in
 * wc->nodes[] is cleared and wc->active_node moves to a level above @level
 * (index i from the scan below).  The departed node's records are spliced
 * into the new active node when that node is below the root level or the
 * root item still has references.
 */
1473 static int leave_shared_node(struct btrfs_root *root,
1474 struct walk_control *wc, int level)
1476 struct shared_node *node;
1477 struct shared_node *dest;
1480 if (level == wc->root_level)
/* Scan the levels above @level for the next active node. */
1483 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1487 BUG_ON(i >= BTRFS_MAX_LEVEL);
1489 node = wc->nodes[wc->active_node];
1490 wc->nodes[wc->active_node] = NULL;
1491 wc->active_node = i;
1493 dest = wc->nodes[wc->active_node];
1494 if (wc->active_node < wc->root_level ||
1495 btrfs_root_refs(&root->root_item) > 0) {
1496 BUG_ON(node->refs <= 1);
1497 splice_shared_node(node, dest);
1499 BUG_ON(node->refs < 2);
1508 * 1 - if the root with id child_root_id is a child of root parent_root_id
1509 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1510 * has other root(s) as parent(s)
1511 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Relate root @child_root_id to @parent_root_id using the tree root: first a
 * direct ROOT_REF lookup keyed by (parent, child), then a walk over the
 * child's ROOT_BACKREF items.
 */
1513 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1516 struct btrfs_path path;
1517 struct btrfs_key key;
1518 struct extent_buffer *leaf;
1522 btrfs_init_path(&path);
/* Direct lookup: ROOT_REF item with objectid = parent, offset = child. */
1524 key.objectid = parent_root_id;
1525 key.type = BTRFS_ROOT_REF_KEY;
1526 key.offset = child_root_id;
1527 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1531 btrfs_release_path(&path);
/* Second search: position at the child's ROOT_BACKREF items. */
1535 key.objectid = child_root_id;
1536 key.type = BTRFS_ROOT_BACKREF_KEY;
1538 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1544 leaf = path.nodes[0];
/* Slot is past the last item of this leaf: continue in the next leaf. */
1545 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1546 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1549 leaf = path.nodes[0];
1552 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Items with another objectid or type are not backrefs of the child. */
1553 if (key.objectid != child_root_id ||
1554 key.type != BTRFS_ROOT_BACKREF_KEY)
/* The backref key's offset is compared with the wanted parent id. */
1559 if (key.offset == parent_root_id) {
1560 btrfs_release_path(&path);
1567 btrfs_release_path(&path);
/* No match with @parent_root_id: 0 if some parent was seen, 2 if none. */
1570 return has_parent ? 0 : 2;
/*
 * Walk the btrfs_dir_item entries packed into the item at @slot of @eb.
 * The current inode record of @active_node is marked as having a dir item
 * and its found_size grows by each name length.  Every entry is recorded
 * with add_inode_backref(): in the inode cache for INODE_ITEM locations, in
 * the root cache for ROOT_ITEM locations, and under
 * BTRFS_MULTIPLE_OBJECTIDS in the inode cache otherwise.
 */
1573 static int process_dir_item(struct extent_buffer *eb,
1574 int slot, struct btrfs_key *key,
1575 struct shared_node *active_node)
1585 struct btrfs_dir_item *di;
1586 struct inode_record *rec;
1587 struct cache_tree *root_cache;
1588 struct cache_tree *inode_cache;
1589 struct btrfs_key location;
1590 char namebuf[BTRFS_NAME_LEN];
1592 root_cache = &active_node->root_cache;
1593 inode_cache = &active_node->inode_cache;
1594 rec = active_node->current;
1595 rec->found_dir_item = 1;
1597 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1598 total = btrfs_item_size_nr(eb, slot);
1599 while (cur < total) {
1601 btrfs_dir_item_key_to_cpu(eb, di, &location);
1602 name_len = btrfs_dir_name_len(eb, di);
1603 data_len = btrfs_dir_data_len(eb, di);
1604 filetype = btrfs_dir_type(eb, di);
1606 rec->found_size += name_len;
/* Name would run past the item end or exceeds BTRFS_NAME_LEN. */
1607 if (cur + sizeof(*di) + name_len > total ||
1608 name_len > BTRFS_NAME_LEN) {
1609 error = REF_ERR_NAME_TOO_LONG;
1611 if (cur + sizeof(*di) > total)
1613 len = min_t(u32, total - cur - sizeof(*di),
1620 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/*
 * For DIR_ITEM keys the key offset has to equal the hash of the name.
 * NOTE(review): namebuf is printed with %s below; it is exactly
 * BTRFS_NAME_LEN bytes and no terminator is written in the adjacent
 * lines - confirm it is NUL-terminated before this call.
 */
1622 if (key->type == BTRFS_DIR_ITEM_KEY &&
1623 key->offset != btrfs_name_hash(namebuf, len)) {
1624 rec->errors |= I_ERR_ODD_DIR_ITEM;
1625 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1626 key->objectid, key->offset, namebuf, len, filetype,
1627 key->offset, btrfs_name_hash(namebuf, len));
/* Record the entry according to the type of the key it points at. */
1630 if (location.type == BTRFS_INODE_ITEM_KEY) {
1631 add_inode_backref(inode_cache, location.objectid,
1632 key->objectid, key->offset, namebuf,
1633 len, filetype, key->type, error);
1634 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1635 add_inode_backref(root_cache, location.objectid,
1636 key->objectid, key->offset,
1637 namebuf, len, filetype,
1640 fprintf(stderr, "invalid location in dir item %u\n",
1642 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1643 key->objectid, key->offset, namebuf,
1644 len, filetype, key->type, error);
/* Step over header, name and data to the next packed entry. */
1647 len = sizeof(*di) + name_len + data_len;
1648 di = (struct btrfs_dir_item *)((char *)di + len);
/* More than one entry under a DIR_INDEX key is flagged as a duplicate. */
1651 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1652 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk the btrfs_inode_ref entries packed into the item at @slot of @eb and
 * record each one in the inode cache of @active_node through
 * add_inode_backref(), passing key->objectid, key->offset, the ref's index
 * and its (possibly truncated) name.
 */
1657 static int process_inode_ref(struct extent_buffer *eb,
1658 int slot, struct btrfs_key *key,
1659 struct shared_node *active_node)
1667 struct cache_tree *inode_cache;
1668 struct btrfs_inode_ref *ref;
1669 char namebuf[BTRFS_NAME_LEN];
1671 inode_cache = &active_node->inode_cache;
1673 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1674 total = btrfs_item_size_nr(eb, slot);
1675 while (cur < total) {
1676 name_len = btrfs_inode_ref_name_len(eb, ref);
1677 index = btrfs_inode_ref_index(eb, ref);
1679 /* inode_ref + namelen should not cross item boundary */
1680 if (cur + sizeof(*ref) + name_len > total ||
1681 name_len > BTRFS_NAME_LEN) {
1682 if (total < cur + sizeof(*ref))
1685 /* Still try to read out the remaining part */
1686 len = min_t(u32, total - cur - sizeof(*ref),
1688 error = REF_ERR_NAME_TOO_LONG;
1694 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1695 add_inode_backref(inode_cache, key->objectid, key->offset,
1696 index, namebuf, len, 0, key->type, error);
/* Step over the ref header and its name to the next packed entry. */
1698 len = sizeof(*ref) + name_len;
1699 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Walk the btrfs_inode_extref entries packed into the item at @slot of @eb
 * and record each one in the inode cache of @active_node through
 * add_inode_backref(), passing key->objectid, the extref's parent and index
 * and its name.  Names longer than BTRFS_NAME_LEN are read truncated and
 * flagged with REF_ERR_NAME_TOO_LONG.
 *
 * NOTE(review): unlike process_inode_ref(), no check of the entry against
 * the item boundary (total) is visible in this loop - confirm.
 */
1705 static int process_inode_extref(struct extent_buffer *eb,
1706 int slot, struct btrfs_key *key,
1707 struct shared_node *active_node)
1716 struct cache_tree *inode_cache;
1717 struct btrfs_inode_extref *extref;
1718 char namebuf[BTRFS_NAME_LEN];
1720 inode_cache = &active_node->inode_cache;
1722 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1723 total = btrfs_item_size_nr(eb, slot);
1724 while (cur < total) {
1725 name_len = btrfs_inode_extref_name_len(eb, extref);
1726 index = btrfs_inode_extref_index(eb, extref);
1727 parent = btrfs_inode_extref_parent(eb, extref);
1728 if (name_len <= BTRFS_NAME_LEN) {
1732 len = BTRFS_NAME_LEN;
1733 error = REF_ERR_NAME_TOO_LONG;
1735 read_extent_buffer(eb, namebuf,
1736 (unsigned long)(extref + 1), len);
1737 add_inode_backref(inode_cache, key->objectid, parent,
1738 index, namebuf, len, 0, key->type, error);
/* Step over the extref header and its name to the next packed entry. */
1740 len = sizeof(*extref) + name_len;
1741 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the EXTENT_CSUM items of the csum tree that intersect the byte range
 * [@start, @start + @len) and work out how much of it they cover.  The
 * result is presumably accumulated through @found (the store itself is not
 * adjacent to the size computation below - confirm).
 */
1748 static int count_csum_range(struct btrfs_root *root, u64 start,
1749 u64 len, u64 *found)
1751 struct btrfs_key key;
1752 struct btrfs_path path;
1753 struct extent_buffer *leaf;
1758 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1760 btrfs_init_path(&path);
1762 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1764 key.type = BTRFS_EXTENT_CSUM_KEY;
1766 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match and a previous slot exists: inspect the preceding item,
 * which is relevant if it is also a csum item.
 */
1770 if (ret > 0 && path.slots[0] > 0) {
1771 leaf = path.nodes[0];
1772 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1773 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1774 key.type == BTRFS_EXTENT_CSUM_KEY)
1779 leaf = path.nodes[0];
/* Slot is past the last item of this leaf: continue in the next leaf. */
1780 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1781 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1786 leaf = path.nodes[0];
1789 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1790 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1791 key.type != BTRFS_EXTENT_CSUM_KEY)
/* NOTE(review): same key was decoded just above; looks redundant - confirm. */
1794 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Csum item begins at or beyond the end of the requested range. */
1795 if (key.offset >= start + len)
1798 if (key.offset > start)
/* Every csum_size bytes of item payload account for one sector of data. */
1801 size = btrfs_item_size_nr(leaf, path.slots[0]);
1802 csum_end = key.offset + (size / csum_size) *
1803 root->fs_info->sectorsize;
1804 if (csum_end > start) {
1805 size = min(csum_end - start, len);
1814 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item (@eb, @slot, @key) against the inode record
 * that is current on @active_node: extend the tracked extent range, note
 * holes and overlaps, sanity-check the extent fields according to the
 * extent type and look up csum coverage for extents with a disk bytenr.
 */
1820 static int process_file_extent(struct btrfs_root *root,
1821 struct extent_buffer *eb,
1822 int slot, struct btrfs_key *key,
1823 struct shared_node *active_node)
1825 struct inode_record *rec;
1826 struct btrfs_file_extent_item *fi;
1828 u64 disk_bytenr = 0;
1829 u64 extent_offset = 0;
/* sectorsize - 1, used below to test and round to sector alignment. */
1830 u64 mask = root->fs_info->sectorsize - 1;
1834 rec = active_node->current;
/* The current record must match the key's inode and hold at most one ref. */
1835 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1836 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet": start tracking at this offset. */
1838 if (rec->extent_start == (u64)-1) {
1839 rec->extent_start = key->offset;
1840 rec->extent_end = key->offset;
/* Previous end beyond this offset is an overlap; short of it is a hole. */
1843 if (rec->extent_end > key->offset)
1844 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1845 else if (rec->extent_end < key->offset) {
1846 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1847 key->offset - rec->extent_end);
1852 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1853 extent_type = btrfs_file_extent_type(eb, fi);
/*
 * Inline extent: the length is the inline data length; it is added to
 * found_size as is and then rounded up to a sector multiple.
 */
1855 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1856 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1858 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1859 rec->found_size += num_bytes;
1860 num_bytes = (num_bytes + mask) & ~mask;
1861 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1862 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1863 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1864 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1865 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1866 if (num_bytes == 0 || (num_bytes & mask))
1867 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced range must not exceed ram_bytes. */
1868 if (num_bytes + extent_offset >
1869 btrfs_file_extent_ram_bytes(eb, fi))
1870 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not be compressed, encrypted or encoded. */
1871 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1872 (btrfs_file_extent_compression(eb, fi) ||
1873 btrfs_file_extent_encryption(eb, fi) ||
1874 btrfs_file_extent_other_encoding(eb, fi)))
1875 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a non-zero disk bytenr add to found_size. */
1876 if (disk_bytenr > 0)
1877 rec->found_size += num_bytes;
1879 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Advance the tracked end of the file's extents. */
1881 rec->extent_end = key->offset + num_bytes;
1884 * The data reloc tree will copy full extents into its inode and then
1885 * copy the corresponding csums. Because the extent it copied could be
1886 * a preallocated extent that hasn't been written to yet there may be no
1887 * csums to copy, ergo we won't have csums for our file extent. This is
1888 * ok so just don't bother checking csums if the inode belongs to the
1891 if (disk_bytenr > 0 &&
1892 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* For compressed extents the lookup length is disk_num_bytes. */
1894 if (btrfs_file_extent_compression(eb, fi))
1895 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1897 disk_bytenr += extent_offset;
1899 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* Interpret the csum lookup result according to the extent type. */
1902 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1904 rec->found_csum_item = 1;
1905 if (found < num_bytes)
1906 rec->some_csum_missing = 1;
1907 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1909 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb for the original-mode check: keep
 * active_node->current pointing at the inode record for the item's
 * objectid and dispatch on the key type to the matching process_*()
 * helper.
 */
1915 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1916 struct walk_control *wc)
1918 struct btrfs_key key;
1922 struct cache_tree *inode_cache;
1923 struct shared_node *active_node;
/* Special case: active node at the root level of a root with zero refs. */
1925 if (wc->root_level == wc->active_node &&
1926 btrfs_root_refs(&root->root_item) == 0)
1929 active_node = wc->nodes[wc->active_node];
1930 inode_cache = &active_node->inode_cache;
1931 nritems = btrfs_header_nritems(eb);
1932 for (i = 0; i < nritems; i++) {
1933 btrfs_item_key_to_cpu(eb, &key, i);
/* Free-space objects and orphan items get dedicated treatment. */
1935 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1937 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Items moved on to a higher inode number: mark the previous record as
 * checked (it may be freed) and fetch the record for the new inode.
 */
1940 if (active_node->current == NULL ||
1941 active_node->current->ino < key.objectid) {
1942 if (active_node->current) {
1943 active_node->current->checked = 1;
1944 maybe_free_inode_rec(inode_cache,
1945 active_node->current);
1947 active_node->current = get_inode_rec(inode_cache,
1949 BUG_ON(IS_ERR(active_node->current));
1952 case BTRFS_DIR_ITEM_KEY:
1953 case BTRFS_DIR_INDEX_KEY:
1954 ret = process_dir_item(eb, i, &key, active_node);
1956 case BTRFS_INODE_REF_KEY:
1957 ret = process_inode_ref(eb, i, &key, active_node);
1959 case BTRFS_INODE_EXTREF_KEY:
1960 ret = process_inode_extref(eb, i, &key, active_node);
1962 case BTRFS_INODE_ITEM_KEY:
1963 ret = process_inode_item(eb, i, &key, active_node);
1965 case BTRFS_EXTENT_DATA_KEY:
1966 ret = process_file_extent(root, eb, i, &key,
/* Per-level cache, indexed by tree level: block address last looked up. */
1977 u64 bytenr[BTRFS_MAX_LEVEL];
/* Reference count obtained for the block recorded in bytenr[]. */
1978 u64 refs[BTRFS_MAX_LEVEL];
/* Non-zero when the block at that level has to be checked in this root. */
1979 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations so that process_one_leaf_v2() can call these. */
1982 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1983 struct node_refs *nrefs, u64 level);
1984 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1985 unsigned int ext_ref);
1988 * Returns >0 Found error, not fatal, should continue
1989 * Returns <0 Fatal error, must exit the whole check
1990 * Returns 0 No errors found
/*
 * Check the inodes found in the leaf at path->nodes[0] with
 * check_inode_item().  When the path moves on to another leaf, the cached
 * per-level information in @nrefs is refreshed with update_nodes_refs()
 * and need_check[] is consulted for each level.
 */
1992 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1993 struct node_refs *nrefs, int *level, int ext_ref)
1995 struct extent_buffer *cur = path->nodes[0];
1996 struct btrfs_key key;
2000 int root_level = btrfs_header_level(root->node);
2002 int ret = 0; /* Final return value */
2003 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on to detect a leaf switch later. */
2005 cur_bytenr = cur->start;
2007 /* skip to first inode item or the first inode number change */
2008 nritems = btrfs_header_nritems(cur);
2009 for (i = 0; i < nritems; i++) {
2010 btrfs_item_key_to_cpu(cur, &key, i);
2012 first_ino = key.objectid;
2013 if (key.type == BTRFS_INODE_ITEM_KEY ||
2014 (first_ino && first_ino != key.objectid))
2018 path->slots[0] = nritems;
/* Results accumulate in err; LAST_ITEM is tested as a flag in it. */
2024 err |= check_inode_item(root, path, ext_ref);
2026 if (err & LAST_ITEM)
2029 /* still have inode items in this leaf */
2030 if (cur->start == cur_bytenr)
2034 * we have switched to another leaf, above nodes may
2035 * have changed, here walk down the path, if a node
2036 * or leaf is shared, check whether we can skip this
2039 for (i = root_level; i >= 0; i--) {
2040 if (path->nodes[i]->start == nrefs->bytenr[i])
2043 ret = update_nodes_refs(root,
2044 path->nodes[i]->start,
2049 if (!nrefs->need_check[i]) {
/* Release the extent buffers held for the levels below *level. */
2055 for (i = 0; i < *level; i++) {
2056 free_extent_buffer(path->nodes[i]);
2057 path->nodes[i] = NULL;
/*
 * Issue readahead_tree_block() for the children of @node, starting at
 * @slot and running to the last pointer in the node.
 */
2066 static void reada_walk_down(struct btrfs_root *root,
2067 struct extent_buffer *node, int slot)
2069 struct btrfs_fs_info *fs_info = root->fs_info;
2076 level = btrfs_header_level(node);
2080 nritems = btrfs_header_nritems(node);
2081 for (i = slot; i < nritems; i++) {
2082 bytenr = btrfs_node_blockptr(node, i);
2083 ptr_gen = btrfs_node_ptr_generation(node, i);
2084 readahead_tree_block(fs_info, bytenr, ptr_gen);
2089 * Check the child node/leaf against the following conditions:
2090 * 1. the first item key of the node/leaf should be the same as the one
2092 * 2. block in parent node should match the child node/leaf.
2093 * 3. generation of parent node and child's header should be consistent.
2095 * Or the child node/leaf pointed by the key in parent is not valid.
2097 * We hope to check leaf owner too, but since subvol may share leaves,
2098 * which makes leaf owner check not so strong, key check should be
2099 * sufficient enough for that case.
/*
 * Compare @child with the pointer stored at @slot of @parent: first key,
 * block address and generation.  Each mismatch is reported on stderr.
 */
2101 static int check_child_node(struct extent_buffer *parent, int slot,
2102 struct extent_buffer *child)
2104 struct btrfs_key parent_key;
2105 struct btrfs_key child_key;
2108 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* A level-0 child is a leaf (item key); otherwise take the node key. */
2109 if (btrfs_header_level(child) == 0)
2110 btrfs_item_key_to_cpu(child, &child_key, 0);
2112 btrfs_node_key_to_cpu(child, &child_key, 0);
/* 1. The key in the parent slot must equal the child's first key. */
2114 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2117 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2118 parent_key.objectid, parent_key.type, parent_key.offset,
2119 child_key.objectid, child_key.type, child_key.offset);
/* 2. The child's own bytenr must equal the parent's block pointer. */
2121 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2123 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2124 btrfs_node_blockptr(parent, slot),
2125 btrfs_header_bytenr(child));
/*
 * 3. The generations must agree.
 * NOTE(review): here "wanted" is fed from the child header and "have" from
 * the parent pointer, the reverse of the block message above - confirm
 * whether the two arguments are swapped.
 */
2127 if (btrfs_node_ptr_generation(parent, slot) !=
2128 btrfs_header_generation(child)) {
2130 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2131 btrfs_header_generation(child),
2132 btrfs_node_ptr_generation(parent, slot));
2138 * For a tree node or leaf that is shared, we don't need to iterate it
2139 * in every fs or file tree check. Here we find all of its root ids, and only check
2140 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether @root should check a block, given the ulist @roots that
 * update_nodes_refs() obtains from btrfs_find_all_roots() for it.
 */
2142 static int need_check(struct btrfs_root *root, struct ulist *roots)
2144 struct rb_node *node;
2145 struct ulist_node *u;
/* The list holds a single entry. */
2147 if (roots->nnodes == 1)
/* Take the first entry of the ulist's rb-tree. */
2150 node = rb_first(&roots->root);
2151 u = rb_entry(node, struct ulist_node, rb_node);
2153 * current root id is not smallest, we skip it and let it be checked
2154 * in the fs or file tree who hash the smallest root id.
2156 if (root->objectid != u->val)
2163 * For a tree node or leaf, we record its reference count, so that if we later
2164 * process this node or leaf again, we don't need to compute its reference count again.
/*
 * Refresh the @nrefs cache entry for @level when it does not already
 * describe @bytenr: look up the block's reference count, store bytenr and
 * refs, and set need_check[level] either from need_check() on the list of
 * roots or to 1.
 */
2166 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2167 struct node_refs *nrefs, u64 level)
2171 struct ulist *roots;
/* Only do the lookups when the cached entry is for a different block. */
2173 if (nrefs->bytenr[level] != bytenr) {
2174 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2175 level, 1, &refs, NULL);
2179 nrefs->bytenr[level] = bytenr;
2180 nrefs->refs[level] = refs;
2182 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2187 check = need_check(root, roots);
2189 nrefs->need_check[level] = check;
2191 nrefs->need_check[level] = 1;
/*
 * Original-mode descent: starting from path->nodes[*level], follow the
 * current slot of each node down to the leaves and run process_one_leaf()
 * on them.  Reference counts of visited blocks are cached in @nrefs and
 * passed to enter_shared_node().  Children are validated with
 * check_child_node() and btrfs_check_leaf()/btrfs_check_node() before the
 * walk steps into them.
 */
2198 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2199 struct walk_control *wc, int *level,
2200 struct node_refs *nrefs)
2202 enum btrfs_tree_block_status status;
2205 struct btrfs_fs_info *fs_info = root->fs_info;
2206 struct extent_buffer *next;
2207 struct extent_buffer *cur;
2211 WARN_ON(*level < 0);
2212 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count when @nrefs already describes this block. */
2214 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2215 refs = nrefs->refs[*level];
2218 ret = btrfs_lookup_extent_info(NULL, root,
2219 path->nodes[*level]->start,
2220 *level, 1, &refs, NULL);
2225 nrefs->bytenr[*level] = path->nodes[*level]->start;
2226 nrefs->refs[*level] = refs;
2230 ret = enter_shared_node(root, path->nodes[*level]->start,
2238 while (*level >= 0) {
2239 WARN_ON(*level < 0);
2240 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2241 cur = path->nodes[*level];
2243 if (btrfs_header_level(cur) != *level)
2246 if (path->slots[*level] >= btrfs_header_nritems(cur))
2249 ret = process_one_leaf(root, cur, wc);
/* Child pointer and its generation at the current slot. */
2254 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2255 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Same caching scheme for the child's level. */
2257 if (bytenr == nrefs->bytenr[*level - 1]) {
2258 refs = nrefs->refs[*level - 1];
2260 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2261 *level - 1, 1, &refs, NULL);
2265 nrefs->bytenr[*level - 1] = bytenr;
2266 nrefs->refs[*level - 1] = refs;
2271 ret = enter_shared_node(root, bytenr, refs,
2274 path->slots[*level]++;
/*
 * Use the cached buffer when it is up to date; otherwise start readahead
 * from the current slot onward and read the block.
 */
2279 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2280 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2281 free_extent_buffer(next);
2282 reada_walk_down(root, cur, path->slots[*level]);
2283 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
/* Read failed: file a corrupt extent record using the parent's data. */
2284 if (!extent_buffer_uptodate(next)) {
2285 struct btrfs_key node_key;
2287 btrfs_node_key_to_cpu(path->nodes[*level],
2289 path->slots[*level]);
2290 btrfs_add_corrupt_extent_record(root->fs_info,
2292 path->nodes[*level]->start,
2293 root->fs_info->nodesize,
/* Cross-check the child against the pointer in its parent. */
2300 ret = check_child_node(cur, path->slots[*level], next);
2302 free_extent_buffer(next);
/* Structural validation of the child block itself. */
2307 if (btrfs_is_leaf(next))
2308 status = btrfs_check_leaf(root, NULL, next);
2310 status = btrfs_check_node(root, NULL, next);
2311 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2312 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held one level lower. */
2317 *level = *level - 1;
2318 free_extent_buffer(path->nodes[*level]);
2319 path->nodes[*level] = next;
2320 path->slots[*level] = 0;
/* Put the slot past the last item so this level counts as consumed. */
2323 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration; the same prototype is also declared earlier in this file. */
2327 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2328 unsigned int ext_ref);
2331 * Returns >0 Found error, should continue
2332 * Returns <0 Fatal error, must exit the whole check
2333 * Returns 0 No errors found
/*
 * Descent that works on @nrefs instead of a walk_control: each block is
 * validated with btrfs_check_leaf()/btrfs_check_node(), leaves go to
 * process_one_leaf_v2(), and a child whose need_check[] entry is zero is
 * stepped over by advancing the slot.
 */
2335 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2336 int *level, struct node_refs *nrefs, int ext_ref)
2338 enum btrfs_tree_block_status status;
2341 struct btrfs_fs_info *fs_info = root->fs_info;
2342 struct extent_buffer *next;
2343 struct extent_buffer *cur;
2346 WARN_ON(*level < 0);
2347 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2349 ret = update_nodes_refs(root, path->nodes[*level]->start,
2354 while (*level >= 0) {
2355 WARN_ON(*level < 0);
2356 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2357 cur = path->nodes[*level];
2359 if (btrfs_header_level(cur) != *level)
2362 if (path->slots[*level] >= btrfs_header_nritems(cur))
2364 /* Don't forget to check leaf/node validation */
2366 ret = btrfs_check_leaf(root, NULL, cur);
2367 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2371 ret = process_one_leaf_v2(root, path, nrefs,
2375 ret = btrfs_check_node(root, NULL, cur);
2376 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Child pointer and its generation at the current slot. */
2381 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2382 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2384 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* Child does not need checking from this root: move to the next slot. */
2387 if (!nrefs->need_check[*level - 1]) {
2388 path->slots[*level]++;
/*
 * Use the cached buffer when it is up to date; otherwise start readahead
 * from the current slot onward and read the block.
 */
2392 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2393 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2394 free_extent_buffer(next);
2395 reada_walk_down(root, cur, path->slots[*level]);
2396 next = read_tree_block(fs_info, bytenr, ptr_gen);
/* Read failed: file a corrupt extent record using the parent's data. */
2397 if (!extent_buffer_uptodate(next)) {
2398 struct btrfs_key node_key;
2400 btrfs_node_key_to_cpu(path->nodes[*level],
2402 path->slots[*level]);
2403 btrfs_add_corrupt_extent_record(fs_info,
2405 path->nodes[*level]->start,
/* Cross-check the child against the pointer in its parent. */
2413 ret = check_child_node(cur, path->slots[*level], next);
2417 if (btrfs_is_leaf(next))
2418 status = btrfs_check_leaf(root, NULL, next);
2420 status = btrfs_check_node(root, NULL, next);
2421 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2422 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held one level lower. */
2427 *level = *level - 1;
2428 free_extent_buffer(path->nodes[*level]);
2429 path->nodes[*level] = next;
2430 path->slots[*level] = 0;
/*
 * Climb the path from *level, testing at each level whether a slot remains
 * after the current one.  For a level that is being left, the extent buffer
 * is released and, if that level is the active shared node,
 * leave_shared_node() is called.
 */
2435 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2436 struct walk_control *wc, int *level)
2439 struct extent_buffer *leaf;
2441 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite the name, 'leaf' is the block at level i, whatever its level. */
2442 leaf = path->nodes[i];
2443 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2448 free_extent_buffer(path->nodes[*level]);
2449 path->nodes[*level] = NULL;
2450 BUG_ON(*level > wc->active_node);
2451 if (*level == wc->active_node)
2452 leave_shared_node(root, wc, *level);
/*
 * Variant of walk_up_tree() without shared-node bookkeeping: climb the path
 * from *level, testing whether a slot remains after the current one, and
 * release the extent buffer of a level that is being left.
 */
2459 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2463 struct extent_buffer *leaf;
2465 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite the name, 'leaf' is the block at level i, whatever its level. */
2466 leaf = path->nodes[i];
2467 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2472 free_extent_buffer(path->nodes[*level]);
2473 path->nodes[*level] = NULL;
/*
 * Sanity checks on the inode record of a root directory.  The conditions
 * tested are: inode item present with no errors, nlink == 1 with no links
 * found, a non-empty backref list whose first entry is an inode ref with
 * index 0 and the two-byte name "..", and no dir index or dir item on that
 * backref.
 */
2480 static int check_root_dir(struct inode_record *rec)
2482 struct inode_backref *backref;
2485 if (!rec->found_inode_item || rec->errors)
2487 if (rec->nlink != 1 || rec->found_link != 0)
2489 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2491 backref = to_inode_backref(rec->backrefs.next);
2492 if (!backref->found_inode_ref)
2494 if (backref->index != 0 || backref->namelen != 2 ||
2495 memcmp(backref->name, "..", 2))
2497 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of @rec with rec->found_size,
 * clear I_ERR_DIR_ISIZE_WRONG on the record and report the reset on stdout.
 * @path is released before returning.
 */
2504 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2505 struct btrfs_root *root, struct btrfs_path *path,
2506 struct inode_record *rec)
2508 struct btrfs_inode_item *ei;
2509 struct btrfs_key key;
/* The search key uses the largest possible offset for this inode. */
2512 key.objectid = rec->ino;
2513 key.type = BTRFS_INODE_ITEM_KEY;
2514 key.offset = (u64)-1;
2516 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2520 if (!path->slots[0]) {
/* The item at the slot must belong to this inode. */
2527 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2528 if (key.objectid != rec->ino) {
2533 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2534 struct btrfs_inode_item);
2535 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2536 btrfs_mark_buffer_dirty(path->nodes[0]);
2537 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2538 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2539 root->root_key.objectid);
2541 btrfs_release_path(path);
/*
 * Add an orphan item for inode rec->ino with btrfs_add_orphan_item(),
 * release @path and clear I_ERR_NO_ORPHAN_ITEM on the record.
 */
2545 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2546 struct btrfs_root *root,
2547 struct btrfs_path *path,
2548 struct inode_record *rec)
2552 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2553 btrfs_release_path(path);
2555 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of @rec with rec->found_size,
 * clear I_ERR_FILE_NBYTES_WRONG on the record and report the reset on
 * stdout.  @path is released before returning.
 */
2559 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2560 struct btrfs_root *root,
2561 struct btrfs_path *path,
2562 struct inode_record *rec)
2564 struct btrfs_inode_item *ei;
2565 struct btrfs_key key;
2568 key.objectid = rec->ino;
2569 key.type = BTRFS_INODE_ITEM_KEY;
2572 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2579 /* Since ret == 0, no need to check anything */
2580 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581 struct btrfs_inode_item);
2582 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2583 btrfs_mark_buffer_dirty(path->nodes[0]);
2584 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2585 printf("reset nbytes for ino %llu root %llu\n",
2586 rec->ino, root->root_key.objectid);
2588 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item described by @backref (directory backref->dir,
 * index backref->index, name backref->name) pointing at inode rec->ino,
 * inside its own transaction.  Afterwards the backref is marked as having a
 * dir index and the directory's record in @inode_cache has its found_size
 * and I_ERR_DIR_ISIZE_WRONG flag brought up to date.
 */
2592 static int add_missing_dir_index(struct btrfs_root *root,
2593 struct cache_tree *inode_cache,
2594 struct inode_record *rec,
2595 struct inode_backref *backref)
2597 struct btrfs_path path;
2598 struct btrfs_trans_handle *trans;
2599 struct btrfs_dir_item *dir_item;
2600 struct extent_buffer *leaf;
2601 struct btrfs_key key;
2602 struct btrfs_disk_key disk_key;
2603 struct inode_record *dir_rec;
2604 unsigned long name_ptr;
/* Item payload: the dir item header followed by the name bytes. */
2605 u32 data_size = sizeof(*dir_item) + backref->namelen;
2608 trans = btrfs_start_transaction(root, 1);
2610 return PTR_ERR(trans);
2612 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2613 (unsigned long long)rec->ino);
2615 btrfs_init_path(&path);
2616 key.objectid = backref->dir;
2617 key.type = BTRFS_DIR_INDEX_KEY;
2618 key.offset = backref->index;
2619 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2622 leaf = path.nodes[0];
2623 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key stored in the dir item: the inode item of rec->ino. */
2625 disk_key.objectid = cpu_to_le64(rec->ino);
2626 disk_key.type = BTRFS_INODE_ITEM_KEY;
2627 disk_key.offset = 0;
2629 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2630 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2631 btrfs_set_dir_data_len(leaf, dir_item, 0);
2632 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored immediately after the dir item header. */
2633 name_ptr = (unsigned long)(dir_item + 1);
2634 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2635 btrfs_mark_buffer_dirty(leaf);
2636 btrfs_release_path(&path);
/* NOTE(review): the result of the commit is not captured here - confirm. */
2637 btrfs_commit_transaction(trans, root);
2639 backref->found_dir_index = 1;
2640 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2641 BUG_ON(IS_ERR(dir_rec));
/* Account the new name in the directory and re-evaluate its isize flag. */
2644 dir_rec->found_size += backref->namelen;
2645 if (dir_rec->found_size == dir_rec->isize &&
2646 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2647 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2648 if (dir_rec->found_size != dir_rec->isize)
2649 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Remove the DIR_INDEX entry described by @backref (directory, name and
 * index) inside its own transaction, after announcing the deletion on
 * stderr.  Two removal calls are visible: btrfs_del_item() for the item at
 * the path and btrfs_delete_one_dir_name() for a single name.
 */
2654 static int delete_dir_index(struct btrfs_root *root,
2655 struct inode_backref *backref)
2657 struct btrfs_trans_handle *trans;
2658 struct btrfs_dir_item *di;
2659 struct btrfs_path path;
2662 trans = btrfs_start_transaction(root, 1);
2664 return PTR_ERR(trans);
2666 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2667 (unsigned long long)backref->dir,
2668 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2669 (unsigned long long)root->objectid);
2671 btrfs_init_path(&path);
2672 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2673 backref->name, backref->namelen,
2674 backref->index, -1);
/* First exit path: release the path and commit the transaction. */
2677 btrfs_release_path(&path);
2678 btrfs_commit_transaction(trans, root);
2685 ret = btrfs_del_item(trans, root, &path);
2687 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2689 btrfs_release_path(&path);
2690 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for @rec in its own transaction.  The item
 * is built on the stack from what the check collected (found_link,
 * found_size, extent_end).  The mode is S_IFDIR | 0755 when a dir item was
 * seen and S_IFREG | 0755 otherwise.  atime, ctime and mtime are set to the
 * current time and otime to zero.
 */
2694 static int create_inode_item(struct btrfs_root *root,
2695 struct inode_record *rec,
2698 struct btrfs_trans_handle *trans;
2699 struct btrfs_inode_item inode_item;
2700 time_t now = time(NULL);
2703 trans = btrfs_start_transaction(root, 1);
2704 if (IS_ERR(trans)) {
2705 ret = PTR_ERR(trans);
2709 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2710 "be incomplete, please check permissions and content after "
2711 "the fsck completes.\n", (unsigned long long)root->objectid,
2712 (unsigned long long)rec->ino);
2714 memset(&inode_item, 0, sizeof(inode_item));
2715 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2717 btrfs_set_stack_inode_nlink(&inode_item, 1);
2719 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2720 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
/* A dir item was seen: treat the inode as a directory. */
2721 if (rec->found_dir_item) {
2722 if (rec->found_file_extent)
2723 fprintf(stderr, "root %llu inode %llu has both a dir "
2724 "item and extents, unsure if it is a dir or a "
2725 "regular file so setting it as a directory\n",
2726 (unsigned long long)root->objectid,
2727 (unsigned long long)rec->ino);
2728 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2729 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/*
 * NOTE(review): this condition is the exact negation of the branch above,
 * so it always holds when reached; a plain else would be equivalent.
 */
2730 } else if (!rec->found_dir_item) {
2731 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2732 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2734 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2735 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2736 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2737 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2738 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2739 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2740 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2741 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2743 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2745 btrfs_commit_transaction(trans, root);
/*
 * Walk rec->backrefs and repair inconsistent directory back references of
 * one inode record.
 *
 * The body tests a flag named delete, so the caller apparently selects
 * between a deletion pass and an add pass; the parameter declaration itself
 * is not visible in this copy.
 *
 * Cases handled by the visible code:
 *  - add pass, record is the root directory and has no inode item:
 *    create_inode_item(root, rec, 1);
 *  - a dir index without an inode ref, or with REF_ERR_INDEX_UNMATCH:
 *    delete_dir_index() and drop the backref from the list (the start of
 *    that condition is not visible);
 *  - add pass, dir item and inode ref present but no dir index:
 *    add_missing_dir_index();
 *  - dir item and dir index present, no errors, inode ref present: the
 *    backref is dropped from the list;
 *  - add pass, only the inode ref present: check_dir_conflict(), then insert
 *    the dir item with btrfs_insert_dir_item() inside its own transaction;
 *  - add pass, all three references present without an index mismatch but
 *    no inode item: create_inode_item(root, rec, 0).
 *
 * Returns ret when it is non-zero, otherwise the value of repaired.
 *
 * NOTE(review): braces, error checks, continue/break statements and the
 * updates of repaired are missing from this copy.
 */
2749 static int repair_inode_backrefs(struct btrfs_root *root,
2750 struct inode_record *rec,
2751 struct cache_tree *inode_cache,
2754 struct inode_backref *tmp, *backref;
2755 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2759 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2760 if (!delete && rec->ino == root_dirid) {
2761 if (!rec->found_inode_item) {
2762 ret = create_inode_item(root, rec, 1);
2769 /* Index 0 for root dir's are special, don't mess with it */
2770 if (rec->ino == root_dirid && backref->index == 0)
2774 ((backref->found_dir_index && !backref->found_inode_ref) ||
2775 (backref->found_dir_index && backref->found_inode_ref &&
2776 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2777 ret = delete_dir_index(root, backref);
2781 list_del(&backref->list);
2786 if (!delete && !backref->found_dir_index &&
2787 backref->found_dir_item && backref->found_inode_ref) {
2788 ret = add_missing_dir_index(root, inode_cache, rec,
2793 if (backref->found_dir_item &&
2794 backref->found_dir_index) {
2795 if (!backref->errors &&
2796 backref->found_inode_ref) {
2797 list_del(&backref->list);
2804 if (!delete && (!backref->found_dir_index &&
2805 !backref->found_dir_item &&
2806 backref->found_inode_ref)) {
2807 struct btrfs_trans_handle *trans;
2808 struct btrfs_key location;
2810 ret = check_dir_conflict(root, backref->name,
2816 * let nlink fixing routine to handle it,
2817 * which can do it better.
2822 location.objectid = rec->ino;
2823 location.type = BTRFS_INODE_ITEM_KEY;
2824 location.offset = 0;
2826 trans = btrfs_start_transaction(root, 1);
2827 if (IS_ERR(trans)) {
2828 ret = PTR_ERR(trans);
2831 fprintf(stderr, "adding missing dir index/item pair "
2833 (unsigned long long)rec->ino);
2834 ret = btrfs_insert_dir_item(trans, root, backref->name,
2836 backref->dir, &location,
2837 imode_to_type(rec->imode),
2840 btrfs_commit_transaction(trans, root);
2844 if (!delete && (backref->found_inode_ref &&
2845 backref->found_dir_index &&
2846 backref->found_dir_item &&
2847 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2848 !rec->found_inode_item)) {
2849 ret = create_inode_item(root, rec, 0);
2856 return ret ? ret : repaired;
2860 * To determine the file type for nlink/inode_item repair
2862 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2863 * Return -ENOENT if file type is not found.
/*
 * Work out a BTRFS_FT_* file type for @rec and store it in *@type.
 *
 * When the inode item was found the type is derived from rec->imode via
 * imode_to_type(); otherwise the backref list is scanned and the filetype
 * of a backref that has a dir index or a dir item is used.
 *
 * NOTE(review): the return statements and braces are missing from this
 * copy, so the exact return values and whether the scan stops at the first
 * match should be confirmed against the full source.
 */
2865 static int find_file_type(struct inode_record *rec, u8 *type)
2867 struct inode_backref *backref;
2869 /* For inode item recovered case */
2870 if (rec->found_inode_item) {
2871 *type = imode_to_type(rec->imode);
2875 list_for_each_entry(backref, &rec->backrefs, list) {
2876 if (backref->found_dir_index || backref->found_dir_item) {
2877 *type = backref->filetype;
2885 * To determine the file name for nlink repair
2887 * Return 0 if file name is found, set name and namelen.
2888 * Return -ENOENT if file name is not found.
/*
 * Look for a usable file name for @rec among its backrefs.
 *
 * A backref qualifies when it has a dir index, a dir item or an inode ref.
 * Its name is copied into @name with memcpy() (backref->namelen bytes; no
 * terminator is written by the visible lines) and the length is stored in
 * *@namelen, so @name must have room for backref->namelen bytes.
 *
 * NOTE(review): the return statements and braces are missing from this
 * copy; confirm the return values against the full source.
 */
2890 static int find_file_name(struct inode_record *rec,
2891 char *name, int *namelen)
2893 struct inode_backref *backref;
2895 list_for_each_entry(backref, &rec->backrefs, list) {
2896 if (backref->found_dir_index || backref->found_dir_item ||
2897 backref->found_inode_ref) {
2898 memcpy(name, backref->name, backref->namelen);
2899 *namelen = backref->namelen;
2906 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of @rec from scratch inside transaction @trans.
 *
 * Visible sequence:
 *  1. rec->found_link is reset to 0.
 *  2. Every backref is unlinked with btrfs_unlink(); backrefs that do not
 *     have all of dir index, dir item and inode ref are also removed from
 *     rec->backrefs so they are not re-added.
 *  3. The on-disk inode item is looked up and its nlink is set to 0, and
 *     the leaf is marked dirty.
 *  4. Each remaining backref is re-added with btrfs_add_link(); per the
 *     existing comment, the add-link path increments nlink.
 *
 * @path is released before the function ends.
 *
 * NOTE(review): error checks, braces, the free of removed backrefs (if any)
 * and the return are missing from this copy.
 */
2907 static int reset_nlink(struct btrfs_trans_handle *trans,
2908 struct btrfs_root *root,
2909 struct btrfs_path *path,
2910 struct inode_record *rec)
2912 struct inode_backref *backref;
2913 struct inode_backref *tmp;
2914 struct btrfs_key key;
2915 struct btrfs_inode_item *inode_item;
2918 /* We don't believe this either, reset it and iterate backref */
2919 rec->found_link = 0;
2921 /* Remove all backref including the valid ones */
2922 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2923 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2924 backref->index, backref->name,
2925 backref->namelen, 0);
2929 /* remove invalid backref, so it won't be added back */
2930 if (!(backref->found_dir_index &&
2931 backref->found_dir_item &&
2932 backref->found_inode_ref)) {
2933 list_del(&backref->list);
2940 /* Set nlink to 0 */
2941 key.objectid = rec->ino;
2942 key.type = BTRFS_INODE_ITEM_KEY;
2944 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2951 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2952 struct btrfs_inode_item);
2953 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2954 btrfs_mark_buffer_dirty(path->nodes[0]);
2955 btrfs_release_path(path);
2958 * Add back valid inode_ref/dir_item/dir_index,
2959 * add_link() will handle the nlink inc, so new nlink must be correct
2961 list_for_each_entry(backref, &rec->backrefs, list) {
2962 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2963 backref->name, backref->namelen,
2964 backref->filetype, &backref->index, 1);
2969 btrfs_release_path(path);
/*
 * Find the highest inode objectid currently present in @root.
 *
 * The tree is searched for an INODE_ITEM key with objectid
 * BTRFS_LAST_FREE_OBJECTID; the key stored in the slot just before the
 * returned position is read and its objectid is written to *highest_ino.
 * The result is then compared against BTRFS_LAST_FREE_OBJECTID. @path is
 * initialised on entry and released before returning.
 *
 * NOTE(review): the declaration of highest_ino, the key offset, the checks
 * on the search result and the return are missing from this copy.
 */
2973 static int get_highest_inode(struct btrfs_trans_handle *trans,
2974 struct btrfs_root *root,
2975 struct btrfs_path *path,
2978 struct btrfs_key key, found_key;
2981 btrfs_init_path(path);
2982 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2984 key.type = BTRFS_INODE_ITEM_KEY;
2985 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2987 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2988 path->slots[0] - 1);
2989 *highest_ino = found_key.objectid;
2992 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2994 btrfs_release_path(path);
/*
 * Repair a wrong link count on @rec and, when no valid link remains, move
 * the inode into a lost+found directory.
 *
 * Visible sequence:
 *  1. Recover a name and a file type from the record before backrefs are
 *     touched; fall back to the decimal inode number as name and to
 *     BTRFS_FT_REG_FILE as type, printing a message for each fallback.
 *  2. reset_nlink() rebuilds the link count from the valid backrefs.
 *  3. If rec->found_link is still 0: get_highest_inode() supplies the
 *     objectid used for the directory, btrfs_mkdir() creates it under
 *     BTRFS_FIRST_FREE_OBJECTID, and btrfs_add_link() links the inode into
 *     it. While the link fails with -EEXIST a ".INO" style suffix is
 *     appended to the name (bounded by a length check) and the link is
 *     retried.
 *  4. I_ERR_LINK_COUNT_WRONG is cleared unconditionally so the caller does
 *     not loop on this inode, and @path is released.
 *
 * NOTE(review): several declarations, error checks, braces, the found_link
 * update and the return are missing from this copy.
 */
2998 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2999 struct btrfs_root *root,
3000 struct btrfs_path *path,
3001 struct inode_record *rec)
3003 char *dir_name = "lost+found";
3004 char namebuf[BTRFS_NAME_LEN] = {0};
3009 int name_recovered = 0;
3010 int type_recovered = 0;
3014 * Get file name and type first before these invalid inode ref
3015 * are deleted by remove_all_invalid_backref()
3017 name_recovered = !find_file_name(rec, namebuf, &namelen);
3018 type_recovered = !find_file_type(rec, &type);
3020 if (!name_recovered) {
3021 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3022 rec->ino, rec->ino);
3023 namelen = count_digits(rec->ino);
3024 sprintf(namebuf, "%llu", rec->ino);
3027 if (!type_recovered) {
3028 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3030 type = BTRFS_FT_REG_FILE;
3034 ret = reset_nlink(trans, root, path, rec);
3037 "Failed to reset nlink for inode %llu: %s\n",
3038 rec->ino, strerror(-ret));
3042 if (rec->found_link == 0) {
3043 ret = get_highest_inode(trans, root, path, &lost_found_ino);
3047 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3048 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3051 fprintf(stderr, "Failed to create '%s' dir: %s\n",
3052 dir_name, strerror(-ret));
3055 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
3056 namebuf, namelen, type, NULL, 1);
3058 * Add ".INO" suffix several times to handle case where
3059 * "FILENAME.INO" is already taken by another file.
3061 while (ret == -EEXIST) {
3063 * Conflicting file name, add ".INO" as suffix * +1 for '.'
3065 if (namelen + count_digits(rec->ino) + 1 >
3070 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
3072 namelen += count_digits(rec->ino) + 1;
3073 ret = btrfs_add_link(trans, root, rec->ino,
3074 lost_found_ino, namebuf,
3075 namelen, type, NULL, 1);
3079 "Failed to link the inode %llu to %s dir: %s\n",
3080 rec->ino, dir_name, strerror(-ret));
3084 * Just increase the found_link, don't actually add the
3085 * backref. This will make things easier and this inode
3086 * record will be freed after the repair is done.
3087 * So fsck will not report problem about this inode.
3090 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3091 namelen, namebuf, dir_name);
3093 printf("Fixed the nlink of inode %llu\n", rec->ino);
3096 * Clear the flag anyway, or we will loop forever for the same inode
3097 * as it will not be removed from the bad inode list and the dead loop
3100 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3101 btrfs_release_path(path);
3106 * Check if there is any normal(reg or prealloc) file extent for given
3108 * This is used to determine the file type when neither its dir_index/item nor
3109 * inode_item exists.
3111 * This will *NOT* report error, if any error happens, just consider it does
3112 * not have any normal file extent.
/*
 * Probe @root for a non-inline file extent belonging to inode @ino.
 *
 * A read-only search (no transaction) is made for an EXTENT_DATA key; if
 * the search lands past the last item of the leaf, the next leaf is
 * loaded. The key at the resulting position must belong to @ino and be of
 * type EXTENT_DATA; the extent type is then read and tested against
 * BTRFS_FILE_EXTENT_INLINE. The local path is released before returning.
 *
 * NOTE(review): the remaining key setup, the loop structure, the result
 * assignment and the return are missing from this copy.
 */
3114 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3116 struct btrfs_path path;
3117 struct btrfs_key key;
3118 struct btrfs_key found_key;
3119 struct btrfs_file_extent_item *fi;
3123 btrfs_init_path(&path);
3125 key.type = BTRFS_EXTENT_DATA_KEY;
3128 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3133 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3134 ret = btrfs_next_leaf(root, &path);
3141 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3143 if (found_key.objectid != ino ||
3144 found_key.type != BTRFS_EXTENT_DATA_KEY)
3146 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3147 struct btrfs_file_extent_item);
3148 type = btrfs_file_extent_type(path.nodes[0], fi);
3149 if (type != BTRFS_FILE_EXTENT_INLINE) {
3155 btrfs_release_path(&path);
/*
 * Map a BTRFS_FT_* directory entry type to the matching S_IF* file type
 * bits using a static lookup table.
 *
 * NOTE(review): the table is indexed with @type directly and no range check
 * is visible here, so a value larger than the highest initialised entry
 * would read outside the table; confirm callers only pass valid BTRFS_FT_*
 * values.
 */
3159 static u32 btrfs_type_to_imode(u8 type)
3161 static u32 imode_by_btrfs_type[] = {
3162 [BTRFS_FT_REG_FILE] = S_IFREG,
3163 [BTRFS_FT_DIR] = S_IFDIR,
3164 [BTRFS_FT_CHRDEV] = S_IFCHR,
3165 [BTRFS_FT_BLKDEV] = S_IFBLK,
3166 [BTRFS_FT_FIFO] = S_IFIFO,
3167 [BTRFS_FT_SOCK] = S_IFSOCK,
3168 [BTRFS_FT_SYMLINK] = S_IFLNK,
3171 return imode_by_btrfs_type[(type)];
/*
 * Rebuild the inode item of @rec when it is missing entirely.
 *
 * The file type is taken from find_file_type() when possible. Otherwise it
 * is guessed in this order: a non-inline file extent means regular file, a
 * dir item means directory, a non-empty orphan extent list means regular
 * file, and regular file is the final fallback (with a message).
 * btrfs_new_inode() then creates the item using mode combined with the
 * S_IF* bits for the chosen type.
 *
 * Afterwards the record is updated (found_dir_item, imode),
 * I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is set so that
 * the nlink repair runs later; the link itself is not repaired here.
 *
 * NOTE(review): the declarations of mode and filetype, the error check
 * after btrfs_new_inode(), braces and the return are missing from this
 * copy.
 */
3174 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3175 struct btrfs_root *root,
3176 struct btrfs_path *path,
3177 struct inode_record *rec)
3181 int type_recovered = 0;
3184 printf("Trying to rebuild inode:%llu\n", rec->ino);
3186 type_recovered = !find_file_type(rec, &filetype);
3189 * Try to determine inode type if type not found.
3191 * For found regular file extent, it must be FILE.
3192 * For found dir_item/index, it must be DIR.
3194 * For undetermined one, use FILE as fallback.
3197 * 1. If found backref(inode_index/item is already handled) to it,
3199 * Need new inode-inode ref structure to allow search for that.
3201 if (!type_recovered) {
3202 if (rec->found_file_extent &&
3203 find_normal_file_extent(root, rec->ino)) {
3205 filetype = BTRFS_FT_REG_FILE;
3206 } else if (rec->found_dir_item) {
3208 filetype = BTRFS_FT_DIR;
3209 } else if (!list_empty(&rec->orphan_extents)) {
3211 filetype = BTRFS_FT_REG_FILE;
3213 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3216 filetype = BTRFS_FT_REG_FILE;
3220 ret = btrfs_new_inode(trans, root, rec->ino,
3221 mode | btrfs_type_to_imode(filetype));
3226 * Here inode rebuild is done, we only rebuild the inode item,
3227 * don't repair the nlink(like move to lost+found).
3228 * That is the job of nlink repair.
3230 * We just fill the record and return
3232 rec->found_dir_item = 1;
3233 rec->imode = mode | btrfs_type_to_imode(filetype);
3235 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3236 /* Ensure the inode_nlinks repair function will be called */
3237 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach or discard the orphan data extents recorded on @rec.
 *
 * For each entry of rec->orphan_extents the visible code:
 *  - asks btrfs_get_extent() whether the file range
 *    [offset, offset + disk_len) already has an extent; the extent is
 *    treated as uncompressed with no data offset, so disk_len is used as
 *    the length;
 *  - on a conflict, reports it and frees the extent with
 *    btrfs_free_extent();
 *  - otherwise inserts a file extent with btrfs_insert_file_extent(), adds
 *    disk_len to rec->found_size (clearing I_ERR_FILE_NBYTES_WRONG once it
 *    equals rec->nbytes), removes the covered range from rec->holes and
 *    clears I_ERR_FILE_EXTENT_DISCOUNT when no hole is left;
 *  - unlinks the entry from the list.
 * I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 *
 * NOTE(review): error checks, braces, the free of each entry and the return
 * are missing from this copy.
 */
3242 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3243 struct btrfs_root *root,
3244 struct btrfs_path *path,
3245 struct inode_record *rec)
3247 struct orphan_data_extent *orphan;
3248 struct orphan_data_extent *tmp;
3251 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3253 * Check for conflicting file extents
3255 * Here we don't know whether the extents is compressed or not,
3256 * so we can only assume it not compressed nor data offset,
3257 * and use its disk_len as extent length.
3259 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3260 orphan->offset, orphan->disk_len, 0);
3261 btrfs_release_path(path);
3266 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3267 orphan->disk_bytenr, orphan->disk_len);
3268 ret = btrfs_free_extent(trans,
3269 root->fs_info->extent_root,
3270 orphan->disk_bytenr, orphan->disk_len,
3271 0, root->objectid, orphan->objectid,
3276 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3277 orphan->offset, orphan->disk_bytenr,
3278 orphan->disk_len, orphan->disk_len);
3282 /* Update file size info */
3283 rec->found_size += orphan->disk_len;
3284 if (rec->found_size == rec->nbytes)
3285 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3287 /* Update the file extent hole info too */
3288 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3292 if (RB_EMPTY_ROOT(&rec->holes))
3293 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3295 list_del(&orphan->list);
3298 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in rec->holes so the file extents of @rec become
 * contiguous.
 *
 * The hole tree is consumed from its first node: each hole is punched with
 * btrfs_punch_hole() and then removed from the tree with
 * del_file_extent_hole(); I_ERR_FILE_EXTENT_DISCOUNT is cleared once the
 * tree is empty. A separate call handles a file that lost all of its
 * extents by punching a hole from 0 to isize rounded up to the sector
 * size. A message is printed when the fix is done.
 *
 * NOTE(review): the loop header, error checks, the condition guarding the
 * special case and the return are missing from this copy.
 */
3303 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3304 struct btrfs_root *root,
3305 struct btrfs_path *path,
3306 struct inode_record *rec)
3308 struct rb_node *node;
3309 struct file_extent_hole *hole;
3313 node = rb_first(&rec->holes);
3317 hole = rb_entry(node, struct file_extent_hole, node);
3318 ret = btrfs_punch_hole(trans, root, rec->ino,
3319 hole->start, hole->len);
3322 ret = del_file_extent_hole(&rec->holes, hole->start,
3326 if (RB_EMPTY_ROOT(&rec->holes))
3327 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3328 node = rb_first(&rec->holes);
3330 /* special case for a file losing all its file extent */
3332 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3333 round_up(rec->isize,
3334 root->fs_info->sectorsize));
3338 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3339 rec->ino, root->objectid);
/*
 * Run every applicable repair for one inode record inside a single
 * transaction.
 *
 * Only records carrying at least one of the error bits tested at the top
 * are processed. The repairs run in a fixed order: missing inode item,
 * orphan extents, discontinuous extents, directory isize, orphan item,
 * link count, nbytes. Each step after the first runs only while ret is
 * still 0, so the first failure stops the chain. The transaction reserves
 * 7 items, as broken down in the existing comment for the nlink repair.
 *
 * NOTE(review): the early return for records without repairable errors, the
 * IS_ERR() check and the final return are missing from this copy, and the
 * result of the commit is not captured on the visible line.
 */
3344 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3346 struct btrfs_trans_handle *trans;
3347 struct btrfs_path path;
3350 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3351 I_ERR_NO_ORPHAN_ITEM |
3352 I_ERR_LINK_COUNT_WRONG |
3353 I_ERR_NO_INODE_ITEM |
3354 I_ERR_FILE_EXTENT_ORPHAN |
3355 I_ERR_FILE_EXTENT_DISCOUNT|
3356 I_ERR_FILE_NBYTES_WRONG)))
3360 * For nlink repair, it may create a dir and add link, so
3361 * 2 for parent(256)'s dir_index and dir_item
3362 * 2 for lost+found dir's inode_item and inode_ref
3363 * 1 for the new inode_ref of the file
3364 * 2 for lost+found dir's dir_index and dir_item for the file
3366 trans = btrfs_start_transaction(root, 7);
3368 return PTR_ERR(trans);
3370 btrfs_init_path(&path);
3371 if (rec->errors & I_ERR_NO_INODE_ITEM)
3372 ret = repair_inode_no_item(trans, root, &path, rec);
3373 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3374 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3375 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3376 ret = repair_inode_discount_extent(trans, root, &path, rec);
3377 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3378 ret = repair_inode_isize(trans, root, &path, rec);
3379 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3380 ret = repair_inode_orphan_item(trans, root, &path, rec);
3381 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3382 ret = repair_inode_nlinks(trans, root, &path, rec);
3383 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3384 ret = repair_inode_nbytes(trans, root, &path, rec);
3385 btrfs_commit_transaction(trans, root);
3386 btrfs_release_path(&path);
/*
 * Verify, and in repair mode fix, every inode record collected for @root,
 * consuming @inode_cache in the process.
 *
 * Visible phases:
 *  1. A root with zero refs is only checked for a non-empty cache, which
 *     produces a warning.
 *  2. In repair mode the cache is walked and repair_inode_backrefs() is
 *     called on each record that has backrefs; a stage counter is involved
 *     (the existing comment explains why invalid backrefs are deleted
 *     before correct ones are added).
 *  3. The root directory record is fetched and checked with
 *     check_root_dir(); problems are printed. A missing root directory is
 *     recreated with btrfs_make_root_dir() in its own transaction, or
 *     reported as not found.
 *  4. Every remaining record is removed from the cache. The root dir and
 *     the BTRFS_ORPHAN_OBJECTID record are freed directly. For the others,
 *     I_ERR_NO_ORPHAN_ITEM is cross-checked with check_orphan_item(),
 *     I_ERR_NO_INODE_ITEM and I_ERR_LINK_COUNT_WRONG are derived from the
 *     record, try_repair_inode() is called, and anything still wrong is
 *     printed together with its unresolved backrefs before the record is
 *     freed.
 *
 * Returns -1 when error is greater than 0, otherwise 0.
 *
 * NOTE(review): many control-flow lines (loop headers, continue/break,
 * braces, counter updates, repair-mode conditions) are missing from this
 * copy; confirm the exact branching against the full source.
 */
3390 static int check_inode_recs(struct btrfs_root *root,
3391 struct cache_tree *inode_cache)
3393 struct cache_extent *cache;
3394 struct ptr_node *node;
3395 struct inode_record *rec;
3396 struct inode_backref *backref;
3401 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3403 if (btrfs_root_refs(&root->root_item) == 0) {
3404 if (!cache_tree_empty(inode_cache))
3405 fprintf(stderr, "warning line %d\n", __LINE__);
3410 * We need to repair backrefs first because we could change some of the
3411 * errors in the inode recs.
3413 * We also need to go through and delete invalid backrefs first and then
3414 * add the correct ones second. We do this because we may get EEXIST
3415 * when adding back the correct index because we hadn't yet deleted the
3418 * For example, if we were missing a dir index then the directories
3419 * isize would be wrong, so if we fixed the isize to what we thought it
3420 * would be and then fixed the backref we'd still have a invalid fs, so
3421 * we need to add back the dir index and then check to see if the isize
3426 if (stage == 3 && !err)
3429 cache = search_cache_extent(inode_cache, 0);
3430 while (repair && cache) {
3431 node = container_of(cache, struct ptr_node, cache);
3433 cache = next_cache_extent(cache);
3435 /* Need to free everything up and rescan */
3437 remove_cache_extent(inode_cache, &node->cache);
3439 free_inode_rec(rec);
3443 if (list_empty(&rec->backrefs))
3446 ret = repair_inode_backrefs(root, rec, inode_cache,
3460 rec = get_inode_rec(inode_cache, root_dirid, 0);
3461 BUG_ON(IS_ERR(rec));
3463 ret = check_root_dir(rec);
3465 fprintf(stderr, "root %llu root dir %llu error\n",
3466 (unsigned long long)root->root_key.objectid,
3467 (unsigned long long)root_dirid);
3468 print_inode_error(root, rec);
3473 struct btrfs_trans_handle *trans;
3475 trans = btrfs_start_transaction(root, 1);
3476 if (IS_ERR(trans)) {
3477 err = PTR_ERR(trans);
3482 "root %llu missing its root dir, recreating\n",
3483 (unsigned long long)root->objectid);
3485 ret = btrfs_make_root_dir(trans, root, root_dirid);
3488 btrfs_commit_transaction(trans, root);
3492 fprintf(stderr, "root %llu root dir %llu not found\n",
3493 (unsigned long long)root->root_key.objectid,
3494 (unsigned long long)root_dirid);
3498 cache = search_cache_extent(inode_cache, 0);
3501 node = container_of(cache, struct ptr_node, cache);
3503 remove_cache_extent(inode_cache, &node->cache);
3505 if (rec->ino == root_dirid ||
3506 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3507 free_inode_rec(rec);
3511 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3512 ret = check_orphan_item(root, rec->ino);
3514 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3515 if (can_free_inode_rec(rec)) {
3516 free_inode_rec(rec);
3521 if (!rec->found_inode_item)
3522 rec->errors |= I_ERR_NO_INODE_ITEM;
3523 if (rec->found_link != rec->nlink)
3524 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3526 ret = try_repair_inode(root, rec);
3527 if (ret == 0 && can_free_inode_rec(rec)) {
3528 free_inode_rec(rec);
3534 if (!(repair && ret == 0))
3536 print_inode_error(root, rec);
3537 list_for_each_entry(backref, &rec->backrefs, list) {
3538 if (!backref->found_dir_item)
3539 backref->errors |= REF_ERR_NO_DIR_ITEM;
3540 if (!backref->found_dir_index)
3541 backref->errors |= REF_ERR_NO_DIR_INDEX;
3542 if (!backref->found_inode_ref)
3543 backref->errors |= REF_ERR_NO_INODE_REF;
3544 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3545 " namelen %u name %s filetype %d errors %x",
3546 (unsigned long long)backref->dir,
3547 (unsigned long long)backref->index,
3548 backref->namelen, backref->name,
3549 backref->filetype, backref->errors);
3550 print_ref_error(backref->errors);
3552 free_inode_rec(rec);
3554 return (error > 0) ? -1 : 0;
/*
 * Return the root_record for @objectid from @root_cache, creating and
 * inserting a zeroed one when the lookup finds nothing.
 *
 * A new record covers the cache range [objectid, objectid + 1) and starts
 * with an empty backref list. Failures are reported as ERR_PTR(-ENOMEM)
 * when calloc() fails and ERR_PTR(-EEXIST) when the insert fails.
 *
 * NOTE(review): the second parameter declaration, the branch conditions and
 * the success return are missing from this copy; no free(rec) is visible
 * before the -EEXIST return, so confirm the new record is not leaked on
 * that path.
 */
3557 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3560 struct cache_extent *cache;
3561 struct root_record *rec = NULL;
3564 cache = lookup_cache_extent(root_cache, objectid, 1);
3566 rec = container_of(cache, struct root_record, cache);
3568 rec = calloc(1, sizeof(*rec));
3570 return ERR_PTR(-ENOMEM);
3571 rec->objectid = objectid;
3572 INIT_LIST_HEAD(&rec->backrefs);
3573 rec->cache.start = objectid;
3574 rec->cache.size = 1;
3576 ret = insert_cache_extent(root_cache, &rec->cache);
3578 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec identified by (@ref_root, @dir, @name), or
 * create one.
 *
 * Existing entries are compared on ref_root, dir, namelen and then the name
 * bytes; @index is not part of the comparison. A new entry is allocated
 * with room for the name plus a terminating NUL, filled in (including
 * @index) and appended to rec->backrefs.
 *
 * NOTE(review): the returns, the allocation failure check and the
 * assignment of dir on the new entry are missing from this copy.
 */
3583 static struct root_backref *get_root_backref(struct root_record *rec,
3584 u64 ref_root, u64 dir, u64 index,
3585 const char *name, int namelen)
3587 struct root_backref *backref;
3589 list_for_each_entry(backref, &rec->backrefs, list) {
3590 if (backref->ref_root != ref_root || backref->dir != dir ||
3591 backref->namelen != namelen)
3593 if (memcmp(name, backref->name, namelen))
3598 backref = calloc(1, sizeof(*backref) + namelen + 1);
3601 backref->ref_root = ref_root;
3603 backref->index = index;
3604 backref->namelen = namelen;
3605 memcpy(backref->name, name, namelen);
3606 backref->name[namelen] = '\0';
3607 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Per-entry destructor for the root record cache: resolves the root_record
 * that embeds @cache and unlinks every backref from its list.
 *
 * NOTE(review): the lines that release the backrefs and the record itself
 * are missing from this copy.
 */
3611 static void free_root_record(struct cache_extent *cache)
3613 struct root_record *rec;
3614 struct root_backref *backref;
3616 rec = container_of(cache, struct root_record, cache);
3617 while (!list_empty(&rec->backrefs)) {
3618 backref = to_root_backref(rec->backrefs.next);
3619 list_del(&backref->list);
/*
 * Presumably expands to the tree-wide free helper for root records, using
 * free_root_record as the per-entry callback; confirm against the macro
 * definition.
 */
3626 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referencing subvolume @root_id
 * was seen, updating the matching backref in @root_cache.
 *
 * The root record and backref are looked up (or created) first and @errors
 * is OR-ed into the backref. For every item type except DIR_ITEM the index
 * is compared with the one stored earlier, flagging REF_ERR_INDEX_UNMATCH
 * on a mismatch, or stored. The found_* flag for the item type is then
 * set, with REF_ERR_DUP_ROOT_REF / REF_ERR_DUP_ROOT_BACKREF raised when the
 * same kind of root ref is seen twice. A backref that has both a forward
 * ref and a dir item is marked reachable.
 *
 * NOTE(review): the found_ref accounting, the allocation failure check,
 * braces and the return are missing from this copy.
 */
3628 static int add_root_backref(struct cache_tree *root_cache,
3629 u64 root_id, u64 ref_root, u64 dir, u64 index,
3630 const char *name, int namelen,
3631 int item_type, int errors)
3633 struct root_record *rec;
3634 struct root_backref *backref;
3636 rec = get_root_rec(root_cache, root_id);
3637 BUG_ON(IS_ERR(rec));
3638 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3641 backref->errors |= errors;
3643 if (item_type != BTRFS_DIR_ITEM_KEY) {
3644 if (backref->found_dir_index || backref->found_back_ref ||
3645 backref->found_forward_ref) {
3646 if (backref->index != index)
3647 backref->errors |= REF_ERR_INDEX_UNMATCH;
3649 backref->index = index;
3653 if (item_type == BTRFS_DIR_ITEM_KEY) {
3654 if (backref->found_forward_ref)
3656 backref->found_dir_item = 1;
3657 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3658 backref->found_dir_index = 1;
3659 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3660 if (backref->found_forward_ref)
3661 backref->errors |= REF_ERR_DUP_ROOT_REF;
3662 else if (backref->found_dir_item)
3664 backref->found_forward_ref = 1;
3665 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3666 if (backref->found_back_ref)
3667 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3668 backref->found_back_ref = 1;
3673 if (backref->found_forward_ref && backref->found_dir_item)
3674 backref->reachable = 1;
/*
 * Fold the per-tree records in @src_cache that describe child subvolumes
 * into the global root record cache @dst_cache.
 *
 * For the tree reloc root the source cache is simply freed. Otherwise each
 * record is removed from @src_cache and checked with is_child_root(); its
 * backrefs that carry a dir item and/or a dir index are forwarded to
 * add_root_backref() with the matching key type, and the record is freed.
 * A backref with an inode ref is treated as a bug (BUG_ON).
 *
 * NOTE(review): the loop header, the handling of the is_child_root()
 * result, the trailing arguments of the add_root_backref() calls and the
 * returns are missing from this copy.
 */
3678 static int merge_root_recs(struct btrfs_root *root,
3679 struct cache_tree *src_cache,
3680 struct cache_tree *dst_cache)
3682 struct cache_extent *cache;
3683 struct ptr_node *node;
3684 struct inode_record *rec;
3685 struct inode_backref *backref;
3688 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3689 free_inode_recs_tree(src_cache);
3694 cache = search_cache_extent(src_cache, 0);
3697 node = container_of(cache, struct ptr_node, cache);
3699 remove_cache_extent(src_cache, &node->cache);
3702 ret = is_child_root(root, root->objectid, rec->ino);
3708 list_for_each_entry(backref, &rec->backrefs, list) {
3709 BUG_ON(backref->found_inode_ref);
3710 if (backref->found_dir_item)
3711 add_root_backref(dst_cache, rec->ino,
3712 root->root_key.objectid, backref->dir,
3713 backref->index, backref->name,
3714 backref->namelen, BTRFS_DIR_ITEM_KEY,
3716 if (backref->found_dir_index)
3717 add_root_backref(dst_cache, rec->ino,
3718 root->root_key.objectid, backref->dir,
3719 backref->index, backref->name,
3720 backref->namelen, BTRFS_DIR_INDEX_KEY,
3724 free_inode_rec(rec);
/*
 * Check that every subvolume in @root_cache is referenced consistently.
 *
 * Visible phases:
 *  1. The record of BTRFS_FS_TREE_OBJECTID is fetched as the starting
 *     point.
 *  2. Reachability pass over all records: records with found_ref == 0 are
 *     tested, and for the others each reachable backref has its
 *     referencing root looked up; the backref can be marked unreachable
 *     depending on that root's found_ref. As the existing fixme notes,
 *     circular references are not detected.
 *  3. Reporting pass: an unreferenced record in the free objectid range is
 *     checked with check_orphan_item() on the tree root and may be
 *     reported as an fs tree that is not referenced (records without a
 *     root item are considered meaningless per the existing comment).
 *     Missing dir item, dir index, root backref and root ref are flagged
 *     on each backref, and unresolved refs are printed.
 *
 * Returns 1 when errors is greater than 0, otherwise 0.
 *
 * NOTE(review): loop headers, continue/break statements, counter updates
 * and braces are missing from this copy; confirm the exact branching
 * against the full source.
 */
3731 static int check_root_refs(struct btrfs_root *root,
3732 struct cache_tree *root_cache)
3734 struct root_record *rec;
3735 struct root_record *ref_root;
3736 struct root_backref *backref;
3737 struct cache_extent *cache;
3743 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3744 BUG_ON(IS_ERR(rec));
3747 /* fixme: this can not detect circular references */
3750 cache = search_cache_extent(root_cache, 0);
3754 rec = container_of(cache, struct root_record, cache);
3755 cache = next_cache_extent(cache);
3757 if (rec->found_ref == 0)
3760 list_for_each_entry(backref, &rec->backrefs, list) {
3761 if (!backref->reachable)
3764 ref_root = get_root_rec(root_cache,
3766 BUG_ON(IS_ERR(ref_root));
3767 if (ref_root->found_ref > 0)
3770 backref->reachable = 0;
3772 if (rec->found_ref == 0)
3778 cache = search_cache_extent(root_cache, 0);
3782 rec = container_of(cache, struct root_record, cache);
3783 cache = next_cache_extent(cache);
3785 if (rec->found_ref == 0 &&
3786 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3787 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3788 ret = check_orphan_item(root->fs_info->tree_root,
3794 * If we don't have a root item then we likely just have
3795 * a dir item in a snapshot for this root but no actual
3796 * ref key or anything so it's meaningless.
3798 if (!rec->found_root_item)
3801 fprintf(stderr, "fs tree %llu not referenced\n",
3802 (unsigned long long)rec->objectid);
3806 if (rec->found_ref > 0 && !rec->found_root_item)
3808 list_for_each_entry(backref, &rec->backrefs, list) {
3809 if (!backref->found_dir_item)
3810 backref->errors |= REF_ERR_NO_DIR_ITEM;
3811 if (!backref->found_dir_index)
3812 backref->errors |= REF_ERR_NO_DIR_INDEX;
3813 if (!backref->found_back_ref)
3814 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3815 if (!backref->found_forward_ref)
3816 backref->errors |= REF_ERR_NO_ROOT_REF;
3817 if (backref->reachable && backref->errors)
3824 fprintf(stderr, "fs tree %llu refs %u %s\n",
3825 (unsigned long long)rec->objectid, rec->found_ref,
3826 rec->found_root_item ? "" : "not found");
3828 list_for_each_entry(backref, &rec->backrefs, list) {
3829 if (!backref->reachable)
3831 if (!backref->errors && rec->found_root_item)
3833 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3834 " index %llu namelen %u name %s errors %x\n",
3835 (unsigned long long)backref->ref_root,
3836 (unsigned long long)backref->dir,
3837 (unsigned long long)backref->index,
3838 backref->namelen, backref->name,
3840 print_ref_error(backref->errors);
3843 return errors > 0 ? 1 : 0;
/*
 * Decode one ROOT_REF or ROOT_BACKREF item at @slot of @eb and record it in
 * @root_cache.
 *
 * The directory id, sequence (index) and name length are read from the
 * item. A name longer than BTRFS_NAME_LEN is clamped to BTRFS_NAME_LEN and
 * flagged with REF_ERR_NAME_TOO_LONG, so the copy into namebuf cannot
 * exceed the buffer. The name bytes follow the btrfs_root_ref structure in
 * the item.
 *
 * For a ROOT_REF key the referenced root is key->offset and the referencing
 * root is key->objectid; the other call swaps the two.
 *
 * NOTE(review): the local declarations, the assignment in the first branch
 * of the length check, braces and the return are missing from this copy.
 */
3846 static int process_root_ref(struct extent_buffer *eb, int slot,
3847 struct btrfs_key *key,
3848 struct cache_tree *root_cache)
3854 struct btrfs_root_ref *ref;
3855 char namebuf[BTRFS_NAME_LEN];
3858 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3860 dirid = btrfs_root_ref_dirid(eb, ref);
3861 index = btrfs_root_ref_sequence(eb, ref);
3862 name_len = btrfs_root_ref_name_len(eb, ref);
3864 if (name_len <= BTRFS_NAME_LEN) {
3868 len = BTRFS_NAME_LEN;
3869 error = REF_ERR_NAME_TOO_LONG;
3871 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3873 if (key->type == BTRFS_ROOT_REF_KEY) {
3874 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3875 index, namebuf, len, key->type, error);
3877 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3878 index, namebuf, len, key->type, error);
/*
 * Per-entry destructor for the corrupt block cache: resolves the
 * btrfs_corrupt_block that embeds @cache.
 *
 * NOTE(review): the line that releases the entry is missing from this copy.
 */
3883 static void free_corrupt_block(struct cache_extent *cache)
3885 struct btrfs_corrupt_block *corrupt;
3887 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/*
 * Presumably expands to the tree-wide free helper for corrupt blocks, using
 * free_corrupt_block as the per-entry callback; confirm against the macro
 * definition.
 */
3891 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3894 * Repair the btree of the given root.
3896 * The fix is to remove the node key in corrupt_blocks cache_tree
3897 * and rebalance the tree.
3898 * After the fix, the btree should be writeable.
/*
 * Remove the tree blocks listed in @corrupt_blocks from the btree of @root
 * and rebalance the tree afterwards.
 *
 * Nothing is done when the cache is empty. Otherwise, inside one
 * transaction:
 *  - first pass: for every corrupt block the parent slot is located with
 *    btrfs_search_slot() at the recorded level (ins_len 0, so no balancing
 *    happens next to corrupted siblings), the block pointer is read, the
 *    pointer is removed with btrfs_del_ptr() and the extent is released
 *    with btrfs_free_extent();
 *  - second pass: btrfs_search_slot() is called again for every recorded
 *    key with ins_len -1 to let it rebalance the tree.
 * The transaction is committed and the path released at the end.
 *
 * NOTE(review): loop headers, error checks, several declarations and the
 * return are missing from this copy; the result of the commit is not
 * captured on the visible line.
 */
3900 static int repair_btree(struct btrfs_root *root,
3901 struct cache_tree *corrupt_blocks)
3903 struct btrfs_trans_handle *trans;
3904 struct btrfs_path path;
3905 struct btrfs_corrupt_block *corrupt;
3906 struct cache_extent *cache;
3907 struct btrfs_key key;
3912 if (cache_tree_empty(corrupt_blocks))
3915 trans = btrfs_start_transaction(root, 1);
3916 if (IS_ERR(trans)) {
3917 ret = PTR_ERR(trans);
3918 fprintf(stderr, "Error starting transaction: %s\n",
3922 btrfs_init_path(&path);
3923 cache = first_cache_extent(corrupt_blocks);
3925 corrupt = container_of(cache, struct btrfs_corrupt_block,
3927 level = corrupt->level;
3928 path.lowest_level = level;
3929 key.objectid = corrupt->key.objectid;
3930 key.type = corrupt->key.type;
3931 key.offset = corrupt->key.offset;
3934 * Here we don't want to do any tree balance, since it may
3935 * cause a balance with corrupted brother leaf/node,
3936 * so ins_len set to 0 here.
3937 * Balance will be done after all corrupt node/leaf is deleted.
3939 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3942 offset = btrfs_node_blockptr(path.nodes[level],
3945 /* Remove the ptr */
3946 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3950 * Remove the corresponding extent
3951 * return value is not concerned.
3953 btrfs_release_path(&path);
3954 ret = btrfs_free_extent(trans, root, offset,
3955 root->fs_info->nodesize, 0,
3956 root->root_key.objectid, level - 1, 0);
3957 cache = next_cache_extent(cache);
3960 /* Balance the btree using btrfs_search_slot() */
3961 cache = first_cache_extent(corrupt_blocks);
3963 corrupt = container_of(cache, struct btrfs_corrupt_block,
3965 memcpy(&key, &corrupt->key, sizeof(key));
3966 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3969 /* return will always >0 since it won't find the item */
3971 btrfs_release_path(&path);
3972 cache = next_cache_extent(cache);
3975 btrfs_commit_transaction(trans, root);
3976 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree: walk all of its blocks, collect inode and
 * child-root records, optionally repair corrupted tree blocks, and verify
 * the collected inode records.
 *
 * Visible sequence:
 *  1. A local corrupt-block cache is installed in fs_info for the duration
 *     of the check.
 *  2. For any root except the tree reloc root, its root record is fetched
 *     and found_root_item is set when the root item has refs.
 *  3. Orphan data extents attached to the root are moved onto the matching
 *     inode records, which are flagged I_ERR_FILE_EXTENT_ORPHAN.
 *  4. The walk control is reset for the root level and the root block is
 *     checked with btrfs_check_leaf() or btrfs_check_node().
 *  5. The walk starts at the root node when the root has refs or no drop
 *     progress; otherwise it resumes from drop_progress at drop_level,
 *     after rejecting an invalid drop level.
 *  6. walk_down_tree() and walk_up_tree() traverse the tree.
 *  7. Corrupted blocks found on the way are listed, and a repair is
 *     attempted with repair_btree().
 *  8. Child-root records are merged into @root_cache with
 *     merge_root_recs(), the current inode record is finalised, and
 *     check_inode_recs() validates the inode records.
 *  9. The corrupt-block cache and the orphan extent list are released.
 *
 * NOTE(review): the loop around the tree walk, error handling, the
 * repair-mode conditions, braces and the return are missing from this
 * copy; confirm the exact control flow against the full source.
 */
3980 static int check_fs_root(struct btrfs_root *root,
3981 struct cache_tree *root_cache,
3982 struct walk_control *wc)
3988 struct btrfs_path path;
3989 struct shared_node root_node;
3990 struct root_record *rec;
3991 struct btrfs_root_item *root_item = &root->root_item;
3992 struct cache_tree corrupt_blocks;
3993 struct orphan_data_extent *orphan;
3994 struct orphan_data_extent *tmp;
3995 enum btrfs_tree_block_status status;
3996 struct node_refs nrefs;
3999 * Reuse the corrupt_block cache tree to record corrupted tree block
4001 * Unlike the usage in extent tree check, here we do it in a per
4002 * fs/subvol tree base.
4004 cache_tree_init(&corrupt_blocks);
4005 root->fs_info->corrupt_blocks = &corrupt_blocks;
4007 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4008 rec = get_root_rec(root_cache, root->root_key.objectid);
4009 BUG_ON(IS_ERR(rec));
4010 if (btrfs_root_refs(root_item) > 0)
4011 rec->found_root_item = 1;
4014 btrfs_init_path(&path);
4015 memset(&root_node, 0, sizeof(root_node));
4016 cache_tree_init(&root_node.root_cache);
4017 cache_tree_init(&root_node.inode_cache);
4018 memset(&nrefs, 0, sizeof(nrefs));
4020 /* Move the orphan extent record to corresponding inode_record */
4021 list_for_each_entry_safe(orphan, tmp,
4022 &root->orphan_data_extents, list) {
4023 struct inode_record *inode;
4025 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4027 BUG_ON(IS_ERR(inode));
4028 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4029 list_move(&orphan->list, &inode->orphan_extents);
4032 level = btrfs_header_level(root->node);
4033 memset(wc->nodes, 0, sizeof(wc->nodes));
4034 wc->nodes[level] = &root_node;
4035 wc->active_node = level;
4036 wc->root_level = level;
4038 /* We may not have checked the root block, lets do that now */
4039 if (btrfs_is_leaf(root->node))
4040 status = btrfs_check_leaf(root, NULL, root->node);
4042 status = btrfs_check_node(root, NULL, root->node);
4043 if (status != BTRFS_TREE_BLOCK_CLEAN)
4046 if (btrfs_root_refs(root_item) > 0 ||
4047 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4048 path.nodes[level] = root->node;
4049 extent_buffer_get(root->node);
4050 path.slots[level] = 0;
4052 struct btrfs_key key;
4053 struct btrfs_disk_key found_key;
4055 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4056 level = root_item->drop_level;
4057 path.lowest_level = level;
4058 if (level > btrfs_header_level(root->node) ||
4059 level >= BTRFS_MAX_LEVEL) {
4060 error("ignoring invalid drop level: %u", level);
4063 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4066 btrfs_node_key(path.nodes[level], &found_key,
4068 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4069 sizeof(found_key)));
4073 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4079 wret = walk_up_tree(root, &path, wc, &level);
4086 btrfs_release_path(&path);
4088 if (!cache_tree_empty(&corrupt_blocks)) {
4089 struct cache_extent *cache;
4090 struct btrfs_corrupt_block *corrupt;
4092 printf("The following tree block(s) is corrupted in tree %llu:\n",
4093 root->root_key.objectid);
4094 cache = first_cache_extent(&corrupt_blocks);
4096 corrupt = container_of(cache,
4097 struct btrfs_corrupt_block,
4099 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4100 cache->start, corrupt->level,
4101 corrupt->key.objectid, corrupt->key.type,
4102 corrupt->key.offset);
4103 cache = next_cache_extent(cache);
4106 printf("Try to repair the btree for root %llu\n",
4107 root->root_key.objectid);
4108 ret = repair_btree(root, &corrupt_blocks);
4110 fprintf(stderr, "Failed to repair btree: %s\n",
4113 printf("Btree for root %llu is fixed\n",
4114 root->root_key.objectid);
4118 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4122 if (root_node.current) {
4123 root_node.current->checked = 1;
4124 maybe_free_inode_rec(&root_node.inode_cache,
4128 err = check_inode_recs(root, &root_node.inode_cache);
4132 free_corrupt_blocks_tree(&corrupt_blocks);
4133 root->fs_info->corrupt_blocks = NULL;
4134 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Report whether @objectid should be treated as a fs root by the checks
 * below. The two relocation objectids are tested first; any other
 * objectid is answered by is_fstree().
 * NOTE(review): the statement executed for the relocation objectids is on
 * a line missing from this extract - confirm against the full file.
 */
4138 static int fs_root_objectid(u64 objectid)
4140 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4141 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4143 return is_fstree(objectid);
/*
 * Scan the tree root for ROOT_ITEM, ROOT_REF and ROOT_BACKREF keys.
 * Each ROOT_ITEM accepted by fs_root_objectid() is read and handed to
 * check_fs_root(); each ref/backref is handed to process_root_ref().
 *
 * @fs_info:    filesystem whose tree_root is scanned
 * @root_cache: root record cache passed through to check_fs_root()
 *
 * NOTE(review): the loop construct, several local declarations and the
 * return statements are on lines missing from this extract.
 */
4146 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4147 struct cache_tree *root_cache)
4149 struct btrfs_path path;
4150 struct btrfs_key key;
4151 struct walk_control wc;
4152 struct extent_buffer *leaf, *tree_node;
4153 struct btrfs_root *tmp_root;
4154 struct btrfs_root *tree_root = fs_info->tree_root;
/* Progress reporting: switch the task position to the fs roots phase. */
4158 if (ctx.progress_enabled) {
4159 ctx.tp = TASK_FS_ROOTS;
4160 task_start(ctx.info);
4164 * Just in case we made any changes to the extent tree that weren't
4165 * reflected into the free space cache yet.
4168 reset_cached_block_groups(fs_info);
4169 memset(&wc, 0, sizeof(wc));
4170 cache_tree_init(&wc.shared);
4171 btrfs_init_path(&path);
4176 key.type = BTRFS_ROOT_ITEM_KEY;
4177 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the current tree root node so a later change can be detected. */
4182 tree_node = tree_root->node;
/*
 * The tree root node changed since it was recorded: discard the collected
 * root records and the path.
 * NOTE(review): presumably the scan restarts here - the jump is not visible.
 */
4184 if (tree_node != tree_root->node) {
4185 free_root_recs_tree(root_cache);
4186 btrfs_release_path(&path);
4189 leaf = path.nodes[0];
/* Slot is past the last item of this leaf: move on to the next leaf. */
4190 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4191 ret = btrfs_next_leaf(tree_root, &path);
4197 leaf = path.nodes[0];
4199 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4200 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4201 fs_root_objectid(key.objectid)) {
/*
 * Reloc tree roots are read with the no_cache variant and freed again
 * after the check (see btrfs_free_fs_root() below). The other visible
 * path sets key.offset to (u64)-1 before btrfs_read_fs_root();
 * presumably that is the else branch, whose keyword line is missing.
 */
4202 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4203 tmp_root = btrfs_read_fs_root_no_cache(
4206 key.offset = (u64)-1;
4207 tmp_root = btrfs_read_fs_root(
4210 if (IS_ERR(tmp_root)) {
4214 ret = check_fs_root(tmp_root, root_cache, &wc);
/*
 * -EAGAIN from check_fs_root(): drop the root records and the path.
 * NOTE(review): what follows (likely a restart) is not visible here.
 */
4215 if (ret == -EAGAIN) {
4216 free_root_recs_tree(root_cache);
4217 btrfs_release_path(&path);
4222 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4223 btrfs_free_fs_root(tmp_root);
4224 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4225 key.type == BTRFS_ROOT_BACKREF_KEY) {
4226 process_root_ref(leaf, path.slots[0], &key,
4233 btrfs_release_path(&path);
/* wc.shared is expected to be empty once freed; warn with the line number if not. */
4235 free_extent_cache_tree(&wc.shared);
4236 if (!cache_tree_empty(&wc.shared))
4237 fprintf(stderr, "warning line %d\n", __LINE__);
4239 task_stop(ctx.info);
4245 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4246 * INODE_REF/INODE_EXTREF match.
4248 * @root: the root of the fs/file tree
4249 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4250 * @key: the key of the DIR_ITEM/DIR_INDEX
4251 * @index: the index in the INODE_REF/INODE_EXTREF, be used to
4252 * distinguish root_dir between normal dir/file
4253 * @name: the name in the INODE_REF/INODE_EXTREF
4254 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4255 * @mode: the st_mode of INODE_ITEM
4257 * Return 0 if no error occurred.
4258 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4259 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4261 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4262 * not match for normal dir/file.
/*
 * Search @root for @key (a DIR_ITEM or DIR_INDEX key) and compare the
 * entries packed in the found item with the inode ref described by
 * @ref_key, @name, @namelen and @mode.
 * NOTE(review): several local declarations and most branch/jump lines of
 * this function are missing from this extract; comments below describe
 * only the visible statements.
 */
4264 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4265 struct btrfs_key *key, u64 index, char *name,
4266 u32 namelen, u32 mode)
4268 struct btrfs_path path;
4269 struct extent_buffer *node;
4270 struct btrfs_dir_item *di;
4271 struct btrfs_key location;
4272 char namebuf[BTRFS_NAME_LEN] = {0};
4282 btrfs_init_path(&path);
4283 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4285 ret = DIR_ITEM_MISSING;
4289 /* Process root dir and goto out*/
4292 ret = ROOT_DIR_ERROR;
4294 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4296 ref_key->type == BTRFS_INODE_REF_KEY ?
4298 ref_key->objectid, ref_key->offset,
4299 key->type == BTRFS_DIR_ITEM_KEY ?
4300 "DIR_ITEM" : "DIR_INDEX");
4308 /* Process normal file/dir */
4310 ret = DIR_ITEM_MISSING;
4312 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4314 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4315 ref_key->objectid, ref_key->offset,
4316 key->type == BTRFS_DIR_ITEM_KEY ?
4317 "DIR_ITEM" : "DIR_INDEX",
4318 key->objectid, key->offset, namelen, name,
4319 imode_to_type(mode));
4323 /* Check whether inode_id/filetype/name match */
4324 node = path.nodes[0];
4325 slot = path.slots[0];
4326 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4327 total = btrfs_item_size_nr(node, slot);
/* Walk every dir entry stored in this item; total is the item size in bytes. */
4328 while (cur < total) {
/* ret is preset to DIR_ITEM_MISMATCH at the start of each entry. */
4329 ret = DIR_ITEM_MISMATCH;
4330 name_len = btrfs_dir_name_len(node, di);
4331 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM key (objectid of @ref_key, offset 0). */
4333 btrfs_dir_item_key_to_cpu(node, di, &location);
4334 if (location.objectid != ref_key->objectid ||
4335 location.type != BTRFS_INODE_ITEM_KEY ||
4336 location.offset != 0)
/* The file type stored in the entry must agree with the type derived from @mode. */
4339 filetype = btrfs_dir_type(node, di);
4340 if (imode_to_type(mode) != filetype)
/*
 * The name would run past the end of the item or exceeds BTRFS_NAME_LEN:
 * warn and continue with a length clamped to what fits.
 */
4343 if (cur + sizeof(*di) + name_len > total ||
4344 name_len > BTRFS_NAME_LEN) {
4345 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4347 key->type == BTRFS_DIR_ITEM_KEY ?
4348 "DIR_ITEM" : "DIR_INDEX",
4349 key->objectid, key->offset, name_len);
4351 if (cur + sizeof(*di) > total)
4353 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow the dir item header directly. */
4359 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
/* Names are compared by length first, then by content. */
4360 if (len != namelen || strncmp(namebuf, name, len))
/* Step to the next packed entry: header + name + data. */
4366 len = sizeof(*di) + name_len + data_len;
4367 di = (struct btrfs_dir_item *)((char *)di + len);
4370 if (ret == DIR_ITEM_MISMATCH)
4372 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4374 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4375 ref_key->objectid, ref_key->offset,
4376 key->type == BTRFS_DIR_ITEM_KEY ?
4377 "DIR_ITEM" : "DIR_INDEX",
4378 key->objectid, key->offset, namelen, name,
4379 imode_to_type(mode));
4381 btrfs_release_path(&path);
4386 * Traverse the given INODE_REF and call find_dir_item() to find related
4387 * DIR_ITEM/DIR_INDEX.
4389 * @root: the root of the fs/file tree
4390 * @ref_key: the key of the INODE_REF
4391 * @refs: the count of INODE_REF
4392 * @mode: the st_mode of INODE_ITEM
4394 * Return 0 if no error occurred.
/*
 * Walk the INODE_REF entries stored in item @slot of @node and, for each
 * one, call find_dir_item() twice: once with a DIR_INDEX key and once
 * with a DIR_ITEM key built from the entry.
 * NOTE(review): the loop construct, the remaining parameters, local
 * declarations and the return path are on lines missing from this extract.
 */
4396 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4397 struct extent_buffer *node, int slot, u64 *refs,
4400 struct btrfs_key key;
4401 struct btrfs_inode_ref *ref;
4402 char namebuf[BTRFS_NAME_LEN] = {0};
4410 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4411 total = btrfs_item_size_nr(node, slot);
4414 /* Update inode ref count */
4417 index = btrfs_inode_ref_index(node, ref);
4418 name_len = btrfs_inode_ref_name_len(node, ref);
/*
 * The name would run past the end of the item or exceeds BTRFS_NAME_LEN:
 * warn and clamp the length that is read into namebuf.
 */
4419 if (cur + sizeof(*ref) + name_len > total ||
4420 name_len > BTRFS_NAME_LEN) {
4421 warning("root %llu INODE_REF[%llu %llu] name too long",
4422 root->objectid, ref_key->objectid, ref_key->offset);
4424 if (total < cur + sizeof(*ref))
4426 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
/* The name bytes follow the inode ref header directly. */
4431 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4433 /* Check root dir ref name */
/*
 * NOTE(review): the comparison length is name_len (the on-disk value),
 * not the clamped len used for the read above.
 */
4434 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4435 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4436 root->objectid, ref_key->objectid, ref_key->offset,
4438 err |= ROOT_DIR_ERROR;
4441 /* Find related DIR_INDEX */
/* The lookup objectid is taken from the offset of the INODE_REF key. */
4442 key.objectid = ref_key->offset;
4443 key.type = BTRFS_DIR_INDEX_KEY;
4445 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4448 /* Find related dir_item */
/* DIR_ITEM keys use the name hash as their offset. */
4449 key.objectid = ref_key->offset;
4450 key.type = BTRFS_DIR_ITEM_KEY;
4451 key.offset = btrfs_name_hash(namebuf, len);
4452 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step to the next packed ref: header + name. */
4455 len = sizeof(*ref) + name_len;
4456 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4466 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4467 * DIR_ITEM/DIR_INDEX.
4469 * @root: the root of the fs/file tree
4470 * @ref_key: the key of the INODE_EXTREF
4471 * @refs: the count of INODE_EXTREF
4472 * @mode: the st_mode of INODE_ITEM
4474 * Return 0 if no error occurred.
/*
 * Walk the INODE_EXTREF entries stored in item @slot of @node and, for
 * each one, call find_dir_item() with a DIR_INDEX key and then a DIR_ITEM
 * key, both using the parent recorded in the extref as objectid.
 * NOTE(review): the loop construct, the remaining parameters, local
 * declarations and the return path are on lines missing from this extract.
 */
4476 static int check_inode_extref(struct btrfs_root *root,
4477 struct btrfs_key *ref_key,
4478 struct extent_buffer *node, int slot, u64 *refs,
4481 struct btrfs_key key;
4482 struct btrfs_inode_extref *extref;
4483 char namebuf[BTRFS_NAME_LEN] = {0};
4493 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4494 total = btrfs_item_size_nr(node, slot);
4497 /* update inode ref count */
4499 name_len = btrfs_inode_extref_name_len(node, extref);
4500 index = btrfs_inode_extref_index(node, extref);
4501 parent = btrfs_inode_extref_parent(node, extref);
/*
 * Names longer than BTRFS_NAME_LEN are read truncated to BTRFS_NAME_LEN
 * after a warning. The assignment for the in-range case is not visible.
 */
4502 if (name_len <= BTRFS_NAME_LEN) {
4505 len = BTRFS_NAME_LEN;
4506 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4507 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes follow the extref header directly. */
4509 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4511 /* Check root dir ref name */
/*
 * NOTE(review): the comparison length is name_len (the on-disk value),
 * not the clamped len used for the read above.
 */
4512 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4513 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4514 root->objectid, ref_key->objectid, ref_key->offset,
4516 err |= ROOT_DIR_ERROR;
4519 /* find related dir_index */
4520 key.objectid = parent;
4521 key.type = BTRFS_DIR_INDEX_KEY;
4523 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4526 /* find related dir_item */
/* DIR_ITEM keys use the name hash as their offset. */
4527 key.objectid = parent;
4528 key.type = BTRFS_DIR_ITEM_KEY;
4529 key.offset = btrfs_name_hash(namebuf, len);
4530 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step to the next packed extref: header + name. */
4533 len = sizeof(*extref) + name_len;
4534 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4544 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4545 * DIR_ITEM/DIR_INDEX match.
4547 * @root: the root of the fs/file tree
4548 * @key: the key of the INODE_REF/INODE_EXTREF
4549 * @name: the name in the INODE_REF/INODE_EXTREF
4550 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4551 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4553 * @ext_ref: the EXTENDED_IREF feature
4555 * Return 0 if no error occurred.
4556 * Return >0 for error bitmap
/*
 * Look for an inode ref that matches @name, @namelen and @index. The
 * INODE_REF item at @key is searched first; the visible code then builds
 * an INODE_EXTREF key and searches the extref item as well.
 *
 * Side effect: once the extref lookup is reached, key->type and
 * key->offset of the caller's @key are overwritten.
 *
 * NOTE(review): local declarations, the branch/jump lines and the return
 * statement are on lines missing from this extract.
 */
4558 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4559 char *name, int namelen, u64 index,
4560 unsigned int ext_ref)
4562 struct btrfs_path path;
4563 struct btrfs_inode_ref *ref;
4564 struct btrfs_inode_extref *extref;
4565 struct extent_buffer *node;
4566 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4577 btrfs_init_path(&path);
4578 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4580 ret = INODE_REF_MISSING;
4584 node = path.nodes[0];
4585 slot = path.slots[0];
4587 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4588 total = btrfs_item_size_nr(node, slot);
4590 /* Iterate over all entries of INODE_REF */
4591 while (cur < total) {
/* ret is preset to INODE_REF_MISSING at the start of each entry. */
4592 ret = INODE_REF_MISSING;
4594 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4595 ref_index = btrfs_inode_ref_index(node, ref);
/* An @index of (u64)-1 disables the index comparison. */
4596 if (index != (u64)-1 && index != ref_index)
/*
 * The name would run past the end of the item or exceeds BTRFS_NAME_LEN:
 * warn and clamp the length that is read.
 */
4599 if (cur + sizeof(*ref) + ref_namelen > total ||
4600 ref_namelen > BTRFS_NAME_LEN) {
4601 warning("root %llu INODE %s[%llu %llu] name too long",
4603 key->type == BTRFS_INODE_REF_KEY ?
4605 key->objectid, key->offset);
4607 if (cur + sizeof(*ref) > total)
4609 len = min_t(u32, total - cur - sizeof(*ref),
4615 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
/* Names are compared by length first, then by content. */
4618 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step to the next packed ref: header + name. */
4624 len = sizeof(*ref) + ref_namelen;
4625 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4630 /* Skip if the EXTENDED_IREF feature is not supported */
4634 btrfs_release_path(&path);
4635 btrfs_init_path(&path);
/*
 * Turn @key into the INODE_EXTREF key: the old offset is kept as dir_id
 * and the new offset is the extref hash of (dir_id, name).
 */
4637 dir_id = key->offset;
4638 key->type = BTRFS_INODE_EXTREF_KEY;
4639 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4641 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4643 ret = INODE_REF_MISSING;
4647 node = path.nodes[0];
4648 slot = path.slots[0];
4650 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4652 total = btrfs_item_size_nr(node, slot);
4654 /* Iterate over all entries of INODE_EXTREF */
4655 while (cur < total) {
4656 ret = INODE_REF_MISSING;
4658 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4659 ref_index = btrfs_inode_extref_index(node, extref);
4660 parent = btrfs_inode_extref_parent(node, extref);
4661 if (index != (u64)-1 && index != ref_index)
/* Compare the parent stored in the extref with the directory id. */
4664 if (parent != dir_id)
4667 if (ref_namelen <= BTRFS_NAME_LEN) {
4670 len = BTRFS_NAME_LEN;
/*
 * NOTE(review): key->type was set to BTRFS_INODE_EXTREF_KEY above, so
 * the REF_KEY test in this message selects its second string here.
 */
4671 warning("root %llu INODE %s[%llu %llu] name too long",
4673 key->type == BTRFS_INODE_REF_KEY ?
4675 key->objectid, key->offset);
4677 read_extent_buffer(node, ref_namebuf,
4678 (unsigned long)(extref + 1), len);
4680 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step to the next packed extref: header + name. */
4687 len = sizeof(*extref) + ref_namelen;
4688 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4693 btrfs_release_path(&path);
4698 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4699 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4701 * @root: the root of the fs/file tree
4702 * @key: the key of the DIR_ITEM/DIR_INDEX
4703 * @size: the st_size of the INODE_ITEM
4704 * @ext_ref: the EXTENDED_IREF feature
4706 * Return 0 if no error occurred.
/*
 * Walk the dir entries stored in item @slot of @node (a DIR_ITEM or
 * DIR_INDEX item, as told by key->type). For each entry the visible code
 * adds the name length to *@size, verifies the DIR_ITEM name hash, looks
 * up the INODE_ITEM the entry points to, compares file types and calls
 * find_inode_ref() for the back reference.
 * NOTE(review): local declarations, several conditions and the return
 * path are on lines missing from this extract.
 */
4708 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4709 struct extent_buffer *node, int slot, u64 *size,
4710 unsigned int ext_ref)
4712 struct btrfs_dir_item *di;
4713 struct btrfs_inode_item *ii;
4714 struct btrfs_path path;
4715 struct btrfs_key location;
4716 char namebuf[BTRFS_NAME_LEN] = {0};
4729 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4730 * ignore index check.
4732 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4734 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4735 total = btrfs_item_size_nr(node, slot);
4737 while (cur < total) {
4738 data_len = btrfs_dir_data_len(node, di);
/* NOTE(review): the condition guarding this error is not visible here. */
4740 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4741 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4742 "DIR_ITEM" : "DIR_INDEX",
4743 key->objectid, key->offset, data_len);
4745 name_len = btrfs_dir_name_len(node, di);
/*
 * The name would run past the end of the item or exceeds BTRFS_NAME_LEN:
 * warn and clamp the length that is read into namebuf.
 */
4746 if (cur + sizeof(*di) + name_len > total ||
4747 name_len > BTRFS_NAME_LEN) {
4748 warning("root %llu %s[%llu %llu] name too long",
4750 key->type == BTRFS_DIR_ITEM_KEY ?
4751 "DIR_ITEM" : "DIR_INDEX",
4752 key->objectid, key->offset);
4754 if (cur + sizeof(*di) > total)
4756 len = min_t(u32, total - cur - sizeof(*di),
/* Accumulate the on-disk name length into the caller's size counter. */
4761 (*size) += name_len;
4763 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4764 filetype = btrfs_dir_type(node, di);
/* For DIR_ITEM the key offset must equal the hash of the stored name. */
4766 if (key->type == BTRFS_DIR_ITEM_KEY &&
4767 key->offset != btrfs_name_hash(namebuf, len)) {
4769 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4770 root->objectid, key->objectid, key->offset,
4771 namebuf, len, filetype, key->offset,
4772 btrfs_name_hash(namebuf, len));
4775 btrfs_init_path(&path);
4776 btrfs_dir_item_key_to_cpu(node, di, &location);
4778 /* Ignore related ROOT_ITEM check */
4779 if (location.type == BTRFS_ROOT_ITEM_KEY)
4782 /* Check relative INODE_ITEM(existence/filetype) */
4783 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4785 err |= INODE_ITEM_MISSING;
4786 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4787 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4788 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4789 key->offset, location.objectid, name_len,
4794 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4795 struct btrfs_inode_item);
4796 mode = btrfs_inode_mode(path.nodes[0], ii);
/* The type derived from the inode mode must agree with the entry file type. */
4798 if (imode_to_type(mode) != filetype) {
4799 err |= INODE_ITEM_MISMATCH;
4800 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4801 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4802 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4803 key->offset, name_len, namebuf, filetype);
4806 /* Check relative INODE_REF/INODE_EXTREF */
/* Reuse location as the INODE_REF key: offset is the directory objectid. */
4807 location.type = BTRFS_INODE_REF_KEY;
4808 location.offset = key->objectid;
4809 ret = find_inode_ref(root, &location, namebuf, len,
4812 if (ret & INODE_REF_MISSING)
4813 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4814 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4815 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4816 key->offset, name_len, namebuf, filetype);
/* Release the lookup path and step to the next entry: header + name + data. */
4819 btrfs_release_path(&path);
4820 len = sizeof(*di) + name_len + data_len;
4821 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item with bytes left after an entry holds more than one entry. */
4824 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4825 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4826 root->objectid, key->objectid, key->offset);
4835 * Check file extent datasum/hole, update the size of the file extents,
4836 * check and update the last offset of the file extent.
4838 * @root: the root of fs/file tree.
4839 * @fkey: the key of the file extent.
4840 * @nodatasum: INODE_NODATASUM feature.
4841 * @size: the sum of all EXTENT_DATA items size for this inode.
4842 * @end: the offset of the last extent.
4844 * Return 0 if no error occurred.
/*
 * Check one EXTENT_DATA item (@fkey, item @slot of @node): inline extent
 * sizes, the extent type, the amount of csum covering the extent, and
 * whether the extent starts where the previous one ended. *@size and
 * *@end are advanced by the extent length.
 * NOTE(review): some local declarations, branch lines and the return
 * path are on lines missing from this extract.
 */
4846 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4847 struct extent_buffer *node, int slot,
4848 unsigned int nodatasum, u64 *size, u64 *end)
4850 struct btrfs_file_extent_item *fi;
4853 u64 extent_num_bytes;
4855 u64 csum_found; /* In byte size, sectorsize aligned */
4856 u64 search_start; /* Logical range start we search for csum */
4857 u64 search_len; /* Logical range len we search for csum */
4858 unsigned int extent_type;
4859 unsigned int is_hole;
4864 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4866 /* Check inline extent */
4867 extent_type = btrfs_file_extent_type(node, fi);
4868 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4869 struct btrfs_item *e = btrfs_item_nr(slot);
4870 u32 item_inline_len;
4872 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4873 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4874 compressed = btrfs_file_extent_compression(node, fi);
4875 if (extent_num_bytes == 0) {
4877 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4878 root->objectid, fkey->objectid, fkey->offset);
4879 err |= FILE_EXTENT_ERROR;
/* For uncompressed inline data the recorded length must equal the item payload length. */
4881 if (!compressed && extent_num_bytes != item_inline_len) {
4883 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4884 root->objectid, fkey->objectid, fkey->offset,
4885 extent_num_bytes, item_inline_len);
4886 err |= FILE_EXTENT_ERROR;
4888 *end += extent_num_bytes;
4889 *size += extent_num_bytes;
4893 /* Check extent type */
4894 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4895 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4896 err |= FILE_EXTENT_ERROR;
4897 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4898 root->objectid, fkey->objectid, fkey->offset);
4902 /* Check REG_EXTENT/PREALLOC_EXTENT */
4903 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4904 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4905 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4906 extent_offset = btrfs_file_extent_offset(node, fi);
4907 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent whose disk bytenr and disk length are both zero. */
4908 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4911 * Check EXTENT_DATA csum
4913 * For plain (uncompressed) extent, we should only check the range
4914 * we're referring to, as it's possible that part of prealloc extent
4915 * has been written, and has csum:
4917 * |<--- Original large preallocated extent A ---->|
4918 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4921 * For compressed extent, we should check the whole range.
/*
 * Two candidate search ranges follow: the referenced part of the extent,
 * then the whole on-disk extent. Presumably the compression flag selects
 * between them - the branch lines are missing from this extract.
 */
4924 search_start = disk_bytenr + extent_offset;
4925 search_len = extent_num_bytes;
4927 search_start = disk_bytenr;
4928 search_len = disk_num_bytes;
4930 ret = count_csum_range(root, search_start, search_len, &csum_found);
/*
 * Three csum findings are reported: csum present on a nodatasum inode,
 * too little csum (or a lookup error) on a regular non-hole extent, and
 * csum present on a prealloc extent.
 */
4931 if (csum_found > 0 && nodatasum) {
4932 err |= ODD_CSUM_ITEM;
4933 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4934 root->objectid, fkey->objectid, fkey->offset);
4935 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4936 !is_hole && (ret < 0 || csum_found < search_len)) {
4937 err |= CSUM_ITEM_MISSING;
4938 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4939 root->objectid, fkey->objectid, fkey->offset,
4940 csum_found, search_len);
4941 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4942 err |= ODD_CSUM_ITEM;
4943 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4944 root->objectid, fkey->objectid, fkey->offset, csum_found);
4947 /* Check EXTENT_DATA hole */
/* Unless no_holes is set, this extent must start where the previous one ended. */
4948 if (!no_holes && *end != fkey->offset) {
4949 err |= FILE_EXTENT_ERROR;
4950 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4951 root->objectid, fkey->objectid, fkey->offset);
4954 *end += extent_num_bytes;
4956 *size += extent_num_bytes;
4962 * Check INODE_ITEM and related ITEMs (the same inode number)
4963 * 1. check link count
4964 * 2. check inode ref/extref
4965 * 3. check dir item/index
4967 * @ext_ref: the EXTENDED_IREF feature
4969 * Return 0 if no error occurred.
4970 * Return >0 (an error bitmap) if an error was found or the traversal is done
/*
 * Check the INODE_ITEM that @path points at together with the following
 * items that share its objectid. Each item is dispatched by key type to
 * check_inode_ref(), check_inode_extref(), check_dir_item() or
 * check_file_extent(); afterwards nlink, isize and nbytes of the inode
 * item are compared with the values collected on the way.
 * NOTE(review): the loop and switch constructs, many local declarations,
 * several conditions and the return path are on lines missing from this
 * extract.
 */
4972 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4973 unsigned int ext_ref)
4975 struct extent_buffer *node;
4976 struct btrfs_inode_item *ii;
4977 struct btrfs_key key;
4986 u64 extent_size = 0;
4988 unsigned int nodatasum;
4993 node = path->nodes[0];
4994 slot = path->slots[0];
4996 btrfs_item_key_to_cpu(node, &key, slot);
4997 inode_id = key.objectid;
/* Items of the orphan objectid are stepped over instead of being checked. */
4999 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5000 ret = btrfs_next_item(root, path);
/* Cache the inode item fields that the checks below compare against. */
5006 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5007 isize = btrfs_inode_size(node, ii);
5008 nbytes = btrfs_inode_nbytes(node, ii);
5009 mode = btrfs_inode_mode(node, ii);
5010 dir = imode_to_type(mode) == BTRFS_FT_DIR;
5011 nlink = btrfs_inode_nlink(node, ii);
5012 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5015 ret = btrfs_next_item(root, path);
5017 /* out will fill 'err' using current statistics */
5019 } else if (ret > 0) {
5024 node = path->nodes[0];
5025 slot = path->slots[0];
5026 btrfs_item_key_to_cpu(node, &key, slot);
/* An item with a different objectid belongs to another inode. */
5027 if (key.objectid != inode_id)
5031 case BTRFS_INODE_REF_KEY:
5032 ret = check_inode_ref(root, &key, node, slot, &refs,
5036 case BTRFS_INODE_EXTREF_KEY:
/* An EXTREF item without the EXTENDED_IREF feature only draws a warning. */
5037 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5038 warning("root %llu EXTREF[%llu %llu] isn't supported",
5039 root->objectid, key.objectid,
5041 ret = check_inode_extref(root, &key, node, slot, &refs,
5045 case BTRFS_DIR_ITEM_KEY:
5046 case BTRFS_DIR_INDEX_KEY:
5048 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5049 root->objectid, inode_id,
5050 imode_to_type(mode), key.objectid,
5053 ret = check_dir_item(root, &key, node, slot, &size,
5057 case BTRFS_EXTENT_DATA_KEY:
5059 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5060 root->objectid, inode_id, key.objectid,
5063 ret = check_file_extent(root, &key, node, slot,
5064 nodatasum, &extent_size,
5068 case BTRFS_XATTR_ITEM_KEY:
5071 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5072 key.objectid, key.type, key.offset);
5077 /* verify INODE_ITEM nlink/isize/nbytes */
5080 err |= LINK_COUNT_ERROR;
5081 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5082 root->objectid, inode_id, nlink);
5086 * Just a warning, as dir inode nbytes is just an
5087 * instructive value.
5089 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5090 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5091 root->objectid, inode_id,
5092 root->fs_info->nodesize);
/* size was accumulated by check_dir_item() from the entry name lengths. */
5095 if (isize != size) {
5097 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5098 root->objectid, inode_id, isize, size);
/* refs was accumulated by check_inode_ref()/check_inode_extref(). */
5101 if (nlink != refs) {
5102 err |= LINK_COUNT_ERROR;
5103 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5104 root->objectid, inode_id, nlink, refs);
5105 } else if (!nlink) {
5109 if (!nbytes && !no_holes && extent_end < isize) {
5110 err |= NBYTES_ERROR;
5111 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5112 root->objectid, inode_id, isize);
/* extent_size was accumulated by check_file_extent(). */
5115 if (nbytes != extent_size) {
5116 err |= NBYTES_ERROR;
5117 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5118 root->objectid, inode_id, nbytes, extent_size);
/*
 * Look up the INODE_ITEM of BTRFS_FIRST_FREE_OBJECTID in @root and run
 * check_inode_item() on it; a missing item is reported with
 * INODE_ITEM_MISSING.
 * NOTE(review): local declarations, the key offset assignment, the
 * conditions on the search result and the return path are on lines
 * missing from this extract.
 */
5125 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5127 struct btrfs_path path;
5128 struct btrfs_key key;
5132 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5133 key.type = BTRFS_INODE_ITEM_KEY;
5136 /* For root being dropped, we don't need to check first inode */
/* A root counts as being dropped when its refs are 0 and drop_progress has advanced. */
5137 if (btrfs_root_refs(&root->root_item) == 0 &&
5138 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5142 btrfs_init_path(&path);
5144 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5149 err |= INODE_ITEM_MISSING;
5150 error("first inode item of root %llu is missing",
5154 err |= check_inode_item(root, &path, ext_ref);
5159 btrfs_release_path(&path);
/*
 * Search rec->backref_tree for a tree backref, using a stack-allocated
 * match key ordered by compare_extent_backref(). Returns the backref
 * found, or the initial NULL when nothing is assigned to back.
 * NOTE(review): the initializer of match and the conditions around the
 * parent/full_backref assignments and the lookup result are on lines
 * missing from this extract.
 */
5163 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5164 u64 parent, u64 root)
5166 struct rb_node *node;
5167 struct tree_backref *back = NULL;
5168 struct tree_backref match = {
/* Visible here: the match key carries the parent and is flagged full_backref. */
5175 match.parent = parent;
5176 match.node.full_backref = 1;
5181 node = rb_search(&rec->backref_tree, &match.node.node,
5182 (rb_compare_keys)compare_extent_backref, NULL);
/* Convert the rb node back to its containing tree_backref. */
5184 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Search rec->backref_tree for a data backref, using a stack-allocated
 * match key ordered by compare_extent_backref(). Returns the backref
 * found, or the initial NULL when nothing is assigned to back.
 * NOTE(review): one parameter, most of the match initializer and the
 * conditions around the parent/full_backref assignments and the lookup
 * result are on lines missing from this extract.
 */
5189 static struct data_backref *find_data_backref(struct extent_record *rec,
5190 u64 parent, u64 root,
5191 u64 owner, u64 offset,
5193 u64 disk_bytenr, u64 bytes)
5195 struct rb_node *node;
5196 struct data_backref *back = NULL;
5197 struct data_backref match = {
5204 .found_ref = found_ref,
5205 .disk_bytenr = disk_bytenr,
/* Visible here: the match key carries the parent and is flagged full_backref. */
5209 match.parent = parent;
5210 match.node.full_backref = 1;
5215 node = rb_search(&rec->backref_tree, &match.node.node,
5216 (rb_compare_keys)compare_extent_backref, NULL);
/* Convert the rb node back to its containing data_backref. */
5218 back = to_data_backref(rb_node_to_extent_backref(node));
5223 * Iterate over all items in the tree and call check_inode_item() to check them.
5225 * @root: the root of the tree to be checked.
5226 * @ext_ref: the EXTENDED_IREF feature
5228 * Return 0 if no error found.
5229 * Return <0 for error.
/*
 * Check one fs/file tree: first the first inode item via
 * check_fs_first_inode(), then the whole tree by alternating
 * walk_down_tree_v2() and walk_up_tree_v2().
 * NOTE(review): local declarations, the loop construct, the handling of
 * return values and the return statement are on lines missing from this
 * extract.
 */
5231 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5233 struct btrfs_path path;
5234 struct node_refs nrefs;
5235 struct btrfs_root_item *root_item = &root->root_item;
5241 * We need to manually check the first inode item(256)
5242 * As the following traversal function will only start from
5243 * the first inode item in the leaf, if inode item(256) is missing
5244 * we will just skip it forever.
5246 ret = check_fs_first_inode(root, ext_ref);
5250 memset(&nrefs, 0, sizeof(nrefs));
5251 level = btrfs_header_level(root->node);
5252 btrfs_init_path(&path);
/*
 * Root still referenced, or no drop progress recorded: start the walk at
 * slot 0 of the root node, taking an extra reference on it for the path.
 */
5254 if (btrfs_root_refs(root_item) > 0 ||
5255 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5256 path.nodes[level] = root->node;
5257 path.slots[level] = 0;
5258 extent_buffer_get(root->node);
/*
 * The remaining visible path resumes from drop_progress: search for that
 * key with lowest_level set to the recorded drop_level. Presumably this
 * is the else branch - its keyword line is missing.
 */
5260 struct btrfs_key key;
5262 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5263 level = root_item->drop_level;
5264 path.lowest_level = level;
5265 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5272 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5275 /* if ret is negative, walk shall stop */
5281 ret = walk_up_tree_v2(root, &path, &level);
5283 /* Normal exit, reset ret to err */
5290 btrfs_release_path(&path);
5295 * Find the relative ref for root_ref and root_backref.
5297 * @root: the root of the root tree.
5298 * @ref_key: the key of the root ref.
5300 * Return 0 if no error occurred.
/*
 * Check that the ROOT_REF or ROOT_BACKREF in item @slot of @node has a
 * counterpart of the opposite type in @root, and that dirid, sequence,
 * name length and name agree between the two.
 * NOTE(review): some local declarations, the condition on the search
 * result and the return path are on lines missing from this extract.
 */
5302 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5303 struct extent_buffer *node, int slot)
5305 struct btrfs_path path;
5306 struct btrfs_key key;
5307 struct btrfs_root_ref *ref;
5308 struct btrfs_root_ref *backref;
5309 char ref_name[BTRFS_NAME_LEN] = {0};
5310 char backref_name[BTRFS_NAME_LEN] = {0};
5316 u32 backref_namelen;
5321 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5322 ref_dirid = btrfs_root_ref_dirid(node, ref);
5323 ref_seq = btrfs_root_ref_sequence(node, ref);
5324 ref_namelen = btrfs_root_ref_name_len(node, ref);
/*
 * Names longer than BTRFS_NAME_LEN are read truncated to BTRFS_NAME_LEN
 * after a warning. The assignment for the in-range case is not visible.
 */
5326 if (ref_namelen <= BTRFS_NAME_LEN) {
5329 len = BTRFS_NAME_LEN;
5330 warning("%s[%llu %llu] ref_name too long",
5331 ref_key->type == BTRFS_ROOT_REF_KEY ?
5332 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes follow the root ref header directly. */
5335 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5337 /* Find relative root_ref */
/*
 * Build the counterpart key: objectid and offset are swapped, and the
 * type is flipped - the sum of the two key types minus the current type
 * yields the other one (valid when ref_key->type is REF or BACKREF).
 */
5338 key.objectid = ref_key->offset;
5339 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5340 key.offset = ref_key->objectid;
5342 btrfs_init_path(&path);
5343 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5345 err |= ROOT_REF_MISSING;
5346 error("%s[%llu %llu] couldn't find relative ref",
5347 ref_key->type == BTRFS_ROOT_REF_KEY ?
5348 "ROOT_REF" : "ROOT_BACKREF",
5349 ref_key->objectid, ref_key->offset);
5353 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5354 struct btrfs_root_ref);
5355 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5356 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5357 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5359 if (backref_namelen <= BTRFS_NAME_LEN) {
5360 len = backref_namelen;
5362 len = BTRFS_NAME_LEN;
5363 warning("%s[%llu %llu] ref_name too long",
5364 key.type == BTRFS_ROOT_REF_KEY ?
5365 "ROOT_REF" : "ROOT_BACKREF",
5366 key.objectid, key.offset);
5368 read_extent_buffer(path.nodes[0], backref_name,
5369 (unsigned long)(backref + 1), len);
/*
 * Every field must agree. The name comparison uses len, which at this
 * point holds the (possibly clamped) backref name length.
 */
5371 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5372 ref_namelen != backref_namelen ||
5373 strncmp(ref_name, backref_name, len)) {
5374 err |= ROOT_REF_MISMATCH;
5375 error("%s[%llu %llu] mismatch relative ref",
5376 ref_key->type == BTRFS_ROOT_REF_KEY ?
5377 "ROOT_REF" : "ROOT_BACKREF",
5378 ref_key->objectid, ref_key->offset);
5381 btrfs_release_path(&path);
5386 * Check all fs/file tree in low_memory mode.
5388 * 1. for fs tree root item, call check_fs_root_v2()
5389 * 2. for fs tree root ref/backref, call check_root_ref()
5391 * Return 0 if no error occurred.
/*
 * Iterate over the tree root starting at the ROOT_ITEM of
 * BTRFS_FS_TREE_OBJECTID. Each ROOT_ITEM accepted by fs_root_objectid()
 * is read and passed to check_fs_root_v2(); each ROOT_REF/ROOT_BACKREF is
 * passed to check_root_ref(). Iteration advances with btrfs_next_item().
 * NOTE(review): local declarations, the loop construct, the handling of
 * return values and the return statement are on lines missing from this
 * extract.
 */
5393 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5395 struct btrfs_root *tree_root = fs_info->tree_root;
5396 struct btrfs_root *cur_root = NULL;
5397 struct btrfs_path path;
5398 struct btrfs_key key;
5399 struct extent_buffer *node;
5400 unsigned int ext_ref;
/* Whether the filesystem has the EXTENDED_IREF incompat feature. */
5405 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5407 btrfs_init_path(&path);
5408 key.objectid = BTRFS_FS_TREE_OBJECTID;
5410 key.type = BTRFS_ROOT_ITEM_KEY;
5412 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5416 } else if (ret > 0) {
5422 node = path.nodes[0];
5423 slot = path.slots[0];
5424 btrfs_item_key_to_cpu(node, &key, slot);
/* Keys beyond BTRFS_LAST_FREE_OBJECTID are tested for here. */
5425 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5427 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5428 fs_root_objectid(key.objectid)) {
/*
 * Reloc tree roots are read with the no_cache variant and freed again
 * after the check (see btrfs_free_fs_root() below). The other visible
 * path sets key.offset to (u64)-1 before btrfs_read_fs_root();
 * presumably that is the else branch, whose keyword line is missing.
 */
5429 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5430 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5433 key.offset = (u64)-1;
5434 cur_root = btrfs_read_fs_root(fs_info, &key);
5437 if (IS_ERR(cur_root)) {
5438 error("Fail to read fs/subvol tree: %lld",
5444 ret = check_fs_root_v2(cur_root, ext_ref);
5447 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5448 btrfs_free_fs_root(cur_root);
5449 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5450 key.type == BTRFS_ROOT_BACKREF_KEY) {
5451 ret = check_root_ref(tree_root, &key, node, slot);
5455 ret = btrfs_next_item(tree_root, &path);
5465 btrfs_release_path(&path);
/*
 * Entry point for the fs-root check.
 *
 * Prints "checking fs roots" to stderr unless ctx.progress_enabled is set,
 * then dispatches on check_mode: CHECK_MODE_LOWMEM uses check_fs_roots_v2(),
 * the other call site uses check_fs_roots() with @root_cache.
 */
5469 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
5470 struct cache_tree *root_cache)
5474 if (!ctx.progress_enabled)
5475 fprintf(stderr, "checking fs roots\n");
5476 if (check_mode == CHECK_MODE_LOWMEM)
5477 ret = check_fs_roots_v2(fs_info);
5479 ret = check_fs_roots(fs_info, root_cache);
/*
 * Cross-check every backref attached to @rec.
 *
 * For each entry on rec->backrefs this reports:
 *  - backrefs that were not found in the extent tree,
 *  - tree backrefs that were never referenced back,
 *  - data backrefs whose found_ref differs from num_refs,
 *  - data backrefs whose disk_bytenr or bytes disagree with rec->start and
 *    rec->nr.
 * Data backrefs add their found_ref to a running total that is finally
 * compared against rec->refs.
 *
 * maybe_free_extent_rec() treats a zero return as "nothing left to complain
 * about".
 * NOTE(review): @print_errs presumably gates the fprintf() output; confirm
 * against the complete definition.
 */
5484 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5486 struct list_head *cur = rec->backrefs.next;
5487 struct extent_backref *back;
5488 struct tree_backref *tback;
5489 struct data_backref *dback;
5493 while(cur != &rec->backrefs) {
5494 back = to_extent_backref(cur);
5496 if (!back->found_extent_tree) {
5500 if (back->is_data) {
5501 dback = to_data_backref(back);
5502 fprintf(stderr, "Data backref %llu %s %llu"
5503 " owner %llu offset %llu num_refs %lu"
5504 " not found in extent tree\n",
5505 (unsigned long long)rec->start,
5506 back->full_backref ?
5508 back->full_backref ?
5509 (unsigned long long)dback->parent:
5510 (unsigned long long)dback->root,
5511 (unsigned long long)dback->owner,
5512 (unsigned long long)dback->offset,
5513 (unsigned long)dback->num_refs);
5515 tback = to_tree_backref(back);
5516 fprintf(stderr, "Tree backref %llu parent %llu"
5517 " root %llu not found in extent tree\n",
5518 (unsigned long long)rec->start,
5519 (unsigned long long)tback->parent,
5520 (unsigned long long)tback->root);
5523 if (!back->is_data && !back->found_ref) {
5527 tback = to_tree_backref(back);
5528 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5529 (unsigned long long)rec->start,
5530 back->full_backref ? "parent" : "root",
5531 back->full_backref ?
5532 (unsigned long long)tback->parent :
5533 (unsigned long long)tback->root, back);
5535 if (back->is_data) {
5536 dback = to_data_backref(back);
5537 if (dback->found_ref != dback->num_refs) {
5541 fprintf(stderr, "Incorrect local backref count"
5542 " on %llu %s %llu owner %llu"
5543 " offset %llu found %u wanted %u back %p\n",
5544 (unsigned long long)rec->start,
5545 back->full_backref ?
5547 back->full_backref ?
5548 (unsigned long long)dback->parent:
5549 (unsigned long long)dback->root,
5550 (unsigned long long)dback->owner,
5551 (unsigned long long)dback->offset,
5552 dback->found_ref, dback->num_refs, back);
5554 if (dback->disk_bytenr != rec->start) {
5558 fprintf(stderr, "Backref disk bytenr does not"
5559 " match extent record, bytenr=%llu, "
5560 "ref bytenr=%llu\n",
5561 (unsigned long long)rec->start,
5562 (unsigned long long)dback->disk_bytenr);
5565 if (dback->bytes != rec->nr) {
5569 fprintf(stderr, "Backref bytes do not match "
5570 "extent backref, bytenr=%llu, ref "
5571 "bytes=%llu, backref bytes=%llu\n",
5572 (unsigned long long)rec->start,
5573 (unsigned long long)rec->nr,
5574 (unsigned long long)dback->bytes);
5577 if (!back->is_data) {
5580 dback = to_data_backref(back);
5581 found += dback->found_ref;
5584 if (found != rec->refs) {
5588 fprintf(stderr, "Incorrect global backref count "
5589 "on %llu found %llu wanted %llu\n",
5590 (unsigned long long)rec->start,
5591 (unsigned long long)found,
5592 (unsigned long long)rec->refs);
/*
 * Drain rec->backrefs: while the list is not empty, take its first entry and
 * convert it to the containing extent_backref.
 * NOTE(review): the unlink and release of each entry are expected inside the
 * loop; confirm against the complete definition.
 */
5598 static int free_all_extent_backrefs(struct extent_record *rec)
5600 struct extent_backref *back;
5601 struct list_head *cur;
5602 while (!list_empty(&rec->backrefs)) {
5603 cur = rec->backrefs.next;
5604 back = to_extent_backref(cur);
/*
 * Tear down @extent_cache: take the first cached extent, remove it from the
 * tree and release the backrefs of the extent_record that embeds it.
 */
5611 static void free_extent_record_cache(struct cache_tree *extent_cache)
5613 struct cache_extent *cache;
5614 struct extent_record *rec;
5617 cache = first_cache_extent(extent_cache);
5620 rec = container_of(cache, struct extent_record, cache);
5621 remove_cache_extent(extent_cache, cache);
5622 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify.
 *
 * The record is removed only when all of the following hold: content and
 * owner ref were checked, extent_item_refs equals a non-zero refs count,
 * there are no duplicates, all_backpointers_checked() returns 0 and none of
 * bad_full_backref / crossing_stripes / wrong_chunk_type is set.
 * Removal also releases the backrefs and unlinks rec->list.
 */
5627 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5628 struct extent_record *rec)
5630 if (rec->content_checked && rec->owner_ref_checked &&
5631 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5632 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5633 !rec->bad_full_backref && !rec->crossing_stripes &&
5634 !rec->wrong_chunk_type) {
5635 remove_cache_extent(extent_cache, &rec->cache);
5636 free_all_extent_backrefs(rec);
5637 list_del_init(&rec->list);
/*
 * Verify that tree block @buf is referenced by the root its header names.
 *
 * First the backrefs of @rec are scanned for a found, non-full tree backref
 * whose root equals btrfs_header_owner(buf).  Failing that, the owner root is
 * read and searched for the first key of @buf with lowest_level set to
 * level + 1; the block pointer stored in the parent slot is then compared
 * with buf->start.
 *
 * The final return is 0 when the parent pointer matches and 1 otherwise.
 */
5643 static int check_owner_ref(struct btrfs_root *root,
5644 struct extent_record *rec,
5645 struct extent_buffer *buf)
5647 struct extent_backref *node;
5648 struct tree_backref *back;
5649 struct btrfs_root *ref_root;
5650 struct btrfs_key key;
5651 struct btrfs_path path;
5652 struct extent_buffer *parent;
5657 list_for_each_entry(node, &rec->backrefs, list) {
5660 if (!node->found_ref)
5662 if (node->full_backref)
5664 back = to_tree_backref(node);
5665 if (btrfs_header_owner(buf) == back->root)
5668 BUG_ON(rec->is_root);
5670 /* try to find the block by search corresponding fs tree */
5671 key.objectid = btrfs_header_owner(buf);
5672 key.type = BTRFS_ROOT_ITEM_KEY;
5673 key.offset = (u64)-1;
5675 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5676 if (IS_ERR(ref_root))
5679 level = btrfs_header_level(buf);
5681 btrfs_item_key_to_cpu(buf, &key, 0);
5683 btrfs_node_key_to_cpu(buf, &key, 0);
5685 btrfs_init_path(&path);
5686 path.lowest_level = level + 1;
5687 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5691 parent = path.nodes[level + 1];
5692 if (parent && buf->start == btrfs_node_blockptr(parent,
5693 path.slots[level + 1]))
5696 btrfs_release_path(&path);
5697 return found ? 0 : 1;
/*
 * Scan the backrefs of @rec for a tree backref whose root is
 * BTRFS_EXTENT_TREE_OBJECTID.  The full_backref flag of each entry is tested
 * before the root comparison.
 */
5700 static int is_extent_tree_record(struct extent_record *rec)
5702 struct list_head *cur = rec->backrefs.next;
5703 struct extent_backref *node;
5704 struct tree_backref *back;
5707 while(cur != &rec->backrefs) {
5708 node = to_extent_backref(cur);
5712 back = to_tree_backref(node);
5713 if (node->full_backref)
5715 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Remember an unreadable extent-tree block so it can be dealt with later.
 *
 * Looks up the extent record for @start/@len in @extent_cache and tests it
 * with is_extent_tree_record().  The last statement registers a corrupt
 * extent record keyed by rec->parent_key through
 * btrfs_add_corrupt_extent_record() and returns its result.
 */
5722 static int record_bad_block_io(struct btrfs_fs_info *info,
5723 struct cache_tree *extent_cache,
5726 struct extent_record *rec;
5727 struct cache_extent *cache;
5728 struct btrfs_key key;
5730 cache = lookup_cache_extent(extent_cache, start, len);
5734 rec = container_of(cache, struct extent_record, cache);
5735 if (!is_extent_tree_record(rec))
5738 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5739 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 of @buf.
 *
 * Nodes (non-zero header level): the two btrfs_key_ptr structures are read
 * out and written back to the opposite slot; key 0 of the node is passed to
 * btrfs_fixup_low_keys().
 * Leaves: both item payloads are copied into temporary buffers, written back
 * at the offset of the other item, the offset and size fields are exchanged
 * and both keys are rewritten with btrfs_set_item_key_unsafe().
 */
5742 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5743 struct extent_buffer *buf, int slot)
5745 if (btrfs_header_level(buf)) {
5746 struct btrfs_key_ptr ptr1, ptr2;
5748 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5749 sizeof(struct btrfs_key_ptr));
5750 read_extent_buffer(buf, &ptr2,
5751 btrfs_node_key_ptr_offset(slot + 1),
5752 sizeof(struct btrfs_key_ptr));
5753 write_extent_buffer(buf, &ptr1,
5754 btrfs_node_key_ptr_offset(slot + 1),
5755 sizeof(struct btrfs_key_ptr));
5756 write_extent_buffer(buf, &ptr2,
5757 btrfs_node_key_ptr_offset(slot),
5758 sizeof(struct btrfs_key_ptr));
5760 struct btrfs_disk_key key;
5761 btrfs_node_key(buf, &key, 0);
5762 btrfs_fixup_low_keys(root, path, &key,
5763 btrfs_header_level(buf) + 1);
5766 struct btrfs_item *item1, *item2;
5767 struct btrfs_key k1, k2;
5768 char *item1_data, *item2_data;
5769 u32 item1_offset, item2_offset, item1_size, item2_size;
5771 item1 = btrfs_item_nr(slot);
5772 item2 = btrfs_item_nr(slot + 1);
5773 btrfs_item_key_to_cpu(buf, &k1, slot);
5774 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5775 item1_offset = btrfs_item_offset(buf, item1);
5776 item2_offset = btrfs_item_offset(buf, item2);
5777 item1_size = btrfs_item_size(buf, item1);
5778 item2_size = btrfs_item_size(buf, item2);
5780 item1_data = malloc(item1_size);
5783 item2_data = malloc(item2_size);
5789 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5790 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but is written out with
 * item2_size bytes below, and item2_data likewise with item1_size.  Confirm
 * this is intended when the two item sizes differ.
 */
5792 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5793 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5797 btrfs_set_item_offset(buf, item1, item2_offset);
5798 btrfs_set_item_offset(buf, item2, item1_offset);
5799 btrfs_set_item_size(buf, item1, item2_size);
5800 btrfs_set_item_size(buf, item2, item1_size);
5802 path->slots[0] = slot;
5803 btrfs_set_item_key_unsafe(root, path, &k2);
5804 path->slots[0] = slot + 1;
5805 btrfs_set_item_key_unsafe(root, path, &k1);
5810 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5812 struct extent_buffer *buf;
5813 struct btrfs_key k1, k2;
5815 int level = path->lowest_level;
5818 buf = path->nodes[level];
5819 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5821 btrfs_node_key_to_cpu(buf, &k1, i);
5822 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5824 btrfs_item_key_to_cpu(buf, &k1, i);
5825 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5827 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5829 ret = swap_values(root, path, buf, i);
5832 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only DIR_INDEX, EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF and
 * EXTENT_DATA_REF keys are eligible.  The item headers following @slot are
 * moved down by one position, nritems is decremented and the buffer is
 * marked dirty.  The first key of the leaf can be propagated upwards with
 * btrfs_fixup_low_keys().
 */
5838 static int delete_bogus_item(struct btrfs_root *root,
5839 struct btrfs_path *path,
5840 struct extent_buffer *buf, int slot)
5842 struct btrfs_key key;
5843 int nritems = btrfs_header_nritems(buf);
5845 btrfs_item_key_to_cpu(buf, &key, slot);
5847 /* These are all the keys we can deal with missing. */
5848 if (key.type != BTRFS_DIR_INDEX_KEY &&
5849 key.type != BTRFS_EXTENT_ITEM_KEY &&
5850 key.type != BTRFS_METADATA_ITEM_KEY &&
5851 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5852 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5855 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5856 (unsigned long long)key.objectid, key.type,
5857 (unsigned long long)key.offset, slot, buf->start);
5858 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5859 btrfs_item_nr_offset(slot + 1),
5860 sizeof(struct btrfs_item) *
5861 (nritems - slot - 1));
5862 btrfs_set_header_nritems(buf, nritems - 1);
5864 struct btrfs_disk_key disk_key;
5866 btrfs_item_key(buf, &disk_key, 0);
5867 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5869 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Item 0 is expected to end at BTRFS_LEAF_DATA_SIZE(root) and every later
 * item to end where the previous item starts.  An item that ends beyond that
 * boundary is handed to delete_bogus_item(); an item that ends short of it
 * has its data moved forward by the size of the gap with
 * memmove_extent_buffer() and its offset field rewritten.
 */
5873 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5875 struct extent_buffer *buf;
5879 /* We should only get this for leaves */
5880 BUG_ON(path->lowest_level);
5881 buf = path->nodes[0];
5883 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5884 unsigned int shift = 0, offset;
5886 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5887 BTRFS_LEAF_DATA_SIZE(root)) {
5888 if (btrfs_item_end_nr(buf, i) >
5889 BTRFS_LEAF_DATA_SIZE(root)) {
5890 ret = delete_bogus_item(root, path, buf, i);
5893 fprintf(stderr, "item is off the end of the "
5894 "leaf, can't fix\n");
5898 shift = BTRFS_LEAF_DATA_SIZE(root) -
5899 btrfs_item_end_nr(buf, i);
5900 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5901 btrfs_item_offset_nr(buf, i - 1)) {
5902 if (btrfs_item_end_nr(buf, i) >
5903 btrfs_item_offset_nr(buf, i - 1)) {
5904 ret = delete_bogus_item(root, path, buf, i);
5907 fprintf(stderr, "items overlap, can't fix\n");
5911 shift = btrfs_item_offset_nr(buf, i - 1) -
5912 btrfs_item_end_nr(buf, i);
5917 printf("Shifting item nr %d by %u bytes in block %llu\n",
5918 i, shift, (unsigned long long)buf->start);
5919 offset = btrfs_item_offset_nr(buf, i);
5920 memmove_extent_buffer(buf,
5921 btrfs_leaf_data(buf) + offset + shift,
5922 btrfs_leaf_data(buf) + offset,
5923 btrfs_item_size_nr(buf, i));
5924 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5926 btrfs_mark_buffer_dirty(buf);
5930 * We may have moved things, in which case we want to exit so we don't
5931 * write those changes out. Once we have proper abort functionality in
5932 * progs this can be changed to something nicer.
/*
 * Attempt to fix basic block failures.  If we can't fix it for whatever reason
 * then just return -EIO.
 */
/*
 * Repair @buf in every root that references it.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled.  The referencing roots are collected with
 * btrfs_find_all_roots().  For each of them a transaction is started and the
 * block is located by searching for its first key with skip_check_block set,
 * lowest_level equal to the level of @buf and the cow argument set to 1.
 * fix_key_order() or fix_item_offset() is then applied according to @status
 * and the transaction is committed.
 */
5942 static int try_to_fix_bad_block(struct btrfs_root *root,
5943 struct extent_buffer *buf,
5944 enum btrfs_tree_block_status status)
5946 struct btrfs_trans_handle *trans;
5947 struct ulist *roots;
5948 struct ulist_node *node;
5949 struct btrfs_root *search_root;
5950 struct btrfs_path path;
5951 struct ulist_iterator iter;
5952 struct btrfs_key root_key, key;
5955 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5956 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5959 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5963 btrfs_init_path(&path);
5964 ULIST_ITER_INIT(&iter);
5965 while ((node = ulist_next(roots, &iter))) {
5966 root_key.objectid = node->val;
5967 root_key.type = BTRFS_ROOT_ITEM_KEY;
5968 root_key.offset = (u64)-1;
5970 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5977 trans = btrfs_start_transaction(search_root, 0);
5978 if (IS_ERR(trans)) {
5979 ret = PTR_ERR(trans);
5983 path.lowest_level = btrfs_header_level(buf);
5984 path.skip_check_block = 1;
5985 if (path.lowest_level)
5986 btrfs_node_key_to_cpu(buf, &key, 0);
5988 btrfs_item_key_to_cpu(buf, &key, 0);
5989 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5992 btrfs_commit_transaction(trans, search_root);
5995 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5996 ret = fix_key_order(search_root, &path);
5997 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5998 ret = fix_item_offset(search_root, &path);
6000 btrfs_commit_transaction(trans, search_root);
6003 btrfs_release_path(&path);
6004 btrfs_commit_transaction(trans, search_root);
6007 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record in @extent_cache.
 *
 * The record receives the header generation, the objectid of the first key
 * (when the block has items) and the block level.  The block is verified
 * with btrfs_check_leaf() or btrfs_check_node() against rec->parent_key; a
 * status other than BTRFS_TREE_BLOCK_CLEAN may be passed on to
 * try_to_fix_bad_block().  content_checked is set afterwards, and
 * owner_ref_checked is set either because @flags carries
 * BTRFS_BLOCK_FLAG_FULL_BACKREF or in connection with check_owner_ref().
 * Finally the record is offered to maybe_free_extent_rec().
 */
6011 static int check_block(struct btrfs_root *root,
6012 struct cache_tree *extent_cache,
6013 struct extent_buffer *buf, u64 flags)
6015 struct extent_record *rec;
6016 struct cache_extent *cache;
6017 struct btrfs_key key;
6018 enum btrfs_tree_block_status status;
6022 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6025 rec = container_of(cache, struct extent_record, cache);
6026 rec->generation = btrfs_header_generation(buf);
6028 level = btrfs_header_level(buf);
6029 if (btrfs_header_nritems(buf) > 0) {
6032 btrfs_item_key_to_cpu(buf, &key, 0);
6034 btrfs_node_key_to_cpu(buf, &key, 0);
6036 rec->info_objectid = key.objectid;
6038 rec->info_level = level;
6040 if (btrfs_is_leaf(buf))
6041 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6043 status = btrfs_check_node(root, &rec->parent_key, buf);
6045 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6047 status = try_to_fix_bad_block(root, buf, status);
6048 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6050 fprintf(stderr, "bad block %llu\n",
6051 (unsigned long long)buf->start);
6054 * Signal to callers we need to start the scan over
6055 * again since we'll have cowed blocks.
6060 rec->content_checked = 1;
6061 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6062 rec->owner_ref_checked = 1;
6064 ret = check_owner_ref(root, rec, buf);
6066 rec->owner_ref_checked = 1;
6070 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the backrefs of @rec for a tree backref.
 *
 * Two comparisons are made while walking rec->backrefs: @parent against
 * back->parent, guarded by a test on full_backref, and @root against
 * back->root, guarded by the opposite test.
 */
6075 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6076 u64 parent, u64 root)
6078 struct list_head *cur = rec->backrefs.next;
6079 struct extent_backref *node;
6080 struct tree_backref *back;
6082 while(cur != &rec->backrefs) {
6083 node = to_extent_backref(cur);
6087 back = to_tree_backref(node);
6089 if (!node->full_backref)
6091 if (parent == back->parent)
6094 if (node->full_backref)
6096 if (back->root == root)
/*
 * Allocate a tree_backref for @rec.
 *
 * The embedded node is zeroed.  One branch stores @parent and marks the
 * backref as a full backref, the other clears full_backref.  The new backref
 * is appended to rec->backrefs.
 */
6104 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
6105 u64 parent, u64 root)
6107 struct tree_backref *ref = malloc(sizeof(*ref));
6111 memset(&ref->node, 0, sizeof(ref->node));
6113 ref->parent = parent;
6114 ref->node.full_backref = 1;
6117 ref->node.full_backref = 0;
6119 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search the backrefs of @rec for a data backref.
 *
 * Full backrefs are compared on @parent.  Other backrefs are compared on the
 * (@root, @owner, @offset) triple; when @found_ref is set and the candidate
 * already has found_ref set, a candidate whose bytes or disk_bytenr differ
 * from @bytes / @disk_bytenr is treated specially (presumably skipped so the
 * caller allocates a separate backref - TODO confirm).
 */
6125 static struct data_backref *find_data_backref(struct extent_record *rec,
6126 u64 parent, u64 root,
6127 u64 owner, u64 offset,
6129 u64 disk_bytenr, u64 bytes)
6131 struct list_head *cur = rec->backrefs.next;
6132 struct extent_backref *node;
6133 struct data_backref *back;
6135 while(cur != &rec->backrefs) {
6136 node = to_extent_backref(cur);
6140 back = to_data_backref(node);
6142 if (!node->full_backref)
6144 if (parent == back->parent)
6147 if (node->full_backref)
6149 if (back->root == root && back->owner == owner &&
6150 back->offset == offset) {
6151 if (found_ref && node->found_ref &&
6152 (back->bytes != bytes ||
6153 back->disk_bytenr != disk_bytenr))
6163 static struct data_backref *alloc_data_backref(struct extent_record *rec,
6164 u64 parent, u64 root,
6165 u64 owner, u64 offset,
6168 struct data_backref *ref = malloc(sizeof(*ref));
6172 memset(&ref->node, 0, sizeof(ref->node));
6173 ref->node.is_data = 1;
6176 ref->parent = parent;
6179 ref->node.full_backref = 1;
6183 ref->offset = offset;
6184 ref->node.full_backref = 0;
6186 ref->bytes = max_size;
6189 list_add_tail(&ref->node.list, &rec->backrefs);
6190 if (max_size > rec->max_size)
6191 rec->max_size = max_size;
6195 /* Check if the type of extent matches with its chunk */
/*
 * Set rec->wrong_chunk_type when the block group found for rec->start does
 * not fit the extent.
 *
 * Data extents require BTRFS_BLOCK_GROUP_DATA.  Metadata extents require
 * SYSTEM or METADATA.  When the record has backrefs, the first one refines
 * the test: a data backref on a metadata extent is flagged, a tree backref
 * with root BTRFS_CHUNK_TREE_OBJECTID expects a SYSTEM block group and any
 * other root expects METADATA.
 */
6196 static void check_extent_type(struct extent_record *rec)
6198 struct btrfs_block_group_cache *bg_cache;
6200 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6204 /* data extent, check chunk directly*/
6205 if (!rec->metadata) {
6206 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6207 rec->wrong_chunk_type = 1;
6211 /* metadata extent, check the obvious case first */
6212 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6213 BTRFS_BLOCK_GROUP_METADATA))) {
6214 rec->wrong_chunk_type = 1;
6219 * Check SYSTEM extent, as it's also marked as metadata, we can only
6220 * make sure it's a SYSTEM extent by its backref
6222 if (!list_empty(&rec->backrefs)) {
6223 struct extent_backref *node;
6224 struct tree_backref *tback;
6227 node = to_extent_backref(rec->backrefs.next);
6228 if (node->is_data) {
6229 /* tree block shouldn't have data backref */
6230 rec->wrong_chunk_type = 1;
6233 tback = container_of(node, struct tree_backref, node);
6235 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6236 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6238 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6239 if (!(bg_cache->flags & bg_type))
6240 rec->wrong_chunk_type = 1;
/*
 * Allocate a new extent record, fill default values from @tmpl and insert into
 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
 * the cache, otherwise it fails.
 */
/*
 * @extent_cache: tree that receives the new record
 * @tmpl:         source of the initial field values; max_size must be
 *                non-zero (enforced with BUG_ON)
 *
 * rec->nr becomes the larger of tmpl->nr and tmpl->max_size while the cache
 * entry itself is sized tmpl->nr.  After a successful insert bytes_used grows
 * by rec->nr and the record is classified by check_crossing_stripes() and
 * check_extent_type().
 *
 * NOTE(review): the record comes from malloc() and only the fields assigned
 * below are initialised here; generation, info_objectid and info_level are
 * assigned later by check_block().
 */
6249 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6250 struct extent_record *tmpl)
6252 struct extent_record *rec;
6255 BUG_ON(tmpl->max_size == 0);
6256 rec = malloc(sizeof(*rec));
6259 rec->start = tmpl->start;
6260 rec->max_size = tmpl->max_size;
6261 rec->nr = max(tmpl->nr, tmpl->max_size);
6262 rec->found_rec = tmpl->found_rec;
6263 rec->content_checked = tmpl->content_checked;
6264 rec->owner_ref_checked = tmpl->owner_ref_checked;
6265 rec->num_duplicates = 0;
6266 rec->metadata = tmpl->metadata;
6267 rec->flag_block_full_backref = FLAG_UNSET;
6268 rec->bad_full_backref = 0;
6269 rec->crossing_stripes = 0;
6270 rec->wrong_chunk_type = 0;
6271 rec->is_root = tmpl->is_root;
6272 rec->refs = tmpl->refs;
6273 rec->extent_item_refs = tmpl->extent_item_refs;
6274 rec->parent_generation = tmpl->parent_generation;
6275 INIT_LIST_HEAD(&rec->backrefs);
6276 INIT_LIST_HEAD(&rec->dups);
6277 INIT_LIST_HEAD(&rec->list);
6278 rec->backref_tree = RB_ROOT;
6279 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6280 rec->cache.start = tmpl->start;
6281 rec->cache.size = tmpl->nr;
6282 ret = insert_cache_extent(extent_cache, &rec->cache);
6287 bytes_used += rec->nr;
6290 rec->crossing_stripes = check_crossing_stripes(global_info,
6291 rec->start, global_info->nodesize);
6292 check_extent_type(rec);
6297 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6299 * - refs - if found, increase refs
6300 * - is_root - if found, set
6301 * - content_checked - if found, set
6302 * - owner_ref_checked - if found, set
6304 * If not found, create a new one, initialize and insert.
/*
 * @extent_cache: cache searched with tmpl->start / tmpl->nr
 * @tmpl:         template carrying the values to merge or insert
 *
 * When a record already covers the range and @tmpl describes an extent item
 * (found_rec) at a different start, or for a record that already has
 * found_rec set, the record is queued on duplicate_extents and a reduced copy
 * of @tmpl is appended to rec->dups.  extent_item_refs is taken from @tmpl
 * unless that duplicate path was hit, with a message when the record already
 * had a value.  The checked flags, parent_key, parent_generation and
 * max_size are folded into the record, which is then re-evaluated by
 * check_extent_type() and maybe_free_extent_rec().
 *
 * Without an existing record the work is delegated to
 * add_extent_rec_nolookup().
 */
6306 static int add_extent_rec(struct cache_tree *extent_cache,
6307 struct extent_record *tmpl)
6309 struct extent_record *rec;
6310 struct cache_extent *cache;
6314 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6316 rec = container_of(cache, struct extent_record, cache);
6320 rec->nr = max(tmpl->nr, tmpl->max_size);
6323 * We need to make sure to reset nr to whatever the extent
6324 * record says was the real size, this way we can compare it to
6327 if (tmpl->found_rec) {
6328 if (tmpl->start != rec->start || rec->found_rec) {
6329 struct extent_record *tmp;
6332 if (list_empty(&rec->list))
6333 list_add_tail(&rec->list,
6334 &duplicate_extents);
6337 * We have to do this song and dance in case we
6338 * find an extent record that falls inside of
6339 * our current extent record but does not have
6340 * the same objectid.
6342 tmp = malloc(sizeof(*tmp));
6345 tmp->start = tmpl->start;
6346 tmp->max_size = tmpl->max_size;
6349 tmp->metadata = tmpl->metadata;
6350 tmp->extent_item_refs = tmpl->extent_item_refs;
6351 INIT_LIST_HEAD(&tmp->list);
6352 list_add_tail(&tmp->list, &rec->dups);
6353 rec->num_duplicates++;
6360 if (tmpl->extent_item_refs && !dup) {
6361 if (rec->extent_item_refs) {
6362 fprintf(stderr, "block %llu rec "
6363 "extent_item_refs %llu, passed %llu\n",
6364 (unsigned long long)tmpl->start,
6365 (unsigned long long)
6366 rec->extent_item_refs,
6367 (unsigned long long)tmpl->extent_item_refs);
6369 rec->extent_item_refs = tmpl->extent_item_refs;
6373 if (tmpl->content_checked)
6374 rec->content_checked = 1;
6375 if (tmpl->owner_ref_checked)
6376 rec->owner_ref_checked = 1;
6377 memcpy(&rec->parent_key, &tmpl->parent_key,
6378 sizeof(tmpl->parent_key));
6379 if (tmpl->parent_generation)
6380 rec->parent_generation = tmpl->parent_generation;
6381 if (rec->max_size < tmpl->max_size)
6382 rec->max_size = tmpl->max_size;
6385 * A metadata extent can't cross stripe_len boundary, otherwise
6386 * kernel scrub won't be able to handle it.
6387 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6391 rec->crossing_stripes = check_crossing_stripes(
6392 global_info, rec->start,
6393 global_info->nodesize);
6394 check_extent_type(rec);
6395 maybe_free_extent_rec(extent_cache, rec);
6399 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref for the extent at @bytenr.
 *
 * A template-based extent record is inserted with add_extent_rec_nolookup()
 * when the cache has no entry for @bytenr.  The backref matching
 * (@parent, @root) is looked up with find_tree_backref() or created with
 * alloc_tree_backref().  One branch sets found_ref and the other sets
 * found_extent_tree on it (presumably selected by @found_ref - TODO confirm);
 * either branch prints a message when its flag was already set.  The backref
 * can be inserted into rec->backref_tree, after which the record is
 * re-evaluated by check_extent_type() and maybe_free_extent_rec().
 */
6404 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6405 u64 parent, u64 root, int found_ref)
6407 struct extent_record *rec;
6408 struct tree_backref *back;
6409 struct cache_extent *cache;
6411 bool insert = false;
6413 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6415 struct extent_record tmpl;
6417 memset(&tmpl, 0, sizeof(tmpl));
6418 tmpl.start = bytenr;
6423 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6427 /* really a bug in cache_extent implement now */
6428 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6433 rec = container_of(cache, struct extent_record, cache);
6434 if (rec->start != bytenr) {
6436 * Several cause, from unaligned bytenr to over lapping extents
6441 back = find_tree_backref(rec, parent, root);
6443 back = alloc_tree_backref(rec, parent, root);
6450 if (back->node.found_ref) {
6451 fprintf(stderr, "Extent back ref already exists "
6452 "for %llu parent %llu root %llu \n",
6453 (unsigned long long)bytenr,
6454 (unsigned long long)parent,
6455 (unsigned long long)root);
6457 back->node.found_ref = 1;
6459 if (back->node.found_extent_tree) {
6460 fprintf(stderr, "Extent back ref already exists "
6461 "for %llu parent %llu root %llu \n",
6462 (unsigned long long)bytenr,
6463 (unsigned long long)parent,
6464 (unsigned long long)root);
6466 back->node.found_extent_tree = 1;
6469 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6470 compare_extent_backref));
6471 check_extent_type(rec);
6472 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * A template-based extent record (with @max_size) is inserted when the cache
 * has no entry for @bytenr, and rec->max_size is raised to @max_size.  The
 * backref is looked up with find_data_backref() or created with
 * alloc_data_backref().
 *
 * In the branch that counts a file extent reference, @num_refs must be 1 and
 * a backref that was already found must carry the same byte count (both
 * enforced with BUG_ON).  That branch bumps found_ref, refreshes bytes and
 * disk_bytenr when they differ, and marks the record as content and owner
 * checked.  The other branch stores @num_refs and sets found_extent_tree,
 * printing a message when it was already set.
 */
6476 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6477 u64 parent, u64 root, u64 owner, u64 offset,
6478 u32 num_refs, int found_ref, u64 max_size)
6480 struct extent_record *rec;
6481 struct data_backref *back;
6482 struct cache_extent *cache;
6484 bool insert = false;
6486 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6488 struct extent_record tmpl;
6490 memset(&tmpl, 0, sizeof(tmpl));
6491 tmpl.start = bytenr;
6493 tmpl.max_size = max_size;
6495 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6499 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6504 rec = container_of(cache, struct extent_record, cache);
6505 if (rec->max_size < max_size)
6506 rec->max_size = max_size;
6509 * If found_ref is set then max_size is the real size and must match the
6510 * existing refs. So if we have already found a ref then we need to
6511 * make sure that this ref matches the existing one, otherwise we need
6512 * to add a new backref so we can notice that the backrefs don't match
6513 * and we need to figure out who is telling the truth. This is to
6514 * account for that awful fsync bug I introduced where we'd end up with
6515 * a btrfs_file_extent_item that would have its length include multiple
6516 * prealloc extents or point inside of a prealloc extent.
6518 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6521 back = alloc_data_backref(rec, parent, root, owner, offset,
6528 BUG_ON(num_refs != 1);
6529 if (back->node.found_ref)
6530 BUG_ON(back->bytes != max_size);
6531 back->node.found_ref = 1;
6532 back->found_ref += 1;
6533 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
6534 back->bytes = max_size;
6535 back->disk_bytenr = bytenr;
6537 /* Need to reinsert if not already in the tree */
6539 rb_erase(&back->node.node, &rec->backref_tree);
6544 rec->content_checked = 1;
6545 rec->owner_ref_checked = 1;
6547 if (back->node.found_extent_tree) {
6548 fprintf(stderr, "Extent back ref already exists "
6549 "for %llu parent %llu root %llu "
6550 "owner %llu offset %llu num_refs %lu\n",
6551 (unsigned long long)bytenr,
6552 (unsigned long long)parent,
6553 (unsigned long long)root,
6554 (unsigned long long)owner,
6555 (unsigned long long)offset,
6556 (unsigned long)num_refs);
6558 back->num_refs = num_refs;
6559 back->node.found_extent_tree = 1;
6562 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
6563 compare_extent_backref));
6565 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the block [@bytenr, @size] for processing.
 *
 * The range is first added to @seen and then to @pending.
 * NOTE(review): the result of the second add_cache_extent() call is not
 * captured; confirm that a failed insert into @pending may be ignored.
 */
6569 static int add_pending(struct cache_tree *pending,
6570 struct cache_tree *seen, u64 bytenr, u32 size)
6573 ret = add_cache_extent(seen, bytenr, size);
6576 add_cache_extent(pending, bytenr, size);
6580 static int pick_next_pending(struct cache_tree *pending,
6581 struct cache_tree *reada,
6582 struct cache_tree *nodes,
6583 u64 last, struct block_info *bits, int bits_nr,
6586 unsigned long node_start = last;
6587 struct cache_extent *cache;
6590 cache = search_cache_extent(reada, 0);
6592 bits[0].start = cache->start;
6593 bits[0].size = cache->size;
6598 if (node_start > 32768)
6599 node_start -= 32768;
6601 cache = search_cache_extent(nodes, node_start);
6603 cache = search_cache_extent(nodes, 0);
6606 cache = search_cache_extent(pending, 0);
6611 bits[ret].start = cache->start;
6612 bits[ret].size = cache->size;
6613 cache = next_cache_extent(cache);
6615 } while (cache && ret < bits_nr);
6621 bits[ret].start = cache->start;
6622 bits[ret].size = cache->size;
6623 cache = next_cache_extent(cache);
6625 } while (cache && ret < bits_nr);
6627 if (bits_nr - ret > 8) {
6628 u64 lookup = bits[0].start + bits[0].size;
6629 struct cache_extent *next;
6630 next = search_cache_extent(pending, lookup);
6632 if (next->start - lookup > 32768)
6634 bits[ret].start = next->start;
6635 bits[ret].size = next->size;
6636 lookup = next->start + next->size;
6640 next = next_cache_extent(next);
/*
 * Release callback for chunk cache entries: recover the chunk_record that
 * embeds @cache and unlink it from its list and dextents lists.
 */
6648 static void free_chunk_record(struct cache_extent *cache)
6650 struct chunk_record *rec;
6652 rec = container_of(cache, struct chunk_record, cache);
6653 list_del_init(&rec->list);
6654 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache, using free_chunk_record() as the
 * per-entry callback of cache_tree_free_extents().
 */
6658 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6660 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Release callback for device cache entries: recover the device_record that
 * embeds the rb-tree @node.
 */
6663 static void free_device_record(struct rb_node *node)
6665 struct device_record *rec;
6667 rec = container_of(node, struct device_record, node);
/*
 * Macro invocation tying device_cache to free_device_record().
 * NOTE(review): presumably this expands to the release function for the
 * device cache rb-tree; confirm against the macro definition.
 */
6671 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to
 * tree->block_groups.
 */
6673 int insert_block_group_record(struct block_group_tree *tree,
6674 struct block_group_record *bg_rec)
6678 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6682 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Release callback for block group cache entries: recover the
 * block_group_record that embeds @cache and unlink it from its list.
 */
6686 static void free_block_group_record(struct cache_extent *cache)
6688 struct block_group_record *rec;
6690 rec = container_of(cache, struct block_group_record, cache);
6691 list_del_init(&rec->list);
/*
 * Release every entry of the block group tree, using
 * free_block_group_record() as the per-entry callback.
 */
6695 void free_block_group_tree(struct block_group_tree *tree)
6697 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent tree with insert_cache_extent2() and
 * queue it on both orphan lists (no_chunk_orphans and no_device_orphans) of
 * @tree.
 */
6700 int insert_device_extent_record(struct device_extent_tree *tree,
6701 struct device_extent_record *de_rec)
6706 * Device extent is a bit different from the other extents, because
6707 * the extents which belong to the different devices may have the
6708 * same start and size, so we need use the special extent cache
6709 * search/insert functions.
6711 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6715 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6716 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Release callback for device extent cache entries: recover the
 * device_extent_record that embeds @cache and unlink it from chunk_list and
 * device_list when it is still linked on them.
 */
6720 static void free_device_extent_record(struct cache_extent *cache)
6722 struct device_extent_record *rec;
6724 rec = container_of(cache, struct device_extent_record, cache);
6725 if (!list_empty(&rec->chunk_list))
6726 list_del_init(&rec->chunk_list);
6727 if (!list_empty(&rec->device_list))
6728 list_del_init(&rec->device_list);
/*
 * Release every entry of the device extent tree, using
 * free_device_extent_record() as the per-entry callback.
 */
6732 void free_device_extent_tree(struct device_extent_tree *tree)
6734 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6737 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Convert a v0 extent ref item at @slot of @leaf into a backref.
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded with
 * add_tree_backref(); the other call records a data backref whose num_refs
 * is the v0 ref count.  In both calls key.objectid is the extent bytenr and
 * key.offset is passed as the parent.
 */
6738 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6739 struct extent_buffer *leaf, int slot)
6741 struct btrfs_extent_ref_v0 *ref0;
6742 struct btrfs_key key;
6745 btrfs_item_key_to_cpu(leaf, &key, slot);
6746 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6747 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6748 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6751 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6752 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is allocated zeroed with room for num_stripes stripes
 * (btrfs_chunk_record_size()); an allocation failure is reported on stderr.
 * The cache entry spans [key->offset, chunk length), and the key, header
 * generation, chunk attributes and per-stripe devid, offset and device UUID
 * are copied from the leaf.
 */
6758 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6759 struct btrfs_key *key,
6762 struct btrfs_chunk *ptr;
6763 struct chunk_record *rec;
6766 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6767 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6769 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6771 fprintf(stderr, "memory allocation failed\n");
6775 INIT_LIST_HEAD(&rec->list);
6776 INIT_LIST_HEAD(&rec->dextents);
6779 rec->cache.start = key->offset;
6780 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6782 rec->generation = btrfs_header_generation(leaf);
6784 rec->objectid = key->objectid;
6785 rec->type = key->type;
6786 rec->offset = key->offset;
6788 rec->length = rec->cache.size;
6789 rec->owner = btrfs_chunk_owner(leaf, ptr);
6790 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6791 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6792 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6793 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6794 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6795 rec->num_stripes = num_stripes;
6796 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6798 for (i = 0; i < rec->num_stripes; ++i) {
6799 rec->stripes[i].devid =
6800 btrfs_stripe_devid_nr(leaf, ptr, i);
6801 rec->stripes[i].offset =
6802 btrfs_stripe_offset_nr(leaf, ptr, i);
6803 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6804 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Handle one CHUNK_ITEM found in leaf @eb at @slot.
 *
 * The item is first validated with btrfs_check_chunk_valid(); an invalid
 * chunk is reported with error() ("... is not valid, ignore it").  A valid
 * chunk is turned into a chunk_record and inserted into @chunk_cache; a failed
 * insert is reported as "Chunk[...] existed." on stderr.
 */
6811 static int process_chunk_item(struct cache_tree *chunk_cache,
6812 struct btrfs_key *key, struct extent_buffer *eb,
6815 struct chunk_record *rec;
6816 struct btrfs_chunk *chunk;
6819 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6821 * Do extra check for this chunk item,
6823 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6824 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6825 * and owner<->key_type check.
6827 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6830 error("chunk(%llu, %llu) is not valid, ignore it",
6831 key->offset, btrfs_chunk_length(eb, chunk));
6834 rec = btrfs_new_chunk_record(eb, key, slot);
6835 ret = insert_cache_extent(chunk_cache, &rec->cache);
6837 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6838 rec->offset, rec->length);
/*
 * Handle one DEV_ITEM found in leaf @eb at @slot: allocate a device_record,
 * fill it from the key and the on-disk dev item, and insert it into the
 * @dev_cache rb-tree ordered by device_record_compare.
 *
 * Failures are reported on stderr: "memory allocation failed" for malloc and
 * "Device[...] existed." when rb_insert() fails.
 *
 * NOTE(review): the record comes from malloc(), so only the fields assigned
 * in the visible lines are known to be initialised -- confirm no other field
 * is read before being set.
 */
6845 static int process_device_item(struct rb_root *dev_cache,
6846 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6848 struct btrfs_dev_item *ptr;
6849 struct device_record *rec;
6852 ptr = btrfs_item_ptr(eb,
6853 slot, struct btrfs_dev_item);
6855 rec = malloc(sizeof(*rec));
6857 fprintf(stderr, "memory allocation failed\n");
/* Provisional value; overwritten below with the devid stored in the item. */
6861 rec->devid = key->offset;
6862 rec->generation = btrfs_header_generation(eb);
6864 rec->objectid = key->objectid;
6865 rec->type = key->type;
6866 rec->offset = key->offset;
6868 rec->devid = btrfs_device_id(eb, ptr);
6869 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6870 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6872 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6874 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the block group item at @slot
 * of @leaf.
 *
 * The zero-initialised record gets its cache range from the key
 * (start = key->objectid, size = key->offset), the generation of @leaf, a
 * copy of the three key fields and the block group flags read from the item.
 * "memory allocation failed" is printed to stderr when calloc fails.
 */
6881 struct block_group_record *
6882 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6885 struct btrfs_block_group_item *ptr;
6886 struct block_group_record *rec;
6888 rec = calloc(1, sizeof(*rec));
6890 fprintf(stderr, "memory allocation failed\n");
6894 rec->cache.start = key->objectid;
6895 rec->cache.size = key->offset;
6897 rec->generation = btrfs_header_generation(leaf);
6899 rec->objectid = key->objectid;
6900 rec->type = key->type;
6901 rec->offset = key->offset;
6903 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6904 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6906 INIT_LIST_HEAD(&rec->list);
/*
 * Handle one BLOCK_GROUP_ITEM found in leaf @eb at @slot: build a
 * block_group_record and insert it into @block_group_cache.  A failed insert
 * is reported as "Block Group[...] existed." on stderr.
 */
6911 static int process_block_group_item(struct block_group_tree *block_group_cache,
6912 struct btrfs_key *key,
6913 struct extent_buffer *eb, int slot)
6915 struct block_group_record *rec;
6918 rec = btrfs_new_block_group_record(eb, key, slot);
6919 ret = insert_block_group_record(block_group_cache, rec);
6921 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6922 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the dev extent item at @slot
 * of @leaf.
 *
 * The zero-initialised record is indexed in the cache by
 * (objectid = key->objectid, start = key->offset, size = extent length).  It
 * also stores the generation of @leaf, the three key fields and the owning
 * chunk's objectid/offset as read from the item.  Both list heads start out
 * empty.  "memory allocation failed" is printed to stderr when calloc fails.
 */
6929 struct device_extent_record *
6930 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6931 struct btrfs_key *key, int slot)
6933 struct device_extent_record *rec;
6934 struct btrfs_dev_extent *ptr;
6936 rec = calloc(1, sizeof(*rec));
6938 fprintf(stderr, "memory allocation failed\n");
6942 rec->cache.objectid = key->objectid;
6943 rec->cache.start = key->offset;
6945 rec->generation = btrfs_header_generation(leaf);
6947 rec->objectid = key->objectid;
6948 rec->type = key->type;
6949 rec->offset = key->offset;
6951 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6952 rec->chunk_objecteid =
6953 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6955 btrfs_dev_extent_chunk_offset(leaf, ptr);
6956 rec->length = btrfs_dev_extent_length(leaf, ptr);
6957 rec->cache.size = rec->length;
6959 INIT_LIST_HEAD(&rec->chunk_list);
6960 INIT_LIST_HEAD(&rec->device_list);
/*
 * Handle one DEV_EXTENT item found in leaf @eb at @slot: build a
 * device_extent_record and insert it into @dev_extent_cache.  A failed insert
 * is reported as "Device extent[...] existed." on stderr.
 */
6966 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6967 struct btrfs_key *key, struct extent_buffer *eb,
6970 struct device_extent_record *rec;
6973 rec = btrfs_new_device_extent_record(eb, key, slot);
6974 ret = insert_device_extent_record(dev_extent_cache, rec);
6977 "Device extent[%llu, %llu, %llu] existed.\n",
6978 rec->objectid, rec->offset, rec->length);
/*
 * Record one EXTENT_ITEM / METADATA_ITEM (leaf @eb, @slot) in @extent_cache,
 * together with all of its inline backrefs.
 *
 * Steps, as far as visible here:
 *   - The extent length is the nodesize for METADATA_ITEM keys and key.offset
 *     otherwise.
 *   - Extents whose bytenr is not sectorsize aligned are reported and ignored.
 *   - Items smaller than struct btrfs_extent_item take the v0 compat path
 *     (only with BTRFS_COMPAT_EXTENT_TREE_V0): the ref count is read from the
 *     v0 item and the extent record is added without walking inline refs.
 *   - Otherwise the ref count and flags are read from the item.  Metadata
 *     extents whose length differs from nodesize and data extents whose
 *     length is not sectorsize aligned are reported and ignored.
 *   - The extent record is added, then the inline refs between the end of the
 *     extent item (plus the tree block info for EXTENT_ITEM-keyed tree
 *     blocks) and the end of the item are walked, adding a tree or data
 *     backref per entry according to its type.  An unknown type is reported
 *     as "corrupt extent record".
 *
 * NOTE(review): in the visible lines the results of the second
 * add_extent_rec() call and of both add_data_backref() calls are not stored
 * or checked -- confirm that is intentional.
 */
6985 static int process_extent_item(struct btrfs_root *root,
6986 struct cache_tree *extent_cache,
6987 struct extent_buffer *eb, int slot)
6989 struct btrfs_extent_item *ei;
6990 struct btrfs_extent_inline_ref *iref;
6991 struct btrfs_extent_data_ref *dref;
6992 struct btrfs_shared_data_ref *sref;
6993 struct btrfs_key key;
6994 struct extent_record tmpl;
6999 u32 item_size = btrfs_item_size_nr(eb, slot);
7005 btrfs_item_key_to_cpu(eb, &key, slot);
7007 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7009 num_bytes = root->fs_info->nodesize;
7011 num_bytes = key.offset;
7014 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7015 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7016 key.objectid, root->fs_info->sectorsize);
7019 if (item_size < sizeof(*ei)) {
7020 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7021 struct btrfs_extent_item_v0 *ei0;
7022 BUG_ON(item_size != sizeof(*ei0));
7023 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7024 refs = btrfs_extent_refs_v0(eb, ei0);
7028 memset(&tmpl, 0, sizeof(tmpl));
7029 tmpl.start = key.objectid;
7030 tmpl.nr = num_bytes;
7031 tmpl.extent_item_refs = refs;
7032 tmpl.metadata = metadata;
7034 tmpl.max_size = num_bytes;
7036 return add_extent_rec(extent_cache, &tmpl);
7039 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7040 refs = btrfs_extent_refs(eb, ei);
7041 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7045 if (metadata && num_bytes != root->fs_info->nodesize) {
7046 error("ignore invalid metadata extent, length %llu does not equal to %u",
7047 num_bytes, root->fs_info->nodesize);
7050 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7051 error("ignore invalid data extent, length %llu is not aligned to %u",
7052 num_bytes, root->fs_info->sectorsize);
7056 memset(&tmpl, 0, sizeof(tmpl));
7057 tmpl.start = key.objectid;
7058 tmpl.nr = num_bytes;
7059 tmpl.extent_item_refs = refs;
7060 tmpl.metadata = metadata;
7062 tmpl.max_size = num_bytes;
7063 add_extent_rec(extent_cache, &tmpl);
/* Inline refs start right after the extent item ... */
7065 ptr = (unsigned long)(ei + 1);
/* ... or after the tree block info that follows it for EXTENT_ITEM-keyed tree blocks. */
7066 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7067 key.type == BTRFS_EXTENT_ITEM_KEY)
7068 ptr += sizeof(struct btrfs_tree_block_info);
7070 end = (unsigned long)ei + item_size;
7072 iref = (struct btrfs_extent_inline_ref *)ptr;
7073 type = btrfs_extent_inline_ref_type(eb, iref);
7074 offset = btrfs_extent_inline_ref_offset(eb, iref);
7076 case BTRFS_TREE_BLOCK_REF_KEY:
7077 ret = add_tree_backref(extent_cache, key.objectid,
7081 "add_tree_backref failed (extent items tree block): %s",
7084 case BTRFS_SHARED_BLOCK_REF_KEY:
7085 ret = add_tree_backref(extent_cache, key.objectid,
7089 "add_tree_backref failed (extent items shared block): %s",
7092 case BTRFS_EXTENT_DATA_REF_KEY:
7093 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
7094 add_data_backref(extent_cache, key.objectid, 0,
7095 btrfs_extent_data_ref_root(eb, dref),
7096 btrfs_extent_data_ref_objectid(eb,
7098 btrfs_extent_data_ref_offset(eb, dref),
7099 btrfs_extent_data_ref_count(eb, dref),
7102 case BTRFS_SHARED_DATA_REF_KEY:
7103 sref = (struct btrfs_shared_data_ref *)(iref + 1);
7104 add_data_backref(extent_cache, key.objectid, offset,
7106 btrfs_shared_data_ref_count(eb, sref),
7110 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
7111 key.objectid, key.type, num_bytes);
7114 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * described by one free space entry with exactly that offset and length.
 *
 * Before the lookup, the range is trimmed around every superblock mirror:
 * each mirror offset is mapped back to logical addresses with
 * btrfs_rmap_block() and any stripe overlapping the range is cut out.  When a
 * stripe lands strictly inside the range, the left part is checked by a
 * recursive call and the loop continues with the right part.
 *
 * A matching entry is removed from the free space ctl with
 * unlink_free_space(), so that callers can detect leftover entries later.
 * A missing entry or a mismatching offset/size is reported on stderr.
 */
7121 static int check_cache_range(struct btrfs_root *root,
7122 struct btrfs_block_group_cache *cache,
7123 u64 offset, u64 bytes)
7125 struct btrfs_free_space *entry;
7131 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
7132 bytenr = btrfs_sb_offset(i);
7133 ret = btrfs_rmap_block(root->fs_info,
7134 cache->key.objectid, bytenr, 0,
7135 &logical, &nr, &stripe_len);
7140 if (logical[nr] + stripe_len <= offset)
7142 if (offset + bytes <= logical[nr])
7144 if (logical[nr] == offset) {
7145 if (stripe_len >= bytes) {
7149 bytes -= stripe_len;
7150 offset += stripe_len;
7151 } else if (logical[nr] < offset) {
7152 if (logical[nr] + stripe_len >=
7157 bytes = (offset + bytes) -
7158 (logical[nr] + stripe_len);
7159 offset = logical[nr] + stripe_len;
7162 * Could be tricky, the super may land in the
7163 * middle of the area we're checking. First
7164 * check the easiest case, it's at the end.
7166 if (logical[nr] + stripe_len >=
7168 bytes = logical[nr] - offset;
7172 /* Check the left side */
7173 ret = check_cache_range(root, cache,
7175 logical[nr] - offset);
7181 /* Now we continue with the right side */
7182 bytes = (offset + bytes) -
7183 (logical[nr] + stripe_len);
7184 offset = logical[nr] + stripe_len;
7191 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7193 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7194 offset, offset+bytes);
7198 if (entry->offset != offset) {
7199 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7204 if (entry->bytes != bytes) {
7205 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7206 bytes, entry->bytes, offset);
7210 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space information of block group @cache
 * against the extent tree.
 *
 * Starting at max(block group start, BTRFS_SUPER_INFO_OFFSET), the extent
 * tree is walked over EXTENT_ITEM / METADATA_ITEM keys inside the block
 * group.  `last` tracks the end of the previous allocated extent: key.offset
 * bytes for EXTENT_ITEM, nodesize for METADATA_ITEM.  Every gap between
 * `last` and the next extent, and the tail up to the end of the block group,
 * is passed to check_cache_range(), which must find a matching free space
 * entry.
 *
 * Once the walk is done, entries still present in the free space rb-tree are
 * reported on stderr.
 */
7215 static int verify_space_cache(struct btrfs_root *root,
7216 struct btrfs_block_group_cache *cache)
7218 struct btrfs_path path;
7219 struct extent_buffer *leaf;
7220 struct btrfs_key key;
7224 root = root->fs_info->extent_root;
7226 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7228 btrfs_init_path(&path);
7229 key.objectid = last;
7231 key.type = BTRFS_EXTENT_ITEM_KEY;
7232 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7237 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7238 ret = btrfs_next_leaf(root, &path);
7246 leaf = path.nodes[0];
7247 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7248 if (key.objectid >= cache->key.offset + cache->key.objectid)
7250 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7251 key.type != BTRFS_METADATA_ITEM_KEY) {
7256 if (last == key.objectid) {
7257 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7258 last = key.objectid + key.offset;
7260 last = key.objectid + root->fs_info->nodesize;
7265 ret = check_cache_range(root, cache, last,
7266 key.objectid - last);
7269 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7270 last = key.objectid + key.offset;
7272 last = key.objectid + root->fs_info->nodesize;
7276 if (last < cache->key.objectid + cache->key.offset)
7277 ret = check_cache_range(root, cache, last,
7278 cache->key.objectid +
7279 cache->key.offset - last);
7282 btrfs_release_path(&path);
7285 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7286 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space cache / free space tree of every block group.
 *
 * When the superblock's cache generation is set (not -1ULL) but differs from
 * the super generation, a message saying the cache will be invalidated is
 * printed.  With progress reporting enabled, the TASK_FREE_SPACE task runs
 * for the duration of the check.
 *
 * Block groups are visited in address order via
 * btrfs_lookup_first_block_group().  For each one the free space ctl is
 * initialised if missing, otherwise the existing cache is dropped with
 * btrfs_remove_free_space_cache().  Free space is then loaded from the free
 * space tree (with super stripes excluded while loading) when the
 * FREE_SPACE_TREE compat_ro bit is set, or from the v1 space cache
 * otherwise, and compared against the extent tree by verify_space_cache().
 *
 * Returns -EINVAL if the local `error` flag was raised, 0 otherwise.
 */
7294 static int check_space_cache(struct btrfs_root *root)
7296 struct btrfs_block_group_cache *cache;
7297 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7301 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7302 btrfs_super_generation(root->fs_info->super_copy) !=
7303 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7304 printf("cache and super generation don't match, space cache "
7305 "will be invalidated\n");
7309 if (ctx.progress_enabled) {
7310 ctx.tp = TASK_FREE_SPACE;
7311 task_start(ctx.info);
7315 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7319 start = cache->key.objectid + cache->key.offset;
7320 if (!cache->free_space_ctl) {
7321 if (btrfs_init_free_space_ctl(cache,
7322 root->fs_info->sectorsize)) {
7327 btrfs_remove_free_space_cache(cache);
7330 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7331 ret = exclude_super_stripes(root, cache);
7333 fprintf(stderr, "could not exclude super stripes: %s\n",
7338 ret = load_free_space_tree(root->fs_info, cache);
7339 free_excluded_extents(root, cache);
7341 fprintf(stderr, "could not load free space tree: %s\n",
7348 ret = load_free_space_cache(root->fs_info, cache);
7353 ret = verify_space_cache(root, cache);
7355 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7356 cache->key.objectid);
7361 task_stop(ctx.info);
7363 return error ? -EINVAL : 0;
/*
 * Verify the data checksums of the range [@bytenr, @bytenr + @num_bytes)
 * against the checksums stored in leaf @eb starting at @leaf_offset.
 *
 * @num_bytes must be a multiple of the sector size.  The data is read into a
 * temporary buffer of @num_bytes bytes with read_extent_data().  Each
 * sectorsize piece is then checksummed and compared with the matching
 * csum_size-byte slot of the csum item.  A mismatch is reported on stderr
 * with the mirror number; if the range has further copies, the read is
 * presumably retried from the next mirror (the retry statements are not
 * visible in this view).
 *
 * NOTE(review): csum and csum_expected are printed with %u and compared
 * directly, while csum_size bytes are copied into csum_expected -- this
 * appears to assume a checksum no wider than that variable; confirm.
 */
7366 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7367 u64 num_bytes, unsigned long leaf_offset,
7368 struct extent_buffer *eb) {
7370 struct btrfs_fs_info *fs_info = root->fs_info;
7372 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7374 unsigned long csum_offset;
7378 u64 data_checked = 0;
7384 if (num_bytes % fs_info->sectorsize)
7387 data = malloc(num_bytes);
7391 while (offset < num_bytes) {
7394 read_len = num_bytes - offset;
7395 /* ask for everything that is left; read_len is passed by pointer to the read call */
7396 ret = read_extent_data(fs_info, data + offset,
7397 bytenr + offset, &read_len, mirror);
7401 /* verify the checksum of every sector-sized piece just read */
7402 while (data_checked < read_len) {
7404 tmp = offset + data_checked;
7406 csum = btrfs_csum_data((char *)data + tmp,
7407 csum, fs_info->sectorsize);
7408 btrfs_csum_final(csum, (u8 *)&csum);
7410 csum_offset = leaf_offset +
7411 tmp / fs_info->sectorsize * csum_size;
7412 read_extent_buffer(eb, (char *)&csum_expected,
7413 csum_offset, csum_size);
7414 /* try another mirror */
7415 if (csum != csum_expected) {
7416 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7417 mirror, bytenr + tmp,
7418 csum, csum_expected);
7419 num_copies = btrfs_num_copies(root->fs_info,
7421 if (mirror < num_copies - 1) {
7426 data_checked += fs_info->sectorsize;
/*
 * Check that the byte range [@bytenr, @bytenr + @num_bytes) is covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * The search starts at (bytenr, EXTENT_ITEM, -1) and steps back to the
 * closest preceding item, walking back further while the key type sorts
 * after EXTENT_ITEM (see the in-line remark about block group items).  The
 * tree is then scanned forward, and every extent item overlapping the range
 * shrinks it:
 *   - extent starts at bytenr: consume it from the front;
 *   - extent starts before bytenr: consume the overlapping head;
 *   - extent starts inside the range: when it also ends inside, the part to
 *     its right is checked by a recursive call; the range is then cut down
 *     to the part left of the extent.
 *
 * If bytes remain uncovered and no error occurred, "There are no extents for
 * csum range ..." is printed to stderr.
 */
7435 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7438 struct btrfs_path path;
7439 struct extent_buffer *leaf;
7440 struct btrfs_key key;
7443 btrfs_init_path(&path);
7444 key.objectid = bytenr;
7445 key.type = BTRFS_EXTENT_ITEM_KEY;
7446 key.offset = (u64)-1;
7449 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7452 fprintf(stderr, "Error looking up extent record %d\n", ret);
7453 btrfs_release_path(&path);
7456 if (path.slots[0] > 0) {
7459 ret = btrfs_prev_leaf(root, &path);
7462 } else if (ret > 0) {
7469 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7472 * Block group items come before extent items if they have the same
7473 * bytenr, so walk back one more just in case. Dear future traveller,
7474 * first congrats on mastering time travel. Now if it's not too much
7475 * trouble could you go back to 2006 and tell Chris to make the
7476 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7477 * EXTENT_ITEM_KEY please?
7479 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7480 if (path.slots[0] > 0) {
7483 ret = btrfs_prev_leaf(root, &path);
7486 } else if (ret > 0) {
7491 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7495 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7496 ret = btrfs_next_leaf(root, &path);
7498 fprintf(stderr, "Error going to next leaf "
7500 btrfs_release_path(&path);
7506 leaf = path.nodes[0];
7507 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7508 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7512 if (key.objectid + key.offset < bytenr) {
7516 if (key.objectid > bytenr + num_bytes)
7519 if (key.objectid == bytenr) {
7520 if (key.offset >= num_bytes) {
7524 num_bytes -= key.offset;
7525 bytenr += key.offset;
7526 } else if (key.objectid < bytenr) {
7527 if (key.objectid + key.offset >= bytenr + num_bytes) {
7531 num_bytes = (bytenr + num_bytes) -
7532 (key.objectid + key.offset);
7533 bytenr = key.objectid + key.offset;
7535 if (key.objectid + key.offset < bytenr + num_bytes) {
7536 u64 new_start = key.objectid + key.offset;
7537 u64 new_bytes = bytenr + num_bytes - new_start;
7540 * Weird case, the extent is in the middle of
7541 * our range, we'll have to search one side
7542 * and then the other. Not sure if this happens
7543 * in real life, but no harm in coding it up
7544 * anyway just in case.
7546 btrfs_release_path(&path);
7547 ret = check_extent_exists(root, new_start,
7550 fprintf(stderr, "Right section didn't "
7554 num_bytes = key.objectid - bytenr;
7557 num_bytes = key.objectid - bytenr;
7564 if (num_bytes && !ret) {
7565 fprintf(stderr, "There are no extents for csum range "
7566 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7570 btrfs_release_path(&path);
/*
 * Walk the checksum tree and verify that every checksummed byte range is
 * backed by an extent record.
 *
 * @root is replaced by fs_info->csum_root; the function reports "No valid
 * csum tree found" when that root's node is not uptodate.
 *
 * For each EXTENT_CSUM item, the covered data length is
 * (item size / csum_size) * sectorsize.  When the global check_data_csum is
 * set, the data itself is verified with check_extent_csums(); otherwise that
 * step is skipped via skip_csum_check.  Consecutive csum items are merged
 * into one running range (offset, num_bytes).  When an item does not start at
 * the end of the running range, the range is checked with
 * check_extent_exists() and a missing extent record is reported.
 */
7574 static int check_csums(struct btrfs_root *root)
7576 struct btrfs_path path;
7577 struct extent_buffer *leaf;
7578 struct btrfs_key key;
7579 u64 offset = 0, num_bytes = 0;
7580 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7584 unsigned long leaf_offset;
7586 root = root->fs_info->csum_root;
7587 if (!extent_buffer_uptodate(root->node)) {
7588 fprintf(stderr, "No valid csum tree found\n");
7592 btrfs_init_path(&path);
7593 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7594 key.type = BTRFS_EXTENT_CSUM_KEY;
7596 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7598 fprintf(stderr, "Error searching csum tree %d\n", ret);
7599 btrfs_release_path(&path);
7603 if (ret > 0 && path.slots[0])
7608 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7609 ret = btrfs_next_leaf(root, &path);
7611 fprintf(stderr, "Error going to next leaf "
7618 leaf = path.nodes[0];
7620 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7621 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum_size-byte checksum in the item covers one sector of data. */
7626 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7627 csum_size) * root->fs_info->sectorsize;
7628 if (!check_data_csum)
7629 goto skip_csum_check;
7630 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7631 ret = check_extent_csums(root, key.offset, data_len,
7637 offset = key.offset;
7638 } else if (key.offset != offset + num_bytes) {
7639 ret = check_extent_exists(root, offset, num_bytes);
7641 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7642 "there is no extent record\n",
7643 offset, offset+num_bytes);
7646 offset = key.offset;
7649 num_bytes += data_len;
7653 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field: objectid first, then type,
 * then offset, each later field only being looked at when the earlier ones
 * are equal.
 *
 * The return statements are not visible in this view; presumably the result
 * is non-zero when @key sorts strictly before @drop_key -- confirm.
 */
7657 static int is_dropped_key(struct btrfs_key *key,
7658 struct btrfs_key *drop_key) {
7659 if (key->objectid < drop_key->objectid)
7661 else if (key->objectid == drop_key->objectid) {
7662 if (key->type < drop_key->type)
7664 else if (key->type == drop_key->type) {
7665 if (key->offset < drop_key->offset)
7673 * Here are the rules for FULL_BACKREF.
7675 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7676 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7678 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7679 * if it happened after the relocation occurred since we'll have dropped the
7680 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7681 * have no real way to know for sure.
7683 * We process the blocks one root at a time, and we start from the lowest root
7684 * objectid and go to the highest. So we can just lookup the owner backref for
7685 * the record and if we don't find it then we know it doesn't exist and we have
7688 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7689 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7690 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide, without consulting the extent tree, whether tree block @buf should
 * be treated as carrying BTRFS_BLOCK_FLAG_FULL_BACKREF, and store the result
 * in *@flags.
 *
 * The extent record for buf->start is looked up in @extent_cache.  The
 * decision then uses, in this order: whether ri->objectid is below
 * BTRFS_FIRST_FREE_OBJECTID, whether @buf is the root block (buf->start ==
 * ri->bytenr), whether the RELOC header flag is set, whether the header owner
 * equals ri->objectid, and finally whether a tree backref for (parent 0,
 * owner) exists on the record.  Which check leads to which outcome depends on
 * jump statements that are not visible in this view.
 *
 * There are two outcomes.  In one, *@flags is left alone; in the other,
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is OR-ed into *@flags.  In both cases
 * rec->bad_full_backref is set when the outcome contradicts a value already
 * stored in rec->flag_block_full_backref (anything other than FLAG_UNSET).
 */
7692 static int calc_extent_flag(struct cache_tree *extent_cache,
7693 struct extent_buffer *buf,
7694 struct root_item_record *ri,
7697 struct extent_record *rec;
7698 struct cache_extent *cache;
7699 struct tree_backref *tback;
7702 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7703 /* we have added this extent before */
7707 rec = container_of(cache, struct extent_record, cache);
7710 * Except file/reloc tree, we can not have
7713 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7718 if (buf->start == ri->bytenr)
7721 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7724 owner = btrfs_header_owner(buf);
7725 if (owner == ri->objectid)
7728 tback = find_tree_backref(rec, 0, owner);
7733 if (rec->flag_block_full_backref != FLAG_UNSET &&
7734 rec->flag_block_full_backref != 0)
7735 rec->bad_full_backref = 1;
7738 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7739 if (rec->flag_block_full_backref != FLAG_UNSET &&
7740 rec->flag_block_full_backref != 1)
7741 rec->bad_full_backref = 1;
/*
 * Print "Invalid key type(<type>) found in root(<objectid>)" to stderr, with
 * the key type and root id rendered by print_key_type() and
 * print_objectid().
 */
7745 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7747 fprintf(stderr, "Invalid key type(");
7748 print_key_type(stderr, 0, key_type);
7749 fprintf(stderr, ") found in root(");
7750 print_objectid(stderr, rootid, 0);
7751 fprintf(stderr, ")\n");
7755 * Check if the key is valid with its extent buffer.
7757 * This is an early check in case an invalid key exists in an extent buffer
7758 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that @key_type is allowed to appear in the tree identified by
 * @rootid.  Visible rules:
 *   - DEV_ITEM, CHUNK_ITEM: chunk tree only;
 *   - EXTENT_ITEM, METADATA_ITEM, BLOCK_GROUP_ITEM: extent tree only;
 *   - ROOT_ITEM: root tree only;
 *   - DEV_EXTENT: device tree only.
 * A mismatch is reported through report_mismatch_key_root().
 *
 * NOTE(review): the case annotated "valid in csum and log tree" is labelled
 * BTRFS_CSUM_TREE_OBJECTID, which is a tree objectid, while every other label
 * here is a key type.  It looks like BTRFS_EXTENT_CSUM_KEY was intended, so
 * checksum items may not be filtered at all as written -- confirm, and
 * review the accompanying rootid condition at the same time.
 */
7761 static int check_type_with_root(u64 rootid, u8 key_type)
7764 /* Only valid in chunk tree */
7765 case BTRFS_DEV_ITEM_KEY:
7766 case BTRFS_CHUNK_ITEM_KEY:
7767 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7770 /* valid in csum and log tree */
7771 case BTRFS_CSUM_TREE_OBJECTID:
7772 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7776 case BTRFS_EXTENT_ITEM_KEY:
7777 case BTRFS_METADATA_ITEM_KEY:
7778 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7779 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7782 case BTRFS_ROOT_ITEM_KEY:
7783 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7786 case BTRFS_DEV_EXTENT_KEY:
7787 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7793 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next pending tree block of the tree described by @ri.
 *
 * Outline, as far as visible here:
 *   1. pick_next_pending() fills @bits with a batch of candidate blocks.
 *      For a readahead batch each block is added to @reada and
 *      readahead_tree_block() is issued for it.
 *   2. The first block of the batch (bits[0]) is removed from the @pending,
 *      @reada and @nodes caches and read with read_tree_block(), using the
 *      parent generation stored in its extent record when one exists.  A
 *      block that is not uptodate is recorded through record_bad_block_io().
 *   3. The FULL_BACKREF state is determined: from the extent tree via
 *      btrfs_lookup_extent_info() unless init_extent_tree is set, with
 *      calc_extent_flag() as the fallback / alternative.  It is then
 *      sanity-checked against the owner, the RELOC header flag and
 *      ri->last_snapshot; contradictions set rec->bad_full_backref.  The
 *      final state is stored in rec->flag_block_full_backref.
 *   4. check_block() validates the block.
 *   5. Leaf: every item is dispatched on its key type.  Extent/metadata
 *      items, chunk, device, block group and device extent items go to
 *      their process_*() helpers; standalone backref items become
 *      tree/data backrefs; csum item sizes are accounted; orphan items
 *      (other than those keyed on BTRFS_ORPHAN_OBJECTID) are queued on
 *      delete_items; regular file extents update the data byte counters
 *      and add a data backref.  Items whose key type does not belong in
 *      the owning tree are skipped with "ignoring invalid key".
 *      Node: every child pointer gets an extent record and a tree backref
 *      and is queued on @nodes or @pending; children at ri->drop_level
 *      whose key is_dropped_key() accepts are presumably skipped (the
 *      statement is not visible).
 *   6. Global btree statistics (total_btree_bytes, total_fs_tree_bytes,
 *      total_extent_tree_bytes, btree_space_waste) are updated and the
 *      buffer reference is dropped.
 *
 * NOTE(review): in the visible lines the return values of the process_*()
 * helpers and of add_data_backref() are not checked in the leaf loop --
 * confirm that best-effort behaviour is intended.
 */
7797 static int run_next_block(struct btrfs_root *root,
7798 struct block_info *bits,
7801 struct cache_tree *pending,
7802 struct cache_tree *seen,
7803 struct cache_tree *reada,
7804 struct cache_tree *nodes,
7805 struct cache_tree *extent_cache,
7806 struct cache_tree *chunk_cache,
7807 struct rb_root *dev_cache,
7808 struct block_group_tree *block_group_cache,
7809 struct device_extent_tree *dev_extent_cache,
7810 struct root_item_record *ri)
7812 struct btrfs_fs_info *fs_info = root->fs_info;
7813 struct extent_buffer *buf;
7814 struct extent_record *rec = NULL;
7825 struct btrfs_key key;
7826 struct cache_extent *cache;
7829 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7830 bits_nr, &reada_bits);
7835 for(i = 0; i < nritems; i++) {
7836 ret = add_cache_extent(reada, bits[i].start,
7841 /* fixme, get the parent transid */
7842 readahead_tree_block(fs_info, bits[i].start, 0);
7845 *last = bits[0].start;
7846 bytenr = bits[0].start;
7847 size = bits[0].size;
7849 cache = lookup_cache_extent(pending, bytenr, size);
7851 remove_cache_extent(pending, cache);
7854 cache = lookup_cache_extent(reada, bytenr, size);
7856 remove_cache_extent(reada, cache);
7859 cache = lookup_cache_extent(nodes, bytenr, size);
7861 remove_cache_extent(nodes, cache);
7864 cache = lookup_cache_extent(extent_cache, bytenr, size);
7866 rec = container_of(cache, struct extent_record, cache);
7867 gen = rec->parent_generation;
7870 /* fixme, get the real parent transid */
7871 buf = read_tree_block(root->fs_info, bytenr, gen);
7872 if (!extent_buffer_uptodate(buf)) {
7873 record_bad_block_io(root->fs_info,
7874 extent_cache, bytenr, size);
7878 nritems = btrfs_header_nritems(buf);
7881 if (!init_extent_tree) {
7882 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7883 btrfs_header_level(buf), 1, NULL,
7886 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7888 fprintf(stderr, "Couldn't calc extent flags\n");
7889 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7894 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7896 fprintf(stderr, "Couldn't calc extent flags\n");
7897 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7901 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7903 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7904 ri->objectid == btrfs_header_owner(buf)) {
7906 * Ok we got to this block from it's original owner and
7907 * we have FULL_BACKREF set. Relocation can leave
7908 * converted blocks over so this is altogether possible,
7909 * however it's not possible if the generation > the
7910 * last snapshot, so check for this case.
7912 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7913 btrfs_header_generation(buf) > ri->last_snapshot) {
7914 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7915 rec->bad_full_backref = 1;
7920 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7921 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7922 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7923 rec->bad_full_backref = 1;
7927 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7928 rec->flag_block_full_backref = 1;
7932 rec->flag_block_full_backref = 0;
7934 owner = btrfs_header_owner(buf);
7937 ret = check_block(root, extent_cache, buf, flags);
7941 if (btrfs_is_leaf(buf)) {
7942 btree_space_waste += btrfs_leaf_free_space(root, buf);
7943 for (i = 0; i < nritems; i++) {
7944 struct btrfs_file_extent_item *fi;
7945 btrfs_item_key_to_cpu(buf, &key, i);
7947 * Check key type against the leaf owner.
7948 * Could filter quite a lot of early error if
7951 if (check_type_with_root(btrfs_header_owner(buf),
7953 fprintf(stderr, "ignoring invalid key\n");
7956 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7957 process_extent_item(root, extent_cache, buf,
7961 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7962 process_extent_item(root, extent_cache, buf,
7966 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7968 btrfs_item_size_nr(buf, i);
7971 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7972 process_chunk_item(chunk_cache, &key, buf, i);
7975 if (key.type == BTRFS_DEV_ITEM_KEY) {
7976 process_device_item(dev_cache, &key, buf, i);
7979 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7980 process_block_group_item(block_group_cache,
7984 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7985 process_device_extent_item(dev_extent_cache,
7990 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7991 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7992 process_extent_ref_v0(extent_cache, buf, i);
7999 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8000 ret = add_tree_backref(extent_cache,
8001 key.objectid, 0, key.offset, 0);
8004 "add_tree_backref failed (leaf tree block): %s",
8008 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8009 ret = add_tree_backref(extent_cache,
8010 key.objectid, key.offset, 0, 0);
8013 "add_tree_backref failed (leaf shared block): %s",
8017 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8018 struct btrfs_extent_data_ref *ref;
8019 ref = btrfs_item_ptr(buf, i,
8020 struct btrfs_extent_data_ref);
8021 add_data_backref(extent_cache,
8023 btrfs_extent_data_ref_root(buf, ref),
8024 btrfs_extent_data_ref_objectid(buf,
8026 btrfs_extent_data_ref_offset(buf, ref),
8027 btrfs_extent_data_ref_count(buf, ref),
8028 0, root->fs_info->sectorsize);
8031 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8032 struct btrfs_shared_data_ref *ref;
8033 ref = btrfs_item_ptr(buf, i,
8034 struct btrfs_shared_data_ref);
8035 add_data_backref(extent_cache,
8036 key.objectid, key.offset, 0, 0, 0,
8037 btrfs_shared_data_ref_count(buf, ref),
8038 0, root->fs_info->sectorsize);
8041 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8042 struct bad_item *bad;
8044 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8048 bad = malloc(sizeof(struct bad_item));
8051 INIT_LIST_HEAD(&bad->list);
8052 memcpy(&bad->key, &key,
8053 sizeof(struct btrfs_key));
8054 bad->root_id = owner;
8055 list_add_tail(&bad->list, &delete_items);
/* Everything below only concerns regular (non-inline, non-hole) file extents. */
8058 if (key.type != BTRFS_EXTENT_DATA_KEY)
8060 fi = btrfs_item_ptr(buf, i,
8061 struct btrfs_file_extent_item);
8062 if (btrfs_file_extent_type(buf, fi) ==
8063 BTRFS_FILE_EXTENT_INLINE)
8065 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8068 data_bytes_allocated +=
8069 btrfs_file_extent_disk_num_bytes(buf, fi);
8070 if (data_bytes_allocated < root->fs_info->sectorsize) {
8073 data_bytes_referenced +=
8074 btrfs_file_extent_num_bytes(buf, fi);
8075 add_data_backref(extent_cache,
8076 btrfs_file_extent_disk_bytenr(buf, fi),
8077 parent, owner, key.objectid, key.offset -
8078 btrfs_file_extent_offset(buf, fi), 1, 1,
8079 btrfs_file_extent_disk_num_bytes(buf, fi));
8083 struct btrfs_key first_key;
8085 first_key.objectid = 0;
8088 btrfs_item_key_to_cpu(buf, &first_key, 0);
8089 level = btrfs_header_level(buf);
8090 for (i = 0; i < nritems; i++) {
8091 struct extent_record tmpl;
8093 ptr = btrfs_node_blockptr(buf, i);
8094 size = root->fs_info->nodesize;
8095 btrfs_node_key_to_cpu(buf, &key, i);
8097 if ((level == ri->drop_level)
8098 && is_dropped_key(&key, &ri->drop_key)) {
8103 memset(&tmpl, 0, sizeof(tmpl));
8104 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
8105 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
8110 tmpl.max_size = size;
8111 ret = add_extent_rec(extent_cache, &tmpl);
8115 ret = add_tree_backref(extent_cache, ptr, parent,
8119 "add_tree_backref failed (non-leaf block): %s",
8125 add_pending(nodes, seen, ptr, size);
8127 add_pending(pending, seen, ptr, size);
/* Unused key-pointer slots in this node count as wasted btree space. */
8130 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
8131 nritems) * sizeof(struct btrfs_key_ptr);
8133 total_btree_bytes += buf->len;
8134 if (fs_root_objectid(btrfs_header_owner(buf)))
8135 total_fs_tree_bytes += buf->len;
8136 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
8137 total_extent_tree_bytes += buf->len;
8139 free_extent_buffer(buf);
/*
 * Seed the block walk with a tree root block.
 *
 * @buf is queued on @nodes when its level is above 0 and on @pending
 * otherwise, and an extent record covering it is added to @extent_cache.  A
 * tree backref is then added: keyed on the block itself as parent for the
 * reloc tree or for blocks older than BTRFS_MIXED_BACKREF_REV, and keyed on
 * @objectid as root otherwise.
 *
 * NOTE(review): the result of add_extent_rec() is not checked in the visible
 * lines.
 */
8143 static int add_root_to_pending(struct extent_buffer *buf,
8144 struct cache_tree *extent_cache,
8145 struct cache_tree *pending,
8146 struct cache_tree *seen,
8147 struct cache_tree *nodes,
8150 struct extent_record tmpl;
8153 if (btrfs_header_level(buf) > 0)
8154 add_pending(nodes, seen, buf->start, buf->len);
8156 add_pending(pending, seen, buf->start, buf->len);
8158 memset(&tmpl, 0, sizeof(tmpl));
8159 tmpl.start = buf->start;
8164 tmpl.max_size = buf->len;
8165 add_extent_rec(extent_cache, &tmpl);
8167 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
8168 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8169 ret = add_tree_backref(extent_cache, buf->start, buf->start,
8172 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
8177 /* as we fix the tree, we might be deleting blocks that
8178 * we're tracking for repair. This hook makes sure we
8179 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep fsck's in-memory extent records in sync when an extent reference is
 * dropped during repair.
 *
 * The extent record for [@bytenr, @bytenr + @num_bytes) is looked up in
 * fs_info->fsck_extent_cache.  @owner >= BTRFS_FIRST_FREE_OBJECTID selects
 * the data path, anything lower the tree block path:
 *   - data: the matching data backref has found_ref / num_refs and the
 *     record's refs / extent_item_refs reduced by refs_to_drop, and its
 *     found_ref / found_extent_tree bits cleared once the counters hit 0;
 *   - tree: the matching tree backref has its found_ref /
 *     found_extent_tree bits cleared and the record's extent_item_refs
 *     decremented.
 * Finally maybe_free_extent_rec() gets a chance to release the record.
 *
 * NOTE(review): both paths unlink the backref when
 * "!found_extent_tree && found_ref" holds.  Since the preceding lines clear
 * these bits, it looks like the intended test may have been that neither bit
 * is set -- confirm before relying on this.
 */
8181 static int free_extent_hook(struct btrfs_trans_handle *trans,
8182 struct btrfs_root *root,
8183 u64 bytenr, u64 num_bytes, u64 parent,
8184 u64 root_objectid, u64 owner, u64 offset,
8187 struct extent_record *rec;
8188 struct cache_extent *cache;
8190 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8192 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8193 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8197 rec = container_of(cache, struct extent_record, cache);
8199 struct data_backref *back;
8200 back = find_data_backref(rec, parent, root_objectid, owner,
8201 offset, 1, bytenr, num_bytes);
8204 if (back->node.found_ref) {
8205 back->found_ref -= refs_to_drop;
8207 rec->refs -= refs_to_drop;
8209 if (back->node.found_extent_tree) {
8210 back->num_refs -= refs_to_drop;
8211 if (rec->extent_item_refs)
8212 rec->extent_item_refs -= refs_to_drop;
8214 if (back->found_ref == 0)
8215 back->node.found_ref = 0;
8216 if (back->num_refs == 0)
8217 back->node.found_extent_tree = 0;
8219 if (!back->node.found_extent_tree && back->node.found_ref) {
8220 list_del(&back->node.list);
8224 struct tree_backref *back;
8225 back = find_tree_backref(rec, parent, root_objectid);
8228 if (back->node.found_ref) {
8231 back->node.found_ref = 0;
8233 if (back->node.found_extent_tree) {
8234 if (rec->extent_item_refs)
8235 rec->extent_item_refs--;
8236 back->node.found_extent_tree = 0;
8238 if (!back->node.found_extent_tree && back->node.found_ref) {
8239 list_del(&back->node.list);
8243 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete every extent-tree item that belongs to the extent at @bytenr.
 *
 * The search key starts with offset (u64)-1 for @bytenr, and the tree is
 * walked backwards from there.  Items whose objectid differs from @bytenr
 * end the walk.  Items of a type other than EXTENT_ITEM, METADATA_ITEM or
 * one of the backref types (TREE_BLOCK_REF, EXTENT_DATA_REF, EXTENT_REF_V0,
 * SHARED_BLOCK_REF, SHARED_DATA_REF) are stepped over by moving the search
 * key just below them.  Every other item is reported ("repair deleting
 * extent record: ...") and removed with btrfs_del_item().
 *
 * After deleting an EXTENT_ITEM or METADATA_ITEM, btrfs_update_block_group()
 * is called with the extent size: key.offset for EXTENT_ITEM, nodesize for
 * METADATA_ITEM.
 */
8248 static int delete_extent_records(struct btrfs_trans_handle *trans,
8249 struct btrfs_root *root,
8250 struct btrfs_path *path,
8253 struct btrfs_key key;
8254 struct btrfs_key found_key;
8255 struct extent_buffer *leaf;
8260 key.objectid = bytenr;
8262 key.offset = (u64)-1;
8265 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8272 if (path->slots[0] == 0)
8278 leaf = path->nodes[0];
8279 slot = path->slots[0];
8281 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8282 if (found_key.objectid != bytenr)
8285 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8286 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8287 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8288 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8289 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8290 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8291 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8292 btrfs_release_path(path);
/* Not ours to delete: restart the search just below this key. */
8293 if (found_key.type == 0) {
8294 if (found_key.offset == 0)
8296 key.offset = found_key.offset - 1;
8297 key.type = found_key.type;
8299 key.type = found_key.type - 1;
8300 key.offset = (u64)-1;
8304 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8305 found_key.objectid, found_key.type, found_key.offset);
8307 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8310 btrfs_release_path(path);
8312 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8313 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8314 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8315 found_key.offset : root->fs_info->nodesize;
8317 ret = btrfs_update_block_group(trans, root, bytenr,
8324 btrfs_release_path(path);
8329 * for a single backref, this will allocate a new extent
8330 * and add the backref to it.
8332 static int record_extent(struct btrfs_trans_handle *trans,
8333 struct btrfs_fs_info *info,
8334 struct btrfs_path *path,
8335 struct extent_record *rec,
8336 struct extent_backref *back,
8337 int allocated, u64 flags)
8340 struct btrfs_root *extent_root = info->extent_root;
8341 struct extent_buffer *leaf;
8342 struct btrfs_key ins_key;
8343 struct btrfs_extent_item *ei;
8344 struct data_backref *dback;
8345 struct btrfs_tree_block_info *bi;
8348 rec->max_size = max_t(u64, rec->max_size,
8352 u32 item_size = sizeof(*ei);
8355 item_size += sizeof(*bi);
8357 ins_key.objectid = rec->start;
8358 ins_key.offset = rec->max_size;
8359 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8361 ret = btrfs_insert_empty_item(trans, extent_root, path,
8362 &ins_key, item_size);
8366 leaf = path->nodes[0];
8367 ei = btrfs_item_ptr(leaf, path->slots[0],
8368 struct btrfs_extent_item);
8370 btrfs_set_extent_refs(leaf, ei, 0);
8371 btrfs_set_extent_generation(leaf, ei, rec->generation);
8373 if (back->is_data) {
8374 btrfs_set_extent_flags(leaf, ei,
8375 BTRFS_EXTENT_FLAG_DATA);
8377 struct btrfs_disk_key copy_key;;
8379 bi = (struct btrfs_tree_block_info *)(ei + 1);
8380 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8383 btrfs_set_disk_key_objectid(©_key,
8384 rec->info_objectid);
8385 btrfs_set_disk_key_type(©_key, 0);
8386 btrfs_set_disk_key_offset(©_key, 0);
8388 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8389 btrfs_set_tree_block_key(leaf, bi, ©_key);
8391 btrfs_set_extent_flags(leaf, ei,
8392 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8395 btrfs_mark_buffer_dirty(leaf);
8396 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8397 rec->max_size, 1, 0);
8400 btrfs_release_path(path);
8403 if (back->is_data) {
8407 dback = to_data_backref(back);
8408 if (back->full_backref)
8409 parent = dback->parent;
8413 for (i = 0; i < dback->found_ref; i++) {
8414 /* if parent != 0, we're doing a full backref
8415 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8416 * just makes the backref allocator create a data
8419 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8420 rec->start, rec->max_size,
8424 BTRFS_FIRST_FREE_OBJECTID :
8430 fprintf(stderr, "adding new data backref"
8431 " on %llu %s %llu owner %llu"
8432 " offset %llu found %d\n",
8433 (unsigned long long)rec->start,
8434 back->full_backref ?
8436 back->full_backref ?
8437 (unsigned long long)parent :
8438 (unsigned long long)dback->root,
8439 (unsigned long long)dback->owner,
8440 (unsigned long long)dback->offset,
8444 struct tree_backref *tback;
8446 tback = to_tree_backref(back);
8447 if (back->full_backref)
8448 parent = tback->parent;
8452 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8453 rec->start, rec->max_size,
8454 parent, tback->root, 0, 0);
8455 fprintf(stderr, "adding new tree backref on "
8456 "start %llu len %llu parent %llu root %llu\n",
8457 rec->start, rec->max_size, parent, tback->root);
8460 btrfs_release_path(path);
8464 static struct extent_entry *find_entry(struct list_head *entries,
8465 u64 bytenr, u64 bytes)
8467 struct extent_entry *entry = NULL;
8469 list_for_each_entry(entry, entries, list) {
8470 if (entry->bytenr == bytenr && entry->bytes == bytes)
8477 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8479 struct extent_entry *entry, *best = NULL, *prev = NULL;
8481 list_for_each_entry(entry, entries, list) {
8483 * If there are as many broken entries as entries then we know
8484 * not to trust this particular entry.
8486 if (entry->broken == entry->count)
8490 * Special case, when there are only two entries and 'best' is
8500 * If our current entry == best then we can't be sure our best
8501 * is really the best, so we need to keep searching.
8503 if (best && best->count == entry->count) {
8509 /* Prev == entry, not good enough, have to keep searching */
8510 if (!prev->broken && prev->count == entry->count)
8514 best = (prev->count > entry->count) ? prev : entry;
8515 else if (best->count < entry->count)
8523 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8524 struct data_backref *dback, struct extent_entry *entry)
8526 struct btrfs_trans_handle *trans;
8527 struct btrfs_root *root;
8528 struct btrfs_file_extent_item *fi;
8529 struct extent_buffer *leaf;
8530 struct btrfs_key key;
8534 key.objectid = dback->root;
8535 key.type = BTRFS_ROOT_ITEM_KEY;
8536 key.offset = (u64)-1;
8537 root = btrfs_read_fs_root(info, &key);
8539 fprintf(stderr, "Couldn't find root for our ref\n");
8544 * The backref points to the original offset of the extent if it was
8545 * split, so we need to search down to the offset we have and then walk
8546 * forward until we find the backref we're looking for.
8548 key.objectid = dback->owner;
8549 key.type = BTRFS_EXTENT_DATA_KEY;
8550 key.offset = dback->offset;
8551 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8553 fprintf(stderr, "Error looking up ref %d\n", ret);
8558 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8559 ret = btrfs_next_leaf(root, path);
8561 fprintf(stderr, "Couldn't find our ref, next\n");
8565 leaf = path->nodes[0];
8566 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8567 if (key.objectid != dback->owner ||
8568 key.type != BTRFS_EXTENT_DATA_KEY) {
8569 fprintf(stderr, "Couldn't find our ref, search\n");
8572 fi = btrfs_item_ptr(leaf, path->slots[0],
8573 struct btrfs_file_extent_item);
8574 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8575 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8577 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8582 btrfs_release_path(path);
8584 trans = btrfs_start_transaction(root, 1);
8586 return PTR_ERR(trans);
8589 * Ok we have the key of the file extent we want to fix, now we can cow
8590 * down to the thing and fix it.
8592 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8594 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8595 key.objectid, key.type, key.offset, ret);
8599 fprintf(stderr, "Well that's odd, we just found this key "
8600 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8605 leaf = path->nodes[0];
8606 fi = btrfs_item_ptr(leaf, path->slots[0],
8607 struct btrfs_file_extent_item);
8609 if (btrfs_file_extent_compression(leaf, fi) &&
8610 dback->disk_bytenr != entry->bytenr) {
8611 fprintf(stderr, "Ref doesn't match the record start and is "
8612 "compressed, please take a btrfs-image of this file "
8613 "system and send it to a btrfs developer so they can "
8614 "complete this functionality for bytenr %Lu\n",
8615 dback->disk_bytenr);
8620 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8621 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8622 } else if (dback->disk_bytenr > entry->bytenr) {
8623 u64 off_diff, offset;
8625 off_diff = dback->disk_bytenr - entry->bytenr;
8626 offset = btrfs_file_extent_offset(leaf, fi);
8627 if (dback->disk_bytenr + offset +
8628 btrfs_file_extent_num_bytes(leaf, fi) >
8629 entry->bytenr + entry->bytes) {
8630 fprintf(stderr, "Ref is past the entry end, please "
8631 "take a btrfs-image of this file system and "
8632 "send it to a btrfs developer, ref %Lu\n",
8633 dback->disk_bytenr);
8638 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8639 btrfs_set_file_extent_offset(leaf, fi, offset);
8640 } else if (dback->disk_bytenr < entry->bytenr) {
8643 offset = btrfs_file_extent_offset(leaf, fi);
8644 if (dback->disk_bytenr + offset < entry->bytenr) {
8645 fprintf(stderr, "Ref is before the entry start, please"
8646 " take a btrfs-image of this file system and "
8647 "send it to a btrfs developer, ref %Lu\n",
8648 dback->disk_bytenr);
8653 offset += dback->disk_bytenr;
8654 offset -= entry->bytenr;
8655 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8656 btrfs_set_file_extent_offset(leaf, fi, offset);
8659 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8662 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8663 * only do this if we aren't using compression, otherwise it's a
8666 if (!btrfs_file_extent_compression(leaf, fi))
8667 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8669 printf("ram bytes may be wrong?\n");
8670 btrfs_mark_buffer_dirty(leaf);
8672 err = btrfs_commit_transaction(trans, root);
8673 btrfs_release_path(path);
8674 return ret ? ret : err;
8677 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8678 struct extent_record *rec)
8680 struct extent_backref *back;
8681 struct data_backref *dback;
8682 struct extent_entry *entry, *best = NULL;
8685 int broken_entries = 0;
8690 * Metadata is easy and the backrefs should always agree on bytenr and
8691 * size, if not we've got bigger issues.
8696 list_for_each_entry(back, &rec->backrefs, list) {
8697 if (back->full_backref || !back->is_data)
8700 dback = to_data_backref(back);
8703 * We only pay attention to backrefs that we found a real
8706 if (dback->found_ref == 0)
8710 * For now we only catch when the bytes don't match, not the
8711 * bytenr. We can easily do this at the same time, but I want
8712 * to have a fs image to test on before we just add repair
8713 * functionality willy-nilly so we know we won't screw up the
8717 entry = find_entry(&entries, dback->disk_bytenr,
8720 entry = malloc(sizeof(struct extent_entry));
8725 memset(entry, 0, sizeof(*entry));
8726 entry->bytenr = dback->disk_bytenr;
8727 entry->bytes = dback->bytes;
8728 list_add_tail(&entry->list, &entries);
8733 * If we only have on entry we may think the entries agree when
8734 * in reality they don't so we have to do some extra checking.
8736 if (dback->disk_bytenr != rec->start ||
8737 dback->bytes != rec->nr || back->broken)
8748 /* Yay all the backrefs agree, carry on good sir */
8749 if (nr_entries <= 1 && !mismatch)
8752 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8753 "%Lu\n", rec->start);
8756 * First we want to see if the backrefs can agree amongst themselves who
8757 * is right, so figure out which one of the entries has the highest
8760 best = find_most_right_entry(&entries);
8763 * Ok so we may have an even split between what the backrefs think, so
8764 * this is where we use the extent ref to see what it thinks.
8767 entry = find_entry(&entries, rec->start, rec->nr);
8768 if (!entry && (!broken_entries || !rec->found_rec)) {
8769 fprintf(stderr, "Backrefs don't agree with each other "
8770 "and extent record doesn't agree with anybody,"
8771 " so we can't fix bytenr %Lu bytes %Lu\n",
8772 rec->start, rec->nr);
8775 } else if (!entry) {
8777 * Ok our backrefs were broken, we'll assume this is the
8778 * correct value and add an entry for this range.
8780 entry = malloc(sizeof(struct extent_entry));
8785 memset(entry, 0, sizeof(*entry));
8786 entry->bytenr = rec->start;
8787 entry->bytes = rec->nr;
8788 list_add_tail(&entry->list, &entries);
8792 best = find_most_right_entry(&entries);
8794 fprintf(stderr, "Backrefs and extent record evenly "
8795 "split on who is right, this is going to "
8796 "require user input to fix bytenr %Lu bytes "
8797 "%Lu\n", rec->start, rec->nr);
8804 * I don't think this can happen currently as we'll abort() if we catch
8805 * this case higher up, but in case somebody removes that we still can't
8806 * deal with it properly here yet, so just bail out of that's the case.
8808 if (best->bytenr != rec->start) {
8809 fprintf(stderr, "Extent start and backref starts don't match, "
8810 "please use btrfs-image on this file system and send "
8811 "it to a btrfs developer so they can make fsck fix "
8812 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8813 rec->start, rec->nr);
8819 * Ok great we all agreed on an extent record, let's go find the real
8820 * references and fix up the ones that don't match.
8822 list_for_each_entry(back, &rec->backrefs, list) {
8823 if (back->full_backref || !back->is_data)
8826 dback = to_data_backref(back);
8829 * Still ignoring backrefs that don't have a real ref attached
8832 if (dback->found_ref == 0)
8835 if (dback->bytes == best->bytes &&
8836 dback->disk_bytenr == best->bytenr)
8839 ret = repair_ref(info, path, dback, best);
8845 * Ok we messed with the actual refs, which means we need to drop our
8846 * entire cache and go back and rescan. I know this is a huge pain and
8847 * adds a lot of extra work, but it's the only way to be safe. Once all
8848 * the backrefs agree we may not need to do anything to the extent
8853 while (!list_empty(&entries)) {
8854 entry = list_entry(entries.next, struct extent_entry, list);
8855 list_del_init(&entry->list);
8861 static int process_duplicates(struct cache_tree *extent_cache,
8862 struct extent_record *rec)
8864 struct extent_record *good, *tmp;
8865 struct cache_extent *cache;
8869 * If we found a extent record for this extent then return, or if we
8870 * have more than one duplicate we are likely going to need to delete
8873 if (rec->found_rec || rec->num_duplicates > 1)
8876 /* Shouldn't happen but just in case */
8877 BUG_ON(!rec->num_duplicates);
8880 * So this happens if we end up with a backref that doesn't match the
8881 * actual extent entry. So either the backref is bad or the extent
8882 * entry is bad. Either way we want to have the extent_record actually
8883 * reflect what we found in the extent_tree, so we need to take the
8884 * duplicate out and use that as the extent_record since the only way we
8885 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8887 remove_cache_extent(extent_cache, &rec->cache);
8889 good = to_extent_record(rec->dups.next);
8890 list_del_init(&good->list);
8891 INIT_LIST_HEAD(&good->backrefs);
8892 INIT_LIST_HEAD(&good->dups);
8893 good->cache.start = good->start;
8894 good->cache.size = good->nr;
8895 good->content_checked = 0;
8896 good->owner_ref_checked = 0;
8897 good->num_duplicates = 0;
8898 good->refs = rec->refs;
8899 list_splice_init(&rec->backrefs, &good->backrefs);
8901 cache = lookup_cache_extent(extent_cache, good->start,
8905 tmp = container_of(cache, struct extent_record, cache);
8908 * If we find another overlapping extent and it's found_rec is
8909 * set then it's a duplicate and we need to try and delete
8912 if (tmp->found_rec || tmp->num_duplicates > 0) {
8913 if (list_empty(&good->list))
8914 list_add_tail(&good->list,
8915 &duplicate_extents);
8916 good->num_duplicates += tmp->num_duplicates + 1;
8917 list_splice_init(&tmp->dups, &good->dups);
8918 list_del_init(&tmp->list);
8919 list_add_tail(&tmp->list, &good->dups);
8920 remove_cache_extent(extent_cache, &tmp->cache);
8925 * Ok we have another non extent item backed extent rec, so lets
8926 * just add it to this extent and carry on like we did above.
8928 good->refs += tmp->refs;
8929 list_splice_init(&tmp->backrefs, &good->backrefs);
8930 remove_cache_extent(extent_cache, &tmp->cache);
8933 ret = insert_cache_extent(extent_cache, &good->cache);
8936 return good->num_duplicates ? 0 : 1;
8939 static int delete_duplicate_records(struct btrfs_root *root,
8940 struct extent_record *rec)
8942 struct btrfs_trans_handle *trans;
8943 LIST_HEAD(delete_list);
8944 struct btrfs_path path;
8945 struct extent_record *tmp, *good, *n;
8948 struct btrfs_key key;
8950 btrfs_init_path(&path);
8953 /* Find the record that covers all of the duplicates. */
8954 list_for_each_entry(tmp, &rec->dups, list) {
8955 if (good->start < tmp->start)
8957 if (good->nr > tmp->nr)
8960 if (tmp->start + tmp->nr < good->start + good->nr) {
8961 fprintf(stderr, "Ok we have overlapping extents that "
8962 "aren't completely covered by each other, this "
8963 "is going to require more careful thought. "
8964 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8965 tmp->start, tmp->nr, good->start, good->nr);
8972 list_add_tail(&rec->list, &delete_list);
8974 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8977 list_move_tail(&tmp->list, &delete_list);
8980 root = root->fs_info->extent_root;
8981 trans = btrfs_start_transaction(root, 1);
8982 if (IS_ERR(trans)) {
8983 ret = PTR_ERR(trans);
8987 list_for_each_entry(tmp, &delete_list, list) {
8988 if (tmp->found_rec == 0)
8990 key.objectid = tmp->start;
8991 key.type = BTRFS_EXTENT_ITEM_KEY;
8992 key.offset = tmp->nr;
8994 /* Shouldn't happen but just in case */
8995 if (tmp->metadata) {
8996 fprintf(stderr, "Well this shouldn't happen, extent "
8997 "record overlaps but is metadata? "
8998 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9002 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9008 ret = btrfs_del_item(trans, root, &path);
9011 btrfs_release_path(&path);
9014 err = btrfs_commit_transaction(trans, root);
9018 while (!list_empty(&delete_list)) {
9019 tmp = to_extent_record(delete_list.next);
9020 list_del_init(&tmp->list);
9026 while (!list_empty(&rec->dups)) {
9027 tmp = to_extent_record(rec->dups.next);
9028 list_del_init(&tmp->list);
9032 btrfs_release_path(&path);
9034 if (!ret && !nr_del)
9035 rec->num_duplicates = 0;
9037 return ret ? ret : nr_del;
9040 static int find_possible_backrefs(struct btrfs_fs_info *info,
9041 struct btrfs_path *path,
9042 struct cache_tree *extent_cache,
9043 struct extent_record *rec)
9045 struct btrfs_root *root;
9046 struct extent_backref *back;
9047 struct data_backref *dback;
9048 struct cache_extent *cache;
9049 struct btrfs_file_extent_item *fi;
9050 struct btrfs_key key;
9054 list_for_each_entry(back, &rec->backrefs, list) {
9055 /* Don't care about full backrefs (poor unloved backrefs) */
9056 if (back->full_backref || !back->is_data)
9059 dback = to_data_backref(back);
9061 /* We found this one, we don't need to do a lookup */
9062 if (dback->found_ref)
9065 key.objectid = dback->root;
9066 key.type = BTRFS_ROOT_ITEM_KEY;
9067 key.offset = (u64)-1;
9069 root = btrfs_read_fs_root(info, &key);
9071 /* No root, definitely a bad ref, skip */
9072 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9074 /* Other err, exit */
9076 return PTR_ERR(root);
9078 key.objectid = dback->owner;
9079 key.type = BTRFS_EXTENT_DATA_KEY;
9080 key.offset = dback->offset;
9081 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9083 btrfs_release_path(path);
9086 /* Didn't find it, we can carry on */
9091 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
9092 struct btrfs_file_extent_item);
9093 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
9094 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
9095 btrfs_release_path(path);
9096 cache = lookup_cache_extent(extent_cache, bytenr, 1);
9098 struct extent_record *tmp;
9099 tmp = container_of(cache, struct extent_record, cache);
9102 * If we found an extent record for the bytenr for this
9103 * particular backref then we can't add it to our
9104 * current extent record. We only want to add backrefs
9105 * that don't have a corresponding extent item in the
9106 * extent tree since they likely belong to this record
9107 * and we need to fix it if it doesn't match bytenrs.
9113 dback->found_ref += 1;
9114 dback->disk_bytenr = bytenr;
9115 dback->bytes = bytes;
9118 * Set this so the verify backref code knows not to trust the
9119 * values in this backref.
9128 * Record orphan data ref into corresponding root.
9130 * Return 0 if the extent item contains data ref and recorded.
9131 * Return 1 if the extent item contains no useful data ref
9132 * On that case, it may contains only shared_dataref or metadata backref
9133 * or the file extent exists(this should be handled by the extent bytenr
9135 * Return <0 if something goes wrong.
9137 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
9138 struct extent_record *rec)
9140 struct btrfs_key key;
9141 struct btrfs_root *dest_root;
9142 struct extent_backref *back;
9143 struct data_backref *dback;
9144 struct orphan_data_extent *orphan;
9145 struct btrfs_path path;
9146 int recorded_data_ref = 0;
9151 btrfs_init_path(&path);
9152 list_for_each_entry(back, &rec->backrefs, list) {
9153 if (back->full_backref || !back->is_data ||
9154 !back->found_extent_tree)
9156 dback = to_data_backref(back);
9157 if (dback->found_ref)
9159 key.objectid = dback->root;
9160 key.type = BTRFS_ROOT_ITEM_KEY;
9161 key.offset = (u64)-1;
9163 dest_root = btrfs_read_fs_root(fs_info, &key);
9165 /* For non-exist root we just skip it */
9166 if (IS_ERR(dest_root) || !dest_root)
9169 key.objectid = dback->owner;
9170 key.type = BTRFS_EXTENT_DATA_KEY;
9171 key.offset = dback->offset;
9173 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
9174 btrfs_release_path(&path);
9176 * For ret < 0, it's OK since the fs-tree may be corrupted,
9177 * we need to record it for inode/file extent rebuild.
9178 * For ret > 0, we record it only for file extent rebuild.
9179 * For ret == 0, the file extent exists but only bytenr
9180 * mismatch, let the original bytenr fix routine to handle,
9186 orphan = malloc(sizeof(*orphan));
9191 INIT_LIST_HEAD(&orphan->list);
9192 orphan->root = dback->root;
9193 orphan->objectid = dback->owner;
9194 orphan->offset = dback->offset;
9195 orphan->disk_bytenr = rec->cache.start;
9196 orphan->disk_len = rec->cache.size;
9197 list_add(&dest_root->orphan_data_extents, &orphan->list);
9198 recorded_data_ref = 1;
9201 btrfs_release_path(&path);
9203 return !recorded_data_ref;
9209 * when an incorrect extent item is found, this will delete
9210 * all of the existing entries for it and recreate them
9211 * based on what the tree scan found.
9213 static int fixup_extent_refs(struct btrfs_fs_info *info,
9214 struct cache_tree *extent_cache,
9215 struct extent_record *rec)
9217 struct btrfs_trans_handle *trans = NULL;
9219 struct btrfs_path path;
9220 struct list_head *cur = rec->backrefs.next;
9221 struct cache_extent *cache;
9222 struct extent_backref *back;
9226 if (rec->flag_block_full_backref)
9227 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9229 btrfs_init_path(&path);
9230 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9232 * Sometimes the backrefs themselves are so broken they don't
9233 * get attached to any meaningful rec, so first go back and
9234 * check any of our backrefs that we couldn't find and throw
9235 * them into the list if we find the backref so that
9236 * verify_backrefs can figure out what to do.
9238 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9243 /* step one, make sure all of the backrefs agree */
9244 ret = verify_backrefs(info, &path, rec);
9248 trans = btrfs_start_transaction(info->extent_root, 1);
9249 if (IS_ERR(trans)) {
9250 ret = PTR_ERR(trans);
9254 /* step two, delete all the existing records */
9255 ret = delete_extent_records(trans, info->extent_root, &path,
9261 /* was this block corrupt? If so, don't add references to it */
9262 cache = lookup_cache_extent(info->corrupt_blocks,
9263 rec->start, rec->max_size);
9269 /* step three, recreate all the refs we did find */
9270 while(cur != &rec->backrefs) {
9271 back = to_extent_backref(cur);
9275 * if we didn't find any references, don't create a
9278 if (!back->found_ref)
9281 rec->bad_full_backref = 0;
9282 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9290 int err = btrfs_commit_transaction(trans, info->extent_root);
9296 fprintf(stderr, "Repaired extent references for %llu\n",
9297 (unsigned long long)rec->start);
9299 btrfs_release_path(&path);
9303 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9304 struct extent_record *rec)
9306 struct btrfs_trans_handle *trans;
9307 struct btrfs_root *root = fs_info->extent_root;
9308 struct btrfs_path path;
9309 struct btrfs_extent_item *ei;
9310 struct btrfs_key key;
9314 key.objectid = rec->start;
9315 if (rec->metadata) {
9316 key.type = BTRFS_METADATA_ITEM_KEY;
9317 key.offset = rec->info_level;
9319 key.type = BTRFS_EXTENT_ITEM_KEY;
9320 key.offset = rec->max_size;
9323 trans = btrfs_start_transaction(root, 0);
9325 return PTR_ERR(trans);
9327 btrfs_init_path(&path);
9328 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9330 btrfs_release_path(&path);
9331 btrfs_commit_transaction(trans, root);
9334 fprintf(stderr, "Didn't find extent for %llu\n",
9335 (unsigned long long)rec->start);
9336 btrfs_release_path(&path);
9337 btrfs_commit_transaction(trans, root);
9341 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9342 struct btrfs_extent_item);
9343 flags = btrfs_extent_flags(path.nodes[0], ei);
9344 if (rec->flag_block_full_backref) {
9345 fprintf(stderr, "setting full backref on %llu\n",
9346 (unsigned long long)key.objectid);
9347 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9349 fprintf(stderr, "clearing full backref on %llu\n",
9350 (unsigned long long)key.objectid);
9351 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9353 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9354 btrfs_mark_buffer_dirty(path.nodes[0]);
9355 btrfs_release_path(&path);
9356 ret = btrfs_commit_transaction(trans, root);
9358 fprintf(stderr, "Repaired extent flags for %llu\n",
9359 (unsigned long long)rec->start);
9364 /* right now we only prune from the extent allocation tree */
9365 static int prune_one_block(struct btrfs_trans_handle *trans,
9366 struct btrfs_fs_info *info,
9367 struct btrfs_corrupt_block *corrupt)
9370 struct btrfs_path path;
9371 struct extent_buffer *eb;
9375 int level = corrupt->level + 1;
9377 btrfs_init_path(&path);
9379 /* we want to stop at the parent to our busted block */
9380 path.lowest_level = level;
9382 ret = btrfs_search_slot(trans, info->extent_root,
9383 &corrupt->key, &path, -1, 1);
9388 eb = path.nodes[level];
9395 * hopefully the search gave us the block we want to prune,
9396 * lets try that first
9398 slot = path.slots[level];
9399 found = btrfs_node_blockptr(eb, slot);
9400 if (found == corrupt->cache.start)
9403 nritems = btrfs_header_nritems(eb);
9405 /* the search failed, lets scan this node and hope we find it */
9406 for (slot = 0; slot < nritems; slot++) {
9407 found = btrfs_node_blockptr(eb, slot);
9408 if (found == corrupt->cache.start)
9412 * we couldn't find the bad block. TODO, search all the nodes for pointers
9415 if (eb == info->extent_root->node) {
9420 btrfs_release_path(&path);
9425 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9426 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9429 btrfs_release_path(&path);
9433 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9435 struct btrfs_trans_handle *trans = NULL;
9436 struct cache_extent *cache;
9437 struct btrfs_corrupt_block *corrupt;
9440 cache = search_cache_extent(info->corrupt_blocks, 0);
9444 trans = btrfs_start_transaction(info->extent_root, 1);
9446 return PTR_ERR(trans);
9448 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9449 prune_one_block(trans, info, corrupt);
9450 remove_cache_extent(info->corrupt_blocks, cache);
9453 return btrfs_commit_transaction(trans, info->extent_root);
9457 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9459 struct btrfs_block_group_cache *cache;
9464 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9465 &start, &end, EXTENT_DIRTY);
9468 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9473 cache = btrfs_lookup_first_block_group(fs_info, start);
9478 start = cache->key.objectid + cache->key.offset;
9482 static int check_extent_refs(struct btrfs_root *root,
9483 struct cache_tree *extent_cache)
9485 struct extent_record *rec;
9486 struct cache_extent *cache;
9492 * if we're doing a repair, we have to make sure
9493 * we don't allocate from the problem extents.
9494 * In the worst case, this will be all the
9497 cache = search_cache_extent(extent_cache, 0);
9499 rec = container_of(cache, struct extent_record, cache);
9500 set_extent_dirty(root->fs_info->excluded_extents,
9502 rec->start + rec->max_size - 1);
9503 cache = next_cache_extent(cache);
9506 /* pin down all the corrupted blocks too */
9507 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9509 set_extent_dirty(root->fs_info->excluded_extents,
9511 cache->start + cache->size - 1);
9512 cache = next_cache_extent(cache);
9514 prune_corrupt_blocks(root->fs_info);
9515 reset_cached_block_groups(root->fs_info);
9518 reset_cached_block_groups(root->fs_info);
9521 * We need to delete any duplicate entries we find first otherwise we
9522 * could mess up the extent tree when we have backrefs that actually
9523 * belong to a different extent item and not the weird duplicate one.
9525 while (repair && !list_empty(&duplicate_extents)) {
9526 rec = to_extent_record(duplicate_extents.next);
9527 list_del_init(&rec->list);
9529 /* Sometimes we can find a backref before we find an actual
9530 * extent, so we need to process it a little bit to see if there
9531 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9532 * if this is a backref screwup. If we need to delete stuff
9533 * process_duplicates() will return 0, otherwise it will return
9536 if (process_duplicates(extent_cache, rec))
9538 ret = delete_duplicate_records(root, rec);
9542 * delete_duplicate_records will return the number of entries
9543 * deleted, so if it's greater than 0 then we know we actually
9544 * did something and we need to remove.
9557 cache = search_cache_extent(extent_cache, 0);
9560 rec = container_of(cache, struct extent_record, cache);
9561 if (rec->num_duplicates) {
9562 fprintf(stderr, "extent item %llu has multiple extent "
9563 "items\n", (unsigned long long)rec->start);
9567 if (rec->refs != rec->extent_item_refs) {
9568 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9569 (unsigned long long)rec->start,
9570 (unsigned long long)rec->nr);
9571 fprintf(stderr, "extent item %llu, found %llu\n",
9572 (unsigned long long)rec->extent_item_refs,
9573 (unsigned long long)rec->refs);
9574 ret = record_orphan_data_extents(root->fs_info, rec);
9580 if (all_backpointers_checked(rec, 1)) {
9581 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9582 (unsigned long long)rec->start,
9583 (unsigned long long)rec->nr);
9587 if (!rec->owner_ref_checked) {
9588 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9589 (unsigned long long)rec->start,
9590 (unsigned long long)rec->nr);
9595 if (repair && fix) {
9596 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9602 if (rec->bad_full_backref) {
9603 fprintf(stderr, "bad full backref, on [%llu]\n",
9604 (unsigned long long)rec->start);
9606 ret = fixup_extent_flags(root->fs_info, rec);
9614 * Although it's not a extent ref's problem, we reuse this
9615 * routine for error reporting.
9616 * No repair function yet.
9618 if (rec->crossing_stripes) {
9620 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9621 rec->start, rec->start + rec->max_size);
9625 if (rec->wrong_chunk_type) {
9627 "bad extent [%llu, %llu), type mismatch with chunk\n",
9628 rec->start, rec->start + rec->max_size);
9632 remove_cache_extent(extent_cache, cache);
9633 free_all_extent_backrefs(rec);
9634 if (!init_extent_tree && repair && (!cur_err || fix))
9635 clear_extent_dirty(root->fs_info->excluded_extents,
9637 rec->start + rec->max_size - 1);
9642 if (ret && ret != -EAGAIN) {
9643 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9646 struct btrfs_trans_handle *trans;
9648 root = root->fs_info->extent_root;
9649 trans = btrfs_start_transaction(root, 1);
9650 if (IS_ERR(trans)) {
9651 ret = PTR_ERR(trans);
9655 ret = btrfs_fix_block_accounting(trans, root);
9658 ret = btrfs_commit_transaction(trans, root);
9667 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9671 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9672 stripe_size = length;
9673 stripe_size /= num_stripes;
9674 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9675 stripe_size = length * 2;
9676 stripe_size /= num_stripes;
9677 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9678 stripe_size = length;
9679 stripe_size /= (num_stripes - 1);
9680 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9681 stripe_size = length;
9682 stripe_size /= (num_stripes - 2);
9684 stripe_size = length;
9690 * Check the chunk with its block group/dev list ref:
9691 * Return 0 if all refs seem valid.
9692 * Return 1 if only part of the refs seem valid and the rest need a later
9693 * rebuild pass, e.g. a missing block group rebuilt by searching the extent tree.
9694 * Return -1 if essential refs are missing and cannot be rebuilt.
/*
 * Cross-check one chunk record against the block group cache and the device
 * extent cache.  The format strings below describe each mismatch reported.
 * NOTE(review): the last parameter and the return statements are not visible
 * in this listing; check_chunks() passes a "silent" flag as the fourth
 * argument - confirm that it gates the output.
 */
9696 static int check_chunk_refs(struct chunk_record *chunk_rec,
9697 struct block_group_tree *block_group_cache,
9698 struct device_extent_tree *dev_extent_cache,
9701 struct cache_extent *block_group_item;
9702 struct block_group_record *block_group_rec;
9703 struct cache_extent *dev_extent_item;
9704 struct device_extent_record *dev_extent_rec;
9708 int metadump_v2 = 0;
/* Step 1: look the chunk up in the block group cache. */
9712 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9715 if (block_group_item) {
9716 block_group_rec = container_of(block_group_item,
9717 struct block_group_record,
/*
 * The chunk's length, offset and type flags are compared with the block
 * group's offset, objectid and flags respectively.
 */
9719 if (chunk_rec->length != block_group_rec->offset ||
9720 chunk_rec->offset != block_group_rec->objectid ||
9722 chunk_rec->type_flags != block_group_rec->flags)) {
9725 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9726 chunk_rec->objectid,
9731 chunk_rec->type_flags,
9732 block_group_rec->objectid,
9733 block_group_rec->type,
9734 block_group_rec->offset,
9735 block_group_rec->offset,
9736 block_group_rec->objectid,
9737 block_group_rec->flags);
/* Unlink the block group from its list and attach it to the chunk. */
9740 list_del_init(&block_group_rec->list);
9741 chunk_rec->bg_rec = block_group_rec;
9746 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9747 chunk_rec->objectid,
9752 chunk_rec->type_flags);
/* Step 2: each stripe is looked up as a device extent of this length. */
9759 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9760 chunk_rec->num_stripes);
9761 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9762 devid = chunk_rec->stripes[i].devid;
9763 offset = chunk_rec->stripes[i].offset;
9764 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9765 devid, offset, length);
9766 if (dev_extent_item) {
9767 dev_extent_rec = container_of(dev_extent_item,
9768 struct device_extent_record,
/* The device extent must agree on devid, offset, owning chunk and length. */
9770 if (dev_extent_rec->objectid != devid ||
9771 dev_extent_rec->offset != offset ||
9772 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9773 dev_extent_rec->length != length) {
9776 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9777 chunk_rec->objectid,
9780 chunk_rec->stripes[i].devid,
9781 chunk_rec->stripes[i].offset,
9782 dev_extent_rec->objectid,
9783 dev_extent_rec->offset,
9784 dev_extent_rec->length);
/* Move the device extent onto this chunk's dextents list. */
9787 list_move(&dev_extent_rec->chunk_list,
9788 &chunk_rec->dextents);
9793 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9794 chunk_rec->objectid,
9797 chunk_rec->stripes[i].devid,
9798 chunk_rec->stripes[i].offset);
9805 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Walk every chunk record in @chunk_cache and run check_chunk_refs() on it.
 * A record is appended to @good when the result is 0 and to @rebuild when it
 * is positive, provided the respective list pointer is non-NULL; records are
 * also appended to @bad (NOTE(review): that condition is not visible here).
 * Afterwards the block groups remaining on @block_group_cache->block_groups
 * and the device extents on @dev_extent_cache->no_chunk_orphans are iterated;
 * the messages state that these did not find their chunk.
 */
9806 int check_chunks(struct cache_tree *chunk_cache,
9807 struct block_group_tree *block_group_cache,
9808 struct device_extent_tree *dev_extent_cache,
9809 struct list_head *good, struct list_head *bad,
9810 struct list_head *rebuild, int silent)
9812 struct cache_extent *chunk_item;
9813 struct chunk_record *chunk_rec;
9814 struct block_group_record *bg_rec;
9815 struct device_extent_record *dext_rec;
9819 chunk_item = first_cache_extent(chunk_cache);
9820 while (chunk_item) {
9821 chunk_rec = container_of(chunk_item, struct chunk_record,
9823 err = check_chunk_refs(chunk_rec, block_group_cache,
9824 dev_extent_cache, silent);
/* Sort the chunk record onto the caller's result lists. */
9827 if (err == 0 && good)
9828 list_add_tail(&chunk_rec->list, good);
9829 if (err > 0 && rebuild)
9830 list_add_tail(&chunk_rec->list, rebuild);
9832 list_add_tail(&chunk_rec->list, bad);
9833 chunk_item = next_cache_extent(chunk_item);
/* Block groups still on this list were not claimed by check_chunk_refs(). */
9836 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9839 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9847 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9851 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of the cached device extents belonging to @dev_rec->devid
 * and compare the total with the device record's byte_used; the format
 * string below describes the mismatch that is reported.
 */
9862 static int check_device_used(struct device_record *dev_rec,
9863 struct device_extent_tree *dext_cache)
9865 struct cache_extent *cache;
9866 struct device_extent_record *dev_extent_rec;
/* Start the walk from the search result for (devid, 0). */
9869 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9871 dev_extent_rec = container_of(cache,
9872 struct device_extent_record,
/* NOTE(review): presumably the walk stops at the first extent of another device. */
9874 if (dev_extent_rec->objectid != dev_rec->devid)
/* Unlink the extent from its device_list and add its length to the total. */
9877 list_del_init(&dev_extent_rec->device_list);
9878 total_byte += dev_extent_rec->length;
9879 cache = next_cache_extent(cache);
9882 if (total_byte != dev_rec->byte_used) {
9884 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9885 total_byte, dev_rec->byte_used, dev_rec->objectid,
9886 dev_rec->type, dev_rec->offset);
9893 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on every device record in the @dev_cache rbtree,
 * then iterate the device extents on @dev_extent_cache->no_device_orphans;
 * the message states that these did not find their device.
 */
9894 static int check_devices(struct rb_root *dev_cache,
9895 struct device_extent_tree *dev_extent_cache)
9897 struct rb_node *dev_node;
9898 struct device_record *dev_rec;
9899 struct device_extent_record *dext_rec;
/* In-order walk of the device rbtree. */
9903 dev_node = rb_first(dev_cache);
9905 dev_rec = container_of(dev_node, struct device_record, node);
9906 err = check_device_used(dev_rec, dev_extent_cache);
9910 dev_node = rb_next(dev_node);
9912 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9915 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9916 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * the tail of @head.
 * NOTE(review): check_chunks_and_extents() passes drop_key == NULL for some
 * roots, so the memcpy below is presumably guarded by a NULL check on a line
 * not visible here - confirm.  The malloc failure handling is not visible
 * either.
 */
9923 static int add_root_item_to_list(struct list_head *head,
9924 u64 objectid, u64 bytenr, u64 last_snapshot,
9925 u8 level, u8 drop_level,
9926 struct btrfs_key *drop_key)
9929 struct root_item_record *ri_rec;
9930 ri_rec = malloc(sizeof(*ri_rec));
9933 ri_rec->bytenr = bytenr;
9934 ri_rec->objectid = objectid;
9935 ri_rec->level = level;
9936 ri_rec->drop_level = drop_level;
9937 ri_rec->last_snapshot = last_snapshot;
9939 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9940 list_add_tail(&ri_rec->list, head);
/*
 * Drain @list, unlinking one root_item_record at a time until it is empty.
 * NOTE(review): the statement releasing each record is not visible here -
 * confirm that the record is freed after list_del_init().
 */
9945 static void free_root_item_list(struct list_head *list)
9947 struct root_item_record *ri_rec;
9949 while (!list_empty(list)) {
9950 ri_rec = list_first_entry(list, struct root_item_record,
9952 list_del_init(&ri_rec->list);
/*
 * Consume the root_item_records queued on @list.  For each record the root
 * tree block at rec->bytenr is read, handed to add_root_to_pending() and
 * processed through run_next_block() with the record as last argument; the
 * buffer is then released and the record unlinked from the list.
 * A further run_next_block() call with a NULL record appears after the list
 * walk.
 * NOTE(review): the conditions and loops around the run_next_block() calls
 * and the error paths are not visible here.
 */
9957 static int deal_root_from_list(struct list_head *list,
9958 struct btrfs_root *root,
9959 struct block_info *bits,
9961 struct cache_tree *pending,
9962 struct cache_tree *seen,
9963 struct cache_tree *reada,
9964 struct cache_tree *nodes,
9965 struct cache_tree *extent_cache,
9966 struct cache_tree *chunk_cache,
9967 struct rb_root *dev_cache,
9968 struct block_group_tree *block_group_cache,
9969 struct device_extent_tree *dev_extent_cache)
9974 while (!list_empty(list)) {
9975 struct root_item_record *rec;
9976 struct extent_buffer *buf;
/* Always take the record at the head of the list. */
9977 rec = list_entry(list->next,
9978 struct root_item_record, list);
9980 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
/* Root block could not be read: drop the buffer reference. */
9981 if (!extent_buffer_uptodate(buf)) {
9982 free_extent_buffer(buf);
9986 ret = add_root_to_pending(buf, extent_cache, pending,
9987 seen, nodes, rec->objectid);
9991 * To rebuild extent tree, we need deal with snapshot
9992 * one by one, otherwise we deal with node firstly which
9993 * can maximize readahead.
9996 ret = run_next_block(root, bits, bits_nr, &last,
9997 pending, seen, reada, nodes,
9998 extent_cache, chunk_cache,
9999 dev_cache, block_group_cache,
10000 dev_extent_cache, rec);
/* Done with this root: release its buffer and unlink the record. */
10004 free_extent_buffer(buf);
10005 list_del(&rec->list);
10011 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10012 reada, nodes, extent_cache, chunk_cache,
10013 dev_cache, block_group_cache,
10014 dev_extent_cache, NULL);
/*
 * Top-level chunk/extent check.  Sets up the local caches, queues every root
 * (tree root, chunk root, then each ROOT_ITEM found in the tree root) on the
 * normal_trees or dropping_trees list, processes both lists with
 * deal_root_from_list(), then runs check_chunks(), check_extent_refs() and
 * check_devices(), and finally tears the caches down.
 * NOTE(review): labels, gotos and return statements are not visible here; the
 * -EAGAIN tests below belong to a restart path described by the comment on
 * check_block further down.
 */
10024 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
10026 struct rb_root dev_cache;
10027 struct cache_tree chunk_cache;
10028 struct block_group_tree block_group_cache;
10029 struct device_extent_tree dev_extent_cache;
10030 struct cache_tree extent_cache;
10031 struct cache_tree seen;
10032 struct cache_tree pending;
10033 struct cache_tree reada;
10034 struct cache_tree nodes;
10035 struct extent_io_tree excluded_extents;
10036 struct cache_tree corrupt_blocks;
10037 struct btrfs_path path;
10038 struct btrfs_key key;
10039 struct btrfs_key found_key;
10041 struct block_info *bits;
10043 struct extent_buffer *leaf;
10045 struct btrfs_root_item ri;
10046 struct list_head dropping_trees;
10047 struct list_head normal_trees;
10048 struct btrfs_root *root1;
10049 struct btrfs_root *root;
/* Initialise every per-run cache, tree and list. */
10053 root = fs_info->fs_root;
10054 dev_cache = RB_ROOT;
10055 cache_tree_init(&chunk_cache);
10056 block_group_tree_init(&block_group_cache);
10057 device_extent_tree_init(&dev_extent_cache);
10059 cache_tree_init(&extent_cache);
10060 cache_tree_init(&seen);
10061 cache_tree_init(&pending);
10062 cache_tree_init(&nodes);
10063 cache_tree_init(&reada);
10064 cache_tree_init(&corrupt_blocks);
10065 extent_io_tree_init(&excluded_extents);
10066 INIT_LIST_HEAD(&dropping_trees);
10067 INIT_LIST_HEAD(&normal_trees);
/* Publish the local caches and the free-extent hook through fs_info. */
10070 fs_info->excluded_extents = &excluded_extents;
10071 fs_info->fsck_extent_cache = &extent_cache;
10072 fs_info->free_extent_hook = free_extent_hook;
10073 fs_info->corrupt_blocks = &corrupt_blocks;
10077 bits = malloc(bits_nr * sizeof(struct block_info));
10083 if (ctx.progress_enabled) {
10084 ctx.tp = TASK_EXTENTS;
10085 task_start(ctx.info);
/* Queue the tree root and the chunk root first, both as normal trees. */
10089 root1 = fs_info->tree_root;
10090 level = btrfs_header_level(root1->node);
10091 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10092 root1->node->start, 0, level, 0, NULL);
10095 root1 = fs_info->chunk_root;
10096 level = btrfs_header_level(root1->node);
10097 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
10098 root1->node->start, 0, level, 0, NULL);
/* Walk the tree root looking for ROOT_ITEM keys. */
10101 btrfs_init_path(&path);
10104 key.type = BTRFS_ROOT_ITEM_KEY;
10105 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
10109 leaf = path.nodes[0];
10110 slot = path.slots[0];
10111 if (slot >= btrfs_header_nritems(path.nodes[0])) {
10112 ret = btrfs_next_leaf(root, &path);
10115 leaf = path.nodes[0];
10116 slot = path.slots[0];
10118 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
10119 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
10120 unsigned long offset;
10123 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
10124 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
10125 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * Roots whose drop_progress objectid is 0 go to normal_trees; the other
 * call queues the root on dropping_trees together with its drop key and
 * drop level.
 */
10126 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
10127 level = btrfs_root_level(&ri);
10128 ret = add_root_item_to_list(&normal_trees,
10129 found_key.objectid,
10130 btrfs_root_bytenr(&ri),
10131 last_snapshot, level,
10136 level = btrfs_root_level(&ri);
10137 objectid = found_key.objectid;
10138 btrfs_disk_key_to_cpu(&found_key,
10139 &ri.drop_progress);
10140 ret = add_root_item_to_list(&dropping_trees,
10142 btrfs_root_bytenr(&ri),
10143 last_snapshot, level,
10144 ri.drop_level, &found_key);
10151 btrfs_release_path(&path);
10154 * check_block can return -EAGAIN if it fixes something, please keep
10155 * this in mind when dealing with return values from these functions, if
10156 * we get -EAGAIN we want to fall through and restart the loop.
10158 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
10159 &seen, &reada, &nodes, &extent_cache,
10160 &chunk_cache, &dev_cache, &block_group_cache,
10161 &dev_extent_cache);
10163 if (ret == -EAGAIN)
10167 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
10168 &pending, &seen, &reada, &nodes,
10169 &extent_cache, &chunk_cache, &dev_cache,
10170 &block_group_cache, &dev_extent_cache);
10172 if (ret == -EAGAIN)
/* All roots processed: cross-check chunks, extent refs and devices. */
10177 ret = check_chunks(&chunk_cache, &block_group_cache,
10178 &dev_extent_cache, NULL, NULL, NULL, 0);
10180 if (ret == -EAGAIN)
10185 ret = check_extent_refs(root, &extent_cache);
10187 if (ret == -EAGAIN)
10192 ret = check_devices(&dev_cache, &dev_extent_cache);
10197 task_stop(ctx.info);
/*
 * Two cleanup sequences follow.  The first unhooks the caches from fs_info
 * and frees them; the second frees the same caches plus the extent record
 * cache.  NOTE(review): the labels separating the two are not visible here.
 */
10199 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10200 extent_io_tree_cleanup(&excluded_extents);
10201 fs_info->fsck_extent_cache = NULL;
10202 fs_info->free_extent_hook = NULL;
10203 fs_info->corrupt_blocks = NULL;
10204 fs_info->excluded_extents = NULL;
10207 free_chunk_cache_tree(&chunk_cache);
10208 free_device_cache_tree(&dev_cache);
10209 free_block_group_tree(&block_group_cache);
10210 free_device_extent_tree(&dev_extent_cache);
10211 free_extent_cache_tree(&seen);
10212 free_extent_cache_tree(&pending);
10213 free_extent_cache_tree(&reada);
10214 free_extent_cache_tree(&nodes);
10215 free_root_item_list(&normal_trees);
10216 free_root_item_list(&dropping_trees);
10219 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
10220 free_extent_cache_tree(&seen);
10221 free_extent_cache_tree(&pending);
10222 free_extent_cache_tree(&reada);
10223 free_extent_cache_tree(&nodes);
10224 free_chunk_cache_tree(&chunk_cache);
10225 free_block_group_tree(&block_group_cache);
10226 free_device_cache_tree(&dev_cache);
10227 free_device_extent_tree(&dev_extent_cache);
10228 free_extent_record_cache(&extent_cache);
10229 free_root_item_list(&normal_trees);
10230 free_root_item_list(&dropping_trees);
10231 extent_io_tree_cleanup(&excluded_extents);
10236 * Check backrefs of a tree block given by @bytenr or @eb.
10238 * @root: the root containing the @bytenr or @eb
10239 * @eb: tree block extent buffer, can be NULL
10240 * @bytenr: bytenr of the tree block to search
10241 * @level: tree level of the tree block
10242 * @owner: owner of the tree block
10244 * Return >0 for any error found and output error message
10245 * Return 0 for no error found
/*
 * Look up the extent or metadata item for @bytenr in the extent tree and
 * compare it with the tree block: TREE_BLOCK flag, generation, level and
 * reference count.  Then search for a backref whose offset matches
 * root->objectid or @owner, first among the inline refs and then as a keyed
 * TREE_BLOCK_REF item.  Shared block refs are followed by calling this
 * function again on the referencing block with @level + 1.
 * NOTE(review): the conditions guarding the checks that read @eb are not
 * visible here although @eb is documented as possibly NULL.
 */
10247 static int check_tree_block_ref(struct btrfs_root *root,
10248 struct extent_buffer *eb, u64 bytenr,
10249 int level, u64 owner)
10251 struct btrfs_key key;
10252 struct btrfs_root *extent_root = root->fs_info->extent_root;
10253 struct btrfs_path path;
10254 struct btrfs_extent_item *ei;
10255 struct btrfs_extent_inline_ref *iref;
10256 struct extent_buffer *leaf;
10262 u32 nodesize = root->fs_info->nodesize;
10265 int tree_reloc_root = 0;
/* Remember whether @bytenr is the root node of a tree reloc tree. */
10270 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10271 btrfs_header_bytenr(root->node) == bytenr)
10272 tree_reloc_root = 1;
10274 btrfs_init_path(&path);
10275 key.objectid = bytenr;
/* With SKINNY_METADATA the key type is METADATA_ITEM, otherwise EXTENT_ITEM. */
10276 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10277 key.type = BTRFS_METADATA_ITEM_KEY;
10279 key.type = BTRFS_EXTENT_ITEM_KEY;
10280 key.offset = (u64)-1;
10282 /* Search for the backref in extent tree */
10283 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10285 err |= BACKREF_MISSING;
10288 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10290 err |= BACKREF_MISSING;
10294 leaf = path.nodes[0];
10295 slot = path.slots[0];
10296 btrfs_item_key_to_cpu(leaf, &key, slot);
10298 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * The recorded level comes from the key offset for METADATA_ITEM keys and
 * from the btrfs_tree_block_info following the extent item otherwise; the
 * inline refs start right after whichever structure applies.
 */
10300 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10301 skinny_level = (int)key.offset;
10302 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10304 struct btrfs_tree_block_info *info;
10306 info = (struct btrfs_tree_block_info *)(ei + 1);
10307 skinny_level = btrfs_tree_block_level(leaf, info);
10308 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10315 if (!(btrfs_extent_flags(leaf, ei) &
10316 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10318 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10319 key.objectid, nodesize,
10320 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10321 err = BACKREF_MISMATCH;
10323 header_gen = btrfs_header_generation(eb);
10324 extent_gen = btrfs_extent_generation(leaf, ei);
10325 if (header_gen != extent_gen) {
10327 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10328 key.objectid, nodesize, header_gen,
10330 err = BACKREF_MISMATCH;
10332 if (level != skinny_level) {
10334 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10335 key.objectid, nodesize, level, skinny_level);
10336 err = BACKREF_MISMATCH;
10338 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10340 "extent[%llu %u] is referred by other roots than %llu",
10341 key.objectid, nodesize, root->objectid);
10342 err = BACKREF_MISMATCH;
10347 * Iterate the extent/metadata item to find the exact backref
10349 item_size = btrfs_item_size_nr(leaf, slot);
10350 ptr = (unsigned long)iref;
10351 end = (unsigned long)ei + item_size;
10352 while (ptr < end) {
10353 iref = (struct btrfs_extent_inline_ref *)ptr;
10354 type = btrfs_extent_inline_ref_type(leaf, iref);
10355 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10357 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10358 (offset == root->objectid || offset == owner)) {
10360 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10362 * Backref of tree reloc root points to itself, no need
10363 * to check backref any more.
10365 if (tree_reloc_root)
10368 /* Check if the backref points to valid referencer */
10369 found_ref = !check_tree_block_ref(root, NULL,
10370 offset, level + 1, owner);
10375 ptr += btrfs_extent_inline_ref_size(type);
10379 * Inlined extent item doesn't have what we need, check
10380 * TREE_BLOCK_REF_KEY
10383 btrfs_release_path(&path);
10384 key.objectid = bytenr;
10385 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10386 key.offset = root->objectid;
10388 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10393 err |= BACKREF_MISSING;
10395 btrfs_release_path(&path);
10396 if (eb && (err & BACKREF_MISSING))
10397 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10398 bytenr, nodesize, owner, level);
10403 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10405 * Return >0 for any error found and output error message
10406 * Return 0 for no error found
/*
 * Verify the file extent item at @slot of @eb.  Hole and inline extents are
 * tested first.  For the others the disk and logical byte counts are checked
 * for sector-size alignment and added to the data_bytes_allocated and
 * data_bytes_referenced totals, the matching extent item is looked up in the
 * extent tree and must carry the DATA flag, and a data backref is searched:
 * inline refs first, then a keyed EXTENT_DATA_REF item, then a keyed
 * SHARED_DATA_REF item.  BACKREF_MISSING is recorded when none is found.
 */
10408 static int check_extent_data_item(struct btrfs_root *root,
10409 struct extent_buffer *eb, int slot)
10411 struct btrfs_file_extent_item *fi;
10412 struct btrfs_path path;
10413 struct btrfs_root *extent_root = root->fs_info->extent_root;
10414 struct btrfs_key fi_key;
10415 struct btrfs_key dbref_key;
10416 struct extent_buffer *leaf;
10417 struct btrfs_extent_item *ei;
10418 struct btrfs_extent_inline_ref *iref;
10419 struct btrfs_extent_data_ref *dref;
10422 u64 disk_num_bytes;
10423 u64 extent_num_bytes;
10430 int found_dbackref = 0;
10434 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10435 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10437 /* Nothing to check for hole and inline data extents */
10438 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10439 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10442 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10443 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10444 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10446 /* Check unaligned disk_num_bytes and num_bytes */
10447 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10449 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10450 fi_key.objectid, fi_key.offset, disk_num_bytes,
10451 root->fs_info->sectorsize);
10452 err |= BYTES_UNALIGNED;
10454 data_bytes_allocated += disk_num_bytes;
10456 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10458 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10459 fi_key.objectid, fi_key.offset, extent_num_bytes,
10460 root->fs_info->sectorsize);
10461 err |= BYTES_UNALIGNED;
10463 data_bytes_referenced += extent_num_bytes;
10465 owner = btrfs_header_owner(eb);
10467 /* Check the extent item of the file extent in extent tree */
10468 btrfs_init_path(&path);
10469 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10470 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10471 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10473 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10477 leaf = path.nodes[0];
10478 slot = path.slots[0];
10479 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10481 extent_flags = btrfs_extent_flags(leaf, ei);
10483 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10485 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10486 disk_bytenr, disk_num_bytes,
10487 BTRFS_EXTENT_FLAG_DATA);
10488 err |= BACKREF_MISMATCH;
10491 /* Check data backref inside that extent item */
10492 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10493 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10494 ptr = (unsigned long)iref;
10495 end = (unsigned long)ei + item_size;
10496 while (ptr < end) {
10497 iref = (struct btrfs_extent_inline_ref *)ptr;
10498 type = btrfs_extent_inline_ref_type(leaf, iref);
10499 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
/*
 * An EXTENT_DATA_REF matches when its root equals the leaf owner or
 * root->objectid; a SHARED_DATA_REF is validated through
 * check_tree_block_ref() on the block named by the ref offset.
 */
10501 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10502 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10503 if (ref_root == owner || ref_root == root->objectid)
10504 found_dbackref = 1;
10505 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10506 found_dbackref = !check_tree_block_ref(root, NULL,
10507 btrfs_extent_inline_ref_offset(leaf, iref),
10511 if (found_dbackref)
10513 ptr += btrfs_extent_inline_ref_size(type);
10516 if (!found_dbackref) {
10517 btrfs_release_path(&path);
10519 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10520 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10521 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10522 dbref_key.offset = hash_extent_data_ref(root->objectid,
10523 fi_key.objectid, fi_key.offset);
10525 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10526 &dbref_key, &path, 0, 0);
10528 found_dbackref = 1;
10532 btrfs_release_path(&path);
10535 * Neither inlined nor EXTENT_DATA_REF found, try
10536 * SHARED_DATA_REF as last chance.
10538 dbref_key.objectid = disk_bytenr;
10539 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10540 dbref_key.offset = eb->start;
10542 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10543 &dbref_key, &path, 0, 0);
10545 found_dbackref = 1;
10551 if (!found_dbackref)
10552 err |= BACKREF_MISSING;
10553 btrfs_release_path(&path);
10554 if (err & BACKREF_MISSING) {
10555 error("data extent[%llu %llu] backref lost",
10556 disk_bytenr, disk_num_bytes);
10562 * Get real tree block level for the case like shared block
10563 * Return >= 0 as tree level
10564 * Return <0 for error
/*
 * Resolve the level of the tree block at @bytenr from two sources: the level
 * recorded by its extent tree item (key offset for METADATA_ITEM keys,
 * btrfs_tree_block_info otherwise) and the level in the block header, read
 * with the generation taken from the extent item.  The two are compared and
 * the header level is returned.
 * NOTE(review): the error returns, including the one taken when the two
 * levels differ, are not visible here.
 */
10566 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10568 struct extent_buffer *eb;
10569 struct btrfs_path path;
10570 struct btrfs_key key;
10571 struct btrfs_extent_item *ei;
10578 /* Search extent tree for extent generation and level */
10579 key.objectid = bytenr;
10580 key.type = BTRFS_METADATA_ITEM_KEY;
10581 key.offset = (u64)-1;
10583 btrfs_init_path(&path);
10584 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10587 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10595 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10596 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10597 struct btrfs_extent_item);
/* The item found must describe a tree block. */
10598 flags = btrfs_extent_flags(path.nodes[0], ei);
10599 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10604 /* Get transid for later read_tree_block() check */
10605 transid = btrfs_extent_generation(path.nodes[0], ei);
10607 /* Get backref level as one source */
10608 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10609 backref_level = key.offset;
10611 struct btrfs_tree_block_info *info;
10613 info = (struct btrfs_tree_block_info *)(ei + 1);
10614 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10616 btrfs_release_path(&path);
10618 /* Get level from tree block as an alternative source */
10619 eb = read_tree_block(fs_info, bytenr, transid);
10620 if (!extent_buffer_uptodate(eb)) {
10621 free_extent_buffer(eb);
10624 header_level = btrfs_header_level(eb);
10625 free_extent_buffer(eb);
10627 if (header_level != backref_level)
10629 return header_level;
10632 btrfs_release_path(&path);
10637 * Check if a tree block backref is valid (points to a valid tree block)
10638 * if level == -1, level will be resolved
10639 * Return >0 for any error found and print error message
/*
 * Verify that root @root_id references the tree block at @bytenr.  The block
 * is read to obtain its first key, that key is searched in the root with
 * path.lowest_level set to @level, and the node found at that level is
 * compared with @bytenr and @level.  Failures to resolve the level, read the
 * root, read the block or complete the search are recorded as
 * REFERENCER_MISSING; differing bytenr or level as REFERENCER_MISMATCH.
 */
10641 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10642 u64 bytenr, int level)
10644 struct btrfs_root *root;
10645 struct btrfs_key key;
10646 struct btrfs_path path;
10647 struct extent_buffer *eb;
10648 struct extent_buffer *node;
10649 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10653 /* Query level for level == -1 special case */
10655 level = query_tree_block_level(fs_info, bytenr);
10657 err |= REFERENCER_MISSING;
/* Load the referencing root by its ROOT_ITEM key. */
10661 key.objectid = root_id;
10662 key.type = BTRFS_ROOT_ITEM_KEY;
10663 key.offset = (u64)-1;
10665 root = btrfs_read_fs_root(fs_info, &key);
10666 if (IS_ERR(root)) {
10667 err |= REFERENCER_MISSING;
10671 /* Read out the tree block to get item/node key */
10672 eb = read_tree_block(fs_info, bytenr, 0);
10673 if (!extent_buffer_uptodate(eb)) {
10674 err |= REFERENCER_MISSING;
10675 free_extent_buffer(eb);
10679 /* Empty tree, no need to check key */
10680 if (!btrfs_header_nritems(eb) && !level) {
10681 free_extent_buffer(eb);
/* First key of the block: a node key or an item key (selector line not visible). */
10686 btrfs_node_key_to_cpu(eb, &key, 0);
10688 btrfs_item_key_to_cpu(eb, &key, 0);
10690 free_extent_buffer(eb);
10692 btrfs_init_path(&path);
10693 path.lowest_level = level;
10694 /* Search with the first key, to ensure we can reach it */
10695 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10697 err |= REFERENCER_MISSING;
/* The node reached at @level must be the block being checked. */
10701 node = path.nodes[level];
10702 if (btrfs_header_bytenr(node) != bytenr) {
10704 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10705 bytenr, nodesize, bytenr,
10706 btrfs_header_bytenr(node));
10707 err |= REFERENCER_MISMATCH;
10709 if (btrfs_header_level(node) != level) {
10711 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10712 bytenr, nodesize, level,
10713 btrfs_header_level(node));
10714 err |= REFERENCER_MISMATCH;
10718 btrfs_release_path(&path);
10720 if (err & REFERENCER_MISSING) {
10722 error("extent [%llu %d] lost referencer (owner: %llu)",
10723 bytenr, nodesize, root_id);
10726 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10727 bytenr, nodesize, root_id, level);
10734 * Check if tree block @eb is tree reloc root.
10735 * Return 0 if it's not or any problem happens
10736 * Return 1 if it's a tree reloc root
/*
 * Read, without caching, the root keyed (BTRFS_TREE_RELOC_OBJECTID,
 * BTRFS_ROOT_ITEM_KEY, owner of @eb) and compare the bytenr of its root node
 * with the bytenr of @eb.  The temporary root is released with
 * btrfs_free_fs_root() afterwards.
 */
10738 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10739 struct extent_buffer *eb)
10741 struct btrfs_root *tree_reloc_root;
10742 struct btrfs_key key;
10743 u64 bytenr = btrfs_header_bytenr(eb);
10744 u64 owner = btrfs_header_owner(eb);
10747 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10748 key.offset = owner;
10749 key.type = BTRFS_ROOT_ITEM_KEY;
10751 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10752 if (IS_ERR(tree_reloc_root))
10755 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10757 btrfs_free_fs_root(tree_reloc_root);
10762 * Check referencer for shared block backref
10763 * If level == -1, this function will resolve the level.
/*
 * Read the @parent tree block and scan its node pointers for one equal to
 * @bytenr.  When @parent equals @bytenr the block is tested with
 * is_tree_reloc_root() instead.  The parent header level is compared with
 * @level + 1 before the scan.
 * Returns REFERENCER_MISSING, after reporting it, when no parent reference
 * was found.
 */
10765 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10766 u64 parent, u64 bytenr, int level)
10768 struct extent_buffer *eb;
10770 int found_parent = 0;
10773 eb = read_tree_block(fs_info, parent, 0);
10774 if (!extent_buffer_uptodate(eb))
10778 level = query_tree_block_level(fs_info, bytenr);
10782 /* It's possible it's a tree reloc root */
10783 if (parent == bytenr) {
10784 if (is_tree_reloc_root(fs_info, eb))
10789 if (level + 1 != btrfs_header_level(eb))
10792 nr = btrfs_header_nritems(eb);
10793 for (i = 0; i < nr; i++) {
10794 if (bytenr == btrfs_node_blockptr(eb, i)) {
10800 free_extent_buffer(eb);
10801 if (!found_parent) {
10803 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10804 bytenr, fs_info->nodesize, parent, level);
10805 return REFERENCER_MISSING;
10811 * Check referencer for normal (inlined) data ref
10812 * If len == 0, it will be resolved by searching in extent tree
/*
 * Check a keyed or inline data backref: walk the EXTENT_DATA items of inode
 * @objectid in root @root_id, match them against @bytenr, @len and the
 * backref offset, and compare the number found (found_count) with @count.
 * Returns REFERENCER_MISSING, after reporting it, when the counts differ.
 * NOTE(review): the line that increments found_count and the right-hand side
 * of the offset comparison are not visible here.
 */
10814 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10815 u64 root_id, u64 objectid, u64 offset,
10816 u64 bytenr, u64 len, u32 count)
10818 struct btrfs_root *root;
10819 struct btrfs_root *extent_root = fs_info->extent_root;
10820 struct btrfs_key key;
10821 struct btrfs_path path;
10822 struct extent_buffer *leaf;
10823 struct btrfs_file_extent_item *fi;
10824 u32 found_count = 0;
/* Locate the EXTENT_ITEM for @bytenr in the extent tree. */
10829 key.objectid = bytenr;
10830 key.type = BTRFS_EXTENT_ITEM_KEY;
10831 key.offset = (u64)-1;
10833 btrfs_init_path(&path);
10834 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10837 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10840 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10841 if (key.objectid != bytenr ||
10842 key.type != BTRFS_EXTENT_ITEM_KEY)
10845 btrfs_release_path(&path);
/* Load the root that is supposed to reference the extent. */
10847 key.objectid = root_id;
10848 key.type = BTRFS_ROOT_ITEM_KEY;
10849 key.offset = (u64)-1;
10850 btrfs_init_path(&path);
10852 root = btrfs_read_fs_root(fs_info, &key);
10856 key.objectid = objectid;
10857 key.type = BTRFS_EXTENT_DATA_KEY;
10859 * It can be nasty as data backref offset is
10860 * file offset - file extent offset, which is smaller or
10861 * equal to original backref offset. The only special case is
10862 * overflow. So we need to special check and do further search.
/* An offset with the top bit set starts the search at file offset 0. */
10864 key.offset = offset & (1ULL << 63) ? 0 : offset;
10866 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10871 * Search afterwards to get correct one
10872 * NOTE: As we must do a comprehensive check on the data backref to
10873 * make sure the dref count also matches, we must iterate all file
10874 * extents for that inode.
10877 leaf = path.nodes[0];
10878 slot = path.slots[0];
10880 if (slot >= btrfs_header_nritems(leaf))
10882 btrfs_item_key_to_cpu(leaf, &key, slot);
10883 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10885 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10887 * Except normal disk bytenr and disk num bytes, we still
10888 * need to do extra check on dbackref offset as
10889 * dbackref offset = file_offset - file_extent_offset
10891 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10892 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10893 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10898 ret = btrfs_next_item(root, &path);
10903 btrfs_release_path(&path);
10904 if (found_count != count) {
10906 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10907 bytenr, len, root_id, objectid, offset, count, found_count);
10908 return REFERENCER_MISSING;
10914 * Check if the referencer of a shared data backref exists
/*
 * check_shared_data_backref() - look for the file extent item behind a
 * shared data backref.
 *
 * @fs_info: filesystem the tree block is read from
 * @parent:  bytenr of the tree block named by the backref
 * @bytenr:  start of the data extent being verified
 *
 * Reads the tree block at @parent and walks its items, testing the key type
 * against EXTENT_DATA and the file extent type against INLINE, and comparing
 * the disk bytenr of the file extent with @bytenr.
 *
 * Prints an error and returns REFERENCER_MISSING when found_parent is still
 * zero after the walk.
 */
10916 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10917 u64 parent, u64 bytenr)
10919 struct extent_buffer *eb;
10920 struct btrfs_key key;
10921 struct btrfs_file_extent_item *fi;
10923 int found_parent = 0;
10926 eb = read_tree_block(fs_info, parent, 0);
10927 if (!extent_buffer_uptodate(eb))
10930 nr = btrfs_header_nritems(eb);
10931 for (i = 0; i < nr; i++) {
10932 btrfs_item_key_to_cpu(eb, &key, i);
10933 if (key.type != BTRFS_EXTENT_DATA_KEY)
10936 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10937 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10940 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10947 free_extent_buffer(eb);
10948 if (!found_parent) {
10949 error("shared extent %llu referencer lost (parent: %llu)",
10951 return REFERENCER_MISSING;
10957 * This function will check a given extent item, including its backref and
10958 * itself (like crossing stripe boundary and type)
10960 * Since we don't use extent_record anymore, introduce new error bit
/*
 * check_extent_item() - validate one EXTENT_ITEM / METADATA_ITEM and the
 * inline backrefs stored behind it.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the item
 * @slot:    slot of the item inside @eb
 *
 * Side effect: the file-scope counter bytes_used is increased, by key.offset
 * for an EXTENT_ITEM key (nodesize is added on the alternative path).
 *
 * Problems are reported with error() and collected as error bits:
 *  - an item smaller than struct btrfs_extent_item is reported as an
 *    unsupported COMPAT_EXTENT_TREE_V0 item;
 *  - a metadata extent for which check_crossing_stripes() fires sets
 *    CROSSING_STRIPE_BOUNDARY;
 *  - running past the end of the item sets ITEM_SIZE_MISMATCH;
 *  - each inline ref is handed to the checker matching its type (tree block,
 *    shared block, extent data, shared data); any other type sets
 *    UNKNOWN_TYPE.
 *
 * The inline refs start right after the extent item, or after the
 * btrfs_tree_block_info for old-style EXTENT_ITEM metadata. The tree level
 * comes from that tree_block_info in the old format and from key.offset for
 * a METADATA_ITEM.
 */
10962 static int check_extent_item(struct btrfs_fs_info *fs_info,
10963 struct extent_buffer *eb, int slot)
10965 struct btrfs_extent_item *ei;
10966 struct btrfs_extent_inline_ref *iref;
10967 struct btrfs_extent_data_ref *dref;
10971 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10972 u32 item_size = btrfs_item_size_nr(eb, slot);
10977 struct btrfs_key key;
10981 btrfs_item_key_to_cpu(eb, &key, slot);
10982 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10983 bytes_used += key.offset;
10985 bytes_used += nodesize;
10987 if (item_size < sizeof(*ei)) {
10989 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10990 * old thing when on disk format is still un-determined.
10991 * No need to care about it anymore
10993 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10997 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10998 flags = btrfs_extent_flags(eb, ei);
11000 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
11002 if (metadata && check_crossing_stripes(global_info, key.objectid,
11004 error("bad metadata [%llu, %llu) crossing stripe boundary",
11005 key.objectid, key.objectid + nodesize);
11006 err |= CROSSING_STRIPE_BOUNDARY;
11009 ptr = (unsigned long)(ei + 1);
11011 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
11012 /* Old EXTENT_ITEM metadata */
11013 struct btrfs_tree_block_info *info;
11015 info = (struct btrfs_tree_block_info *)ptr;
11016 level = btrfs_tree_block_level(eb, info);
11017 ptr += sizeof(struct btrfs_tree_block_info);
11019 /* New METADATA_ITEM */
11020 level = key.offset;
11022 end = (unsigned long)ei + item_size;
11025 /* Reached extent item end normally */
11029 /* Beyond extent item end, wrong item size */
11031 err |= ITEM_SIZE_MISMATCH;
11032 error("extent item at bytenr %llu slot %d has wrong size",
11037 /* Now check every backref in this extent item */
11038 iref = (struct btrfs_extent_inline_ref *)ptr;
11039 type = btrfs_extent_inline_ref_type(eb, iref);
11040 offset = btrfs_extent_inline_ref_offset(eb, iref);
11042 case BTRFS_TREE_BLOCK_REF_KEY:
11043 ret = check_tree_block_backref(fs_info, offset, key.objectid,
11047 case BTRFS_SHARED_BLOCK_REF_KEY:
11048 ret = check_shared_block_backref(fs_info, offset, key.objectid,
11052 case BTRFS_EXTENT_DATA_REF_KEY:
11053 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11054 ret = check_extent_data_backref(fs_info,
11055 btrfs_extent_data_ref_root(eb, dref),
11056 btrfs_extent_data_ref_objectid(eb, dref),
11057 btrfs_extent_data_ref_offset(eb, dref),
11058 key.objectid, key.offset,
11059 btrfs_extent_data_ref_count(eb, dref));
11062 case BTRFS_SHARED_DATA_REF_KEY:
11063 ret = check_shared_data_backref(fs_info, offset, key.objectid);
11067 error("extent[%llu %d %llu] has unknown ref type: %d",
11068 key.objectid, key.type, key.offset, type);
11069 err |= UNKNOWN_TYPE;
11073 ptr += btrfs_extent_inline_ref_size(type);
11081 * Check if a dev extent item is referred correctly by its chunk
/*
 * check_dev_extent_item() - cross-check a DEV_EXTENT item against the chunk
 * it names.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev extent item
 * @slot:    slot of the item inside @eb
 *
 * The chunk key is built from the chunk objectid and chunk offset stored in
 * the dev extent and is searched in the chunk tree. The chunk item is passed
 * to btrfs_check_chunk_valid(), its stripe length is compared with the dev
 * extent length, and its stripes are scanned for one whose devid and offset
 * equal the objectid and offset of the dev extent key.
 *
 * Prints an error and returns REFERENCER_MISSING when found_chunk is still
 * zero at the end.
 */
11083 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
11084 struct extent_buffer *eb, int slot)
11086 struct btrfs_root *chunk_root = fs_info->chunk_root;
11087 struct btrfs_dev_extent *ptr;
11088 struct btrfs_path path;
11089 struct btrfs_key chunk_key;
11090 struct btrfs_key devext_key;
11091 struct btrfs_chunk *chunk;
11092 struct extent_buffer *l;
11096 int found_chunk = 0;
11099 btrfs_item_key_to_cpu(eb, &devext_key, slot);
11100 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
11101 length = btrfs_dev_extent_length(eb, ptr);
11103 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
11104 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11105 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
11107 btrfs_init_path(&path);
11108 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11113 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
11114 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
11119 if (btrfs_stripe_length(fs_info, l, chunk) != length)
11122 num_stripes = btrfs_chunk_num_stripes(l, chunk);
11123 for (i = 0; i < num_stripes; i++) {
11124 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
11125 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
11127 if (devid == devext_key.objectid &&
11128 offset == devext_key.offset) {
11134 btrfs_release_path(&path);
11135 if (!found_chunk) {
11137 "device extent[%llu, %llu, %llu] did not find the related chunk",
11138 devext_key.objectid, devext_key.offset, length);
11139 return REFERENCER_MISSING;
11145 * Check if the used space is correct with the dev item
/*
 * check_dev_item() - compare the bytes_used field of a DEV_ITEM with the sum
 * of the lengths of its dev extents.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev item
 * @slot:    slot of the item inside @eb
 *
 * Searches the device tree with a DEV_EXTENT key whose objectid is the
 * device id, then iterates with btrfs_next_item(), adding
 * btrfs_dev_extent_length() of the visited items to total. Items with a
 * larger objectid, or with another key type or objectid, are tested for
 * before the length is added.
 *
 * Returns REFERENCER_MISSING on the path that reports that no related dev
 * extent can be found, and ACCOUNTING_MISMATCH when used != total.
 *
 * NOTE(review): the mismatch branch loads the dev item key into key but the
 * message then prints BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and
 * dev_id instead of the key fields -- confirm whether that is intended.
 */
11147 static int check_dev_item(struct btrfs_fs_info *fs_info,
11148 struct extent_buffer *eb, int slot)
11150 struct btrfs_root *dev_root = fs_info->dev_root;
11151 struct btrfs_dev_item *dev_item;
11152 struct btrfs_path path;
11153 struct btrfs_key key;
11154 struct btrfs_dev_extent *ptr;
11160 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
11161 dev_id = btrfs_device_id(eb, dev_item);
11162 used = btrfs_device_bytes_used(eb, dev_item);
11164 key.objectid = dev_id;
11165 key.type = BTRFS_DEV_EXTENT_KEY;
11168 btrfs_init_path(&path);
11169 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
11171 btrfs_item_key_to_cpu(eb, &key, slot);
11172 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
11173 key.objectid, key.type, key.offset);
11174 btrfs_release_path(&path);
11175 return REFERENCER_MISSING;
11178 /* Iterate dev_extents to calculate the used space of a device */
11180 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11183 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11184 if (key.objectid > dev_id)
11186 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11189 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11190 struct btrfs_dev_extent);
11191 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11193 ret = btrfs_next_item(dev_root, &path);
11197 btrfs_release_path(&path);
11199 if (used != total) {
11200 btrfs_item_key_to_cpu(eb, &key, slot);
11202 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11203 total, used, BTRFS_ROOT_TREE_OBJECTID,
11204 BTRFS_DEV_EXTENT_KEY, dev_id);
11205 return ACCOUNTING_MISMATCH;
11211 * Check a block group item with its referencer (chunk) and its used space
11212 * with extent/metadata item
/*
 * check_block_group_item() - verify a BLOCK_GROUP_ITEM against its chunk and
 * against the extents that fall inside it.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the block group item
 * @slot:    slot of the item inside @eb
 *
 * Step 1: the chunk item keyed (BTRFS_FIRST_CHUNK_TREE_OBJECTID, CHUNK_ITEM,
 * block group start) is searched in the chunk tree. The not-found path sets
 * REFERENCER_MISSING; a chunk length that does not match sets
 * REFERENCER_MISMATCH.
 *
 * Step 2: the extent tree is iterated from the block group start. Iteration
 * tests for objectid >= start + length, and only EXTENT_ITEM and
 * METADATA_ITEM keys at or after the start are accounted. A data extent in a
 * block group without the DATA flag, or a tree block in one without SYSTEM
 * or METADATA, sets CHUNK_TYPE_MISMATCH.
 *
 * Step 3: the accumulated total is compared with the used field of the block
 * group item; a difference sets ACCOUNTING_MISMATCH.
 */
11214 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11215 struct extent_buffer *eb, int slot)
11217 struct btrfs_root *extent_root = fs_info->extent_root;
11218 struct btrfs_root *chunk_root = fs_info->chunk_root;
11219 struct btrfs_block_group_item *bi;
11220 struct btrfs_block_group_item bg_item;
11221 struct btrfs_path path;
11222 struct btrfs_key bg_key;
11223 struct btrfs_key chunk_key;
11224 struct btrfs_key extent_key;
11225 struct btrfs_chunk *chunk;
11226 struct extent_buffer *leaf;
11227 struct btrfs_extent_item *ei;
11228 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11236 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11237 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11238 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11239 used = btrfs_block_group_used(&bg_item);
11240 bg_flags = btrfs_block_group_flags(&bg_item);
11242 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11243 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11244 chunk_key.offset = bg_key.objectid;
11246 btrfs_init_path(&path);
11247 /* Search for the referencer chunk */
11248 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11251 "block group[%llu %llu] did not find the related chunk item",
11252 bg_key.objectid, bg_key.offset);
11253 err |= REFERENCER_MISSING;
11255 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11256 struct btrfs_chunk);
11257 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11260 "block group[%llu %llu] related chunk item length does not match",
11261 bg_key.objectid, bg_key.offset);
11262 err |= REFERENCER_MISMATCH;
11265 btrfs_release_path(&path);
11267 /* Search from the block group bytenr */
11268 extent_key.objectid = bg_key.objectid;
11269 extent_key.type = 0;
11270 extent_key.offset = 0;
11272 btrfs_init_path(&path);
11273 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11277 /* Iterate extent tree to account used space */
11279 leaf = path.nodes[0];
11281 /* Search slot can point to the last item beyond leaf nritems */
11282 if (path.slots[0] >= btrfs_header_nritems(leaf))
11285 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11286 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11289 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11290 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11292 if (extent_key.objectid < bg_key.objectid)
11295 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11298 total += extent_key.offset;
11300 ei = btrfs_item_ptr(leaf, path.slots[0],
11301 struct btrfs_extent_item);
11302 flags = btrfs_extent_flags(leaf, ei);
11303 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11304 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11306 "bad extent[%llu, %llu) type mismatch with chunk",
11307 extent_key.objectid,
11308 extent_key.objectid + extent_key.offset);
11309 err |= CHUNK_TYPE_MISMATCH;
11311 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11312 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11313 BTRFS_BLOCK_GROUP_METADATA))) {
11315 "bad extent[%llu, %llu) type mismatch with chunk",
11316 extent_key.objectid,
11317 extent_key.objectid + nodesize);
11318 err |= CHUNK_TYPE_MISMATCH;
11322 ret = btrfs_next_item(extent_root, &path);
11328 btrfs_release_path(&path);
11330 if (total != used) {
11332 "block group[%llu %llu] used %llu but extent items used %llu",
11333 bg_key.objectid, bg_key.offset, used, total);
11334 err |= ACCOUNTING_MISMATCH;
11340 * Check a chunk item.
11341 * Including checking all referred dev_extents and block group
/*
 * check_chunk_item() - verify a CHUNK_ITEM against its block group item and
 * the dev extent of every stripe.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the chunk item
 * @slot:    slot of the item inside @eb
 *
 * - The chunk is passed to btrfs_check_chunk_valid(); the failure path
 *   reports the chunk as invalid and sets BYTES_UNALIGNED | UNKNOWN_TYPE.
 * - The block group item keyed (chunk start, BLOCK_GROUP_ITEM, chunk length)
 *   is searched in the extent tree. The not-found path sets
 *   REFERENCER_MISSING, and so does a block group whose flags differ from
 *   the chunk type.
 * - For each stripe the dev extent keyed (stripe devid, DEV_EXTENT, stripe
 *   offset) is searched in the device tree. It must carry the objectid and
 *   offset of the chunk key and a length equal to the stripe length,
 *   otherwise control jumps to not_match_dev, where BACKREF_MISSING is set
 *   and the stripe is reported.
 *
 * NOTE(review): the flags mismatch sets REFERENCER_MISSING although the item
 * was found; a mismatch bit may have been meant -- confirm.
 */
11343 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11344 struct extent_buffer *eb, int slot)
11346 struct btrfs_root *extent_root = fs_info->extent_root;
11347 struct btrfs_root *dev_root = fs_info->dev_root;
11348 struct btrfs_path path;
11349 struct btrfs_key chunk_key;
11350 struct btrfs_key bg_key;
11351 struct btrfs_key devext_key;
11352 struct btrfs_chunk *chunk;
11353 struct extent_buffer *leaf;
11354 struct btrfs_block_group_item *bi;
11355 struct btrfs_block_group_item bg_item;
11356 struct btrfs_dev_extent *ptr;
11368 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11369 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11370 length = btrfs_chunk_length(eb, chunk);
11371 chunk_end = chunk_key.offset + length;
11372 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11375 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11377 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11380 type = btrfs_chunk_type(eb, chunk);
11382 bg_key.objectid = chunk_key.offset;
11383 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11384 bg_key.offset = length;
11386 btrfs_init_path(&path);
11387 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11390 "chunk[%llu %llu) did not find the related block group item",
11391 chunk_key.offset, chunk_end);
11392 err |= REFERENCER_MISSING;
11394 leaf = path.nodes[0];
11395 bi = btrfs_item_ptr(leaf, path.slots[0],
11396 struct btrfs_block_group_item);
11397 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11399 if (btrfs_block_group_flags(&bg_item) != type) {
11401 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11402 chunk_key.offset, chunk_end, type,
11403 btrfs_block_group_flags(&bg_item));
11404 err |= REFERENCER_MISSING;
11408 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11409 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11410 for (i = 0; i < num_stripes; i++) {
11411 btrfs_release_path(&path);
11412 btrfs_init_path(&path);
11413 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11414 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11415 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11417 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11420 goto not_match_dev;
11422 leaf = path.nodes[0];
11423 ptr = btrfs_item_ptr(leaf, path.slots[0],
11424 struct btrfs_dev_extent);
11425 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11426 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11427 if (objectid != chunk_key.objectid ||
11428 offset != chunk_key.offset ||
11429 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11430 goto not_match_dev;
11433 err |= BACKREF_MISSING;
11435 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11436 chunk_key.objectid, chunk_end, i);
11439 btrfs_release_path(&path);
11445 * Main entry function to check known items and update related accounting info
/*
 * check_leaf_items() - dispatch the items of leaf @eb to the checker that
 * matches each key type.
 *
 * @root: tree the leaf belongs to; root->fs_info is handed to the checkers
 * @eb:   leaf whose items are examined
 *
 * Dispatch table:
 *   EXTENT_DATA                 check_extent_data_item()
 *   BLOCK_GROUP_ITEM            check_block_group_item()
 *   DEV_ITEM                    check_dev_item()
 *   CHUNK_ITEM                  check_chunk_item()
 *   DEV_EXTENT                  check_dev_extent_item()
 *   EXTENT_ITEM, METADATA_ITEM  check_extent_item()
 *   TREE_BLOCK_REF              check_tree_block_backref()
 *   EXTENT_DATA_REF             check_extent_data_backref()
 *   SHARED_BLOCK_REF            check_shared_block_backref()
 *   SHARED_DATA_REF             check_shared_data_backref()
 *
 * EXTENT_CSUM items are only accounted: their item size is added to the
 * file-scope counter total_csum_bytes. The slot is advanced while it stays
 * below btrfs_header_nritems(eb).
 */
11447 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11449 struct btrfs_fs_info *fs_info = root->fs_info;
11450 struct btrfs_key key;
11453 struct btrfs_extent_data_ref *dref;
11458 btrfs_item_key_to_cpu(eb, &key, slot);
11462 case BTRFS_EXTENT_DATA_KEY:
11463 ret = check_extent_data_item(root, eb, slot);
11466 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11467 ret = check_block_group_item(fs_info, eb, slot);
11470 case BTRFS_DEV_ITEM_KEY:
11471 ret = check_dev_item(fs_info, eb, slot);
11474 case BTRFS_CHUNK_ITEM_KEY:
11475 ret = check_chunk_item(fs_info, eb, slot);
11478 case BTRFS_DEV_EXTENT_KEY:
11479 ret = check_dev_extent_item(fs_info, eb, slot);
11482 case BTRFS_EXTENT_ITEM_KEY:
11483 case BTRFS_METADATA_ITEM_KEY:
11484 ret = check_extent_item(fs_info, eb, slot);
11487 case BTRFS_EXTENT_CSUM_KEY:
11488 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11490 case BTRFS_TREE_BLOCK_REF_KEY:
11491 ret = check_tree_block_backref(fs_info, key.offset,
11495 case BTRFS_EXTENT_DATA_REF_KEY:
11496 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11497 ret = check_extent_data_backref(fs_info,
11498 btrfs_extent_data_ref_root(eb, dref),
11499 btrfs_extent_data_ref_objectid(eb, dref),
11500 btrfs_extent_data_ref_offset(eb, dref),
11502 btrfs_extent_data_ref_count(eb, dref));
11505 case BTRFS_SHARED_BLOCK_REF_KEY:
11506 ret = check_shared_block_backref(fs_info, key.offset,
11510 case BTRFS_SHARED_DATA_REF_KEY:
11511 ret = check_shared_data_backref(fs_info, key.offset,
11519 if (++slot < btrfs_header_nritems(eb))
11526 * Helper function for later fs/subvol tree check. To determine if a tree
11527 * block should be checked.
11528 * This function ensures that only the direct referencer with the lowest
11529 * rootid checks a fs/subvolume tree block.
11531 * Backref check at extent tree would detect errors like missing subvolume
11532 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * should_check() - decide whether @root is the tree that has to check tree
 * block @eb.
 *
 * @root: fs/subvolume tree currently being traversed
 * @eb:   tree block reached through @root
 *
 * The extent tree is searched with (bytenr of @eb, METADATA_ITEM, -1) and
 * btrfs_previous_extent_item() steps back to the extent item of the block.
 * The inline refs behind the extent item (behind the btrfs_tree_block_info
 * when the key is not a METADATA_ITEM) are then walked. A TREE_BLOCK_REF
 * whose offset is lower than root->objectid releases the path and leaves the
 * loop early, because a root with a lower id references the block too.
 *
 * Keyed (non-inline) tree block refs are not consulted. The caller treats a
 * zero return value as: skip this block.
 */
11534 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11536 struct btrfs_root *extent_root = root->fs_info->extent_root;
11537 struct btrfs_key key;
11538 struct btrfs_path path;
11539 struct extent_buffer *leaf;
11541 struct btrfs_extent_item *ei;
11547 struct btrfs_extent_inline_ref *iref;
11550 btrfs_init_path(&path);
11551 key.objectid = btrfs_header_bytenr(eb);
11552 key.type = BTRFS_METADATA_ITEM_KEY;
11553 key.offset = (u64)-1;
11556 * Any failure in backref resolving means we can't determine
11557 * whom the tree block belongs to.
11558 * So in that case, we need to check that tree block
11560 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11564 ret = btrfs_previous_extent_item(extent_root, &path,
11565 btrfs_header_bytenr(eb));
11569 leaf = path.nodes[0];
11570 slot = path.slots[0];
11571 btrfs_item_key_to_cpu(leaf, &key, slot);
11572 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11574 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11575 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11577 struct btrfs_tree_block_info *info;
11579 info = (struct btrfs_tree_block_info *)(ei + 1);
11580 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11583 item_size = btrfs_item_size_nr(leaf, slot);
11584 ptr = (unsigned long)iref;
11585 end = (unsigned long)ei + item_size;
11586 while (ptr < end) {
11587 iref = (struct btrfs_extent_inline_ref *)ptr;
11588 type = btrfs_extent_inline_ref_type(leaf, iref);
11589 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11592 * We only check the tree block if current root is
11593 * the lowest referencer of it.
11595 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11596 offset < root->objectid) {
11597 btrfs_release_path(&path);
11601 ptr += btrfs_extent_inline_ref_size(type);
11604 * Normally we should also check keyed tree block ref, but that may be
11605 * very time consuming. Inlined ref should already make us skip a lot
11606 * of refs now. So skip search keyed tree block ref.
11610 btrfs_release_path(&path);
11615 * Traversal function for tree block. We will do:
11616 * 1) Skip shared fs/subvolume tree blocks
11617 * 2) Update related bytes accounting
11618 * 3) Pre-order traversal
/*
 * traverse_tree_block() - pre-order walk of the subtree rooted at @node.
 *
 * @root: tree being traversed
 * @node: tree block to check, followed by its children
 *
 * For fs/subvolume trees a block for which should_check() returns zero takes
 * the skip branch. Otherwise:
 *  - node->len is added to total_btree_bytes, and to total_fs_tree_bytes or
 *    total_extent_tree_bytes depending on the header owner;
 *  - the backref of the block itself is verified with
 *    check_tree_block_ref(); a failure is reported and the walk continues;
 *  - for a leaf, btrfs_leaf_free_space() is added to btree_space_waste and
 *    the items go through check_leaf_items();
 *  - for a node, the unused key pointer slots are added to
 *    btree_space_waste and each child is read with read_tree_block() and
 *    visited recursively when the buffer is up to date. Children at the
 *    drop_level of the root are first tested with is_dropped_key() against
 *    the drop_progress key.
 */
11620 static int traverse_tree_block(struct btrfs_root *root,
11621 struct extent_buffer *node)
11623 struct extent_buffer *eb;
11624 struct btrfs_key key;
11625 struct btrfs_key drop_key;
11633 * Skip shared fs/subvolume tree block, in that case they will
11634 * be checked by referencer with lowest rootid
11636 if (is_fstree(root->objectid) && !should_check(root, node))
11639 /* Update bytes accounting */
11640 total_btree_bytes += node->len;
11641 if (fs_root_objectid(btrfs_header_owner(node)))
11642 total_fs_tree_bytes += node->len;
11643 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11644 total_extent_tree_bytes += node->len;
11646 /* pre-order traversal, check itself first */
11647 level = btrfs_header_level(node);
11648 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11649 btrfs_header_level(node),
11650 btrfs_header_owner(node));
11654 "check %s failed root %llu bytenr %llu level %d, force continue check",
11655 level ? "node":"leaf", root->objectid,
11656 btrfs_header_bytenr(node), btrfs_header_level(node));
11659 btree_space_waste += btrfs_leaf_free_space(root, node);
11660 ret = check_leaf_items(root, node);
11665 nr = btrfs_header_nritems(node);
11666 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11667 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11668 sizeof(struct btrfs_key_ptr);
11670 /* Then check all its children */
11671 for (i = 0; i < nr; i++) {
11672 u64 blocknr = btrfs_node_blockptr(node, i);
11674 btrfs_node_key_to_cpu(node, &key, i);
11675 if (level == root->root_item.drop_level &&
11676 is_dropped_key(&key, &drop_key))
11680 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11681 * to call the function itself.
11683 eb = read_tree_block(root->fs_info, blocknr, 0);
11684 if (extent_buffer_uptodate(eb)) {
11685 ret = traverse_tree_block(root, eb);
11688 free_extent_buffer(eb);
11695 * Low memory usage version check_chunks_and_extents.
/*
 * check_chunks_and_extents_v2() - chunk/extent check that works by walking
 * the trees directly.
 *
 * @fs_info: filesystem being checked
 *
 * Order of work:
 *  1) traverse the chunk tree;
 *  2) traverse the tree root;
 *  3) search the tree root starting at the ROOT_ITEM of the extent tree and
 *     step through the following items with btrfs_next_item(). Each
 *     ROOT_ITEM is opened and traversed with traverse_tree_block().
 *
 * Tree reloc roots are read with btrfs_read_fs_root_no_cache() and released
 * again with btrfs_free_fs_root() after the traversal; all other roots are
 * read with btrfs_read_fs_root().
 *
 * NOTE(review): the read-failure message formats key.objectid with %lld;
 * the other messages in this file print such values with %llu -- confirm.
 */
11697 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
11699 struct btrfs_path path;
11700 struct btrfs_key key;
11701 struct btrfs_root *root1;
11702 struct btrfs_root *root;
11703 struct btrfs_root *cur_root;
11707 root = fs_info->fs_root;
11709 root1 = root->fs_info->chunk_root;
11710 ret = traverse_tree_block(root1, root1->node);
11713 root1 = root->fs_info->tree_root;
11714 ret = traverse_tree_block(root1, root1->node);
11717 btrfs_init_path(&path);
11718 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11720 key.type = BTRFS_ROOT_ITEM_KEY;
11722 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11724 error("cannot find extent treet in tree_root");
11729 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11730 if (key.type != BTRFS_ROOT_ITEM_KEY)
11732 key.offset = (u64)-1;
11734 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11735 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11738 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11739 if (IS_ERR(cur_root) || !cur_root) {
11740 error("failed to read tree: %lld", key.objectid);
11744 ret = traverse_tree_block(cur_root, cur_root->node);
11747 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11748 btrfs_free_fs_root(cur_root);
11750 ret = btrfs_next_item(root1, &path);
11756 btrfs_release_path(&path);
/*
 * do_check_chunks_and_extents() - run the chunk/extent check for the
 * selected check mode.
 *
 * Prints the phase banner on stderr unless ctx.progress_enabled is set, then
 * calls check_chunks_and_extents_v2() when check_mode is CHECK_MODE_LOWMEM
 * and check_chunks_and_extents() otherwise.
 */
11760 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11764 if (!ctx.progress_enabled)
11765 fprintf(stderr, "checking extents\n");
11766 if (check_mode == CHECK_MODE_LOWMEM)
11767 ret = check_chunks_and_extents_v2(fs_info);
11769 ret = check_chunks_and_extents(fs_info);
/*
 * btrfs_fsck_reinit_root() - give @root a freshly initialised root block.
 *
 * @trans:     running transaction; its transid becomes the block generation
 * @root:      tree whose root node is reinitialised
 * @overwrite: NOTE(review): presumably selects reusing the old root node
 *             instead of allocating a new block -- confirm against callers
 *
 * A block c is obtained (through btrfs_alloc_free_block() on one of the two
 * paths) and an extra reference is taken on it. The header area is zeroed
 * and refilled with level, bytenr, generation, BTRFS_MIXED_BACKREF_REV and
 * the owner (root->root_key.objectid), followed by the fsid and the chunk
 * tree uuid, and the buffer is marked dirty.
 *
 * When the block has the same start as the old root node, the root item is
 * refreshed (generation, level) and written with btrfs_update_root(). The
 * old node reference is dropped and the root is put on the dirty list.
 */
11774 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11775 struct btrfs_root *root, int overwrite)
11777 struct extent_buffer *c;
11778 struct extent_buffer *old = root->node;
11781 struct btrfs_disk_key disk_key = {0,0,0};
11787 extent_buffer_get(c);
11790 c = btrfs_alloc_free_block(trans, root,
11791 root->fs_info->nodesize,
11792 root->root_key.objectid,
11793 &disk_key, level, 0, 0);
11796 extent_buffer_get(c);
11800 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11801 btrfs_set_header_level(c, level);
11802 btrfs_set_header_bytenr(c, c->start);
11803 btrfs_set_header_generation(c, trans->transid);
11804 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11805 btrfs_set_header_owner(c, root->root_key.objectid);
11807 write_extent_buffer(c, root->fs_info->fsid,
11808 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11810 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11811 btrfs_header_chunk_tree_uuid(c),
11814 btrfs_mark_buffer_dirty(c);
11816 * this case can happen in the following case:
11818 * 1.overwrite previous root.
11820 * 2.reinit reloc data root, this is because we skip pin
11821 * down reloc data tree before which means we can allocate
11822 * same block bytenr here.
11824 if (old->start == c->start) {
11825 btrfs_set_root_generation(&root->root_item,
11827 root->root_item.level = btrfs_header_level(root->node);
11828 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11829 &root->root_key, &root->root_item);
11831 free_extent_buffer(c);
11835 free_extent_buffer(old);
11837 add_root_to_dirty_list(root);
/*
 * pin_down_tree_blocks() - pin @eb and, recursively, the tree blocks that
 * can be reached from it.
 *
 * @fs_info:   filesystem whose pinned_extents tree is updated
 * @eb:        tree block to start from
 * @tree_root: non-zero while walking the tree root itself
 *
 * The range of @eb is first tested for EXTENT_DIRTY in
 * fs_info->pinned_extents so that a block is handled only once, then pinned
 * with btrfs_pin_extent().
 *
 * Leaf items: ROOT_ITEM keys other than the extent tree, tree reloc and data
 * reloc roots are followed by reading the root block they point to and
 * recursing into it with tree_root = 0.
 *
 * Node pointers: at level 1 of a tree that is not the tree root the child is
 * pinned by address (nodesize bytes) without being read; otherwise the child
 * is read and the function recurses with the same tree_root value.
 *
 * A block that cannot be read is reported on stderr.
 */
11841 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11842 struct extent_buffer *eb, int tree_root)
11844 struct extent_buffer *tmp;
11845 struct btrfs_root_item *ri;
11846 struct btrfs_key key;
11848 int level = btrfs_header_level(eb);
11854 * If we have pinned this block before, don't pin it again.
11855 * This can not only avoid forever loop with broken filesystem
11856 * but also give us some speedups.
11858 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11859 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11862 btrfs_pin_extent(fs_info, eb->start, eb->len);
11864 nritems = btrfs_header_nritems(eb);
11865 for (i = 0; i < nritems; i++) {
11867 btrfs_item_key_to_cpu(eb, &key, i);
11868 if (key.type != BTRFS_ROOT_ITEM_KEY)
11870 /* Skip the extent root and reloc roots */
11871 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11872 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11873 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11875 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11876 bytenr = btrfs_disk_root_bytenr(eb, ri);
11879 * If at any point we start needing the real root we
11880 * will have to build a stump root for the root we are
11881 * in, but for now this doesn't actually use the root so
11882 * just pass in extent_root.
11884 tmp = read_tree_block(fs_info, bytenr, 0);
11885 if (!extent_buffer_uptodate(tmp)) {
11886 fprintf(stderr, "Error reading root block\n");
11889 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11890 free_extent_buffer(tmp);
11894 bytenr = btrfs_node_blockptr(eb, i);
11896 /* If we aren't the tree root don't read the block */
11897 if (level == 1 && !tree_root) {
11898 btrfs_pin_extent(fs_info, bytenr,
11899 fs_info->nodesize);
11903 tmp = read_tree_block(fs_info, bytenr, 0);
11904 if (!extent_buffer_uptodate(tmp)) {
11905 fprintf(stderr, "Error reading tree block\n");
11908 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11909 free_extent_buffer(tmp);
/*
 * pin_metadata_blocks() - pin the blocks of the chunk tree, then those
 * reachable from the tree root.
 *
 * The chunk tree is walked with tree_root = 0 and the tree root with
 * tree_root = 1; the result of the second walk is returned directly.
 */
11918 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11922 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11926 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * reset_block_groups() - rebuild the in-memory block groups from the chunk
 * tree.
 *
 * @fs_info: filesystem whose block group state is rebuilt
 *
 * The three avail_*_alloc_bits fields are cleared first. The chunk tree is
 * then walked, moving on with btrfs_next_leaf() when the slot runs off the
 * current leaf. Every CHUNK_ITEM is turned into a block group with
 * btrfs_add_block_group(), using the chunk type, key.objectid, key.offset
 * and the chunk length, and the chunk range is marked dirty in
 * fs_info->free_space_cache.
 *
 * Afterwards the block groups are iterated with
 * btrfs_lookup_first_block_group(), advancing start to the end of each one.
 */
11929 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11931 struct btrfs_block_group_cache *cache;
11932 struct btrfs_path path;
11933 struct extent_buffer *leaf;
11934 struct btrfs_chunk *chunk;
11935 struct btrfs_key key;
11939 btrfs_init_path(&path);
11941 key.type = BTRFS_CHUNK_ITEM_KEY;
11943 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11945 btrfs_release_path(&path);
11950 * We do this in case the block groups were screwed up and had alloc
11951 * bits that aren't actually set on the chunks. This happens with
11952 * restored images every time and could happen in real life I guess.
11954 fs_info->avail_data_alloc_bits = 0;
11955 fs_info->avail_metadata_alloc_bits = 0;
11956 fs_info->avail_system_alloc_bits = 0;
11958 /* First we need to create the in-memory block groups */
11960 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11961 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11963 btrfs_release_path(&path);
11971 leaf = path.nodes[0];
11972 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11973 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11978 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11979 btrfs_add_block_group(fs_info, 0,
11980 btrfs_chunk_type(leaf, chunk),
11981 key.objectid, key.offset,
11982 btrfs_chunk_length(leaf, chunk));
11983 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11984 key.offset + btrfs_chunk_length(leaf, chunk));
11989 cache = btrfs_lookup_first_block_group(fs_info, start);
11993 start = cache->key.objectid + cache->key.offset;
11996 btrfs_release_path(&path);
/*
 * reset_balance() - remove the state of a pending balance and reinitialise
 * the data reloc tree.
 *
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 *
 * Phase 1: the balance item (BTRFS_BALANCE_OBJECTID, BALANCE_ITEM) is
 * searched in the tree root and deleted with btrfs_del_item(); one path
 * jumps straight to reinit_data_reloc.
 *
 * Phase 2: the ROOT_ITEMs of BTRFS_TREE_RELOC_OBJECTID are searched and
 * removed in batches (del_slot / del_nr) with btrfs_del_items(),
 * re-searching as needed. The scan tests for objectids above and different
 * from BTRFS_TREE_RELOC_OBJECTID.
 *
 * Phase 3: the data reloc tree is read with btrfs_read_fs_root(), recorded
 * in the transaction, reinitialised with btrfs_fsck_reinit_root() and given
 * a root directory with btrfs_make_root_dir().
 */
12000 static int reset_balance(struct btrfs_trans_handle *trans,
12001 struct btrfs_fs_info *fs_info)
12003 struct btrfs_root *root = fs_info->tree_root;
12004 struct btrfs_path path;
12005 struct extent_buffer *leaf;
12006 struct btrfs_key key;
12007 int del_slot, del_nr = 0;
12011 btrfs_init_path(&path);
12012 key.objectid = BTRFS_BALANCE_OBJECTID;
12013 key.type = BTRFS_BALANCE_ITEM_KEY;
12015 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12020 goto reinit_data_reloc;
12025 ret = btrfs_del_item(trans, root, &path);
12028 btrfs_release_path(&path);
12030 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12031 key.type = BTRFS_ROOT_ITEM_KEY;
12033 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
12037 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12042 ret = btrfs_del_items(trans, root, &path,
12049 btrfs_release_path(&path);
12052 ret = btrfs_search_slot(trans, root, &key, &path,
12059 leaf = path.nodes[0];
12060 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12061 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
12063 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12068 del_slot = path.slots[0];
12077 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
12081 btrfs_release_path(&path);
12084 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
12085 key.type = BTRFS_ROOT_ITEM_KEY;
12086 key.offset = (u64)-1;
12087 root = btrfs_read_fs_root(fs_info, &key);
12088 if (IS_ERR(root)) {
12089 fprintf(stderr, "Error reading data reloc tree\n");
12090 ret = PTR_ERR(root);
12093 record_root_in_trans(trans, root);
12094 ret = btrfs_fsck_reinit_root(trans, root, 0);
12097 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
12099 btrfs_release_path(&path);
/*
 * reinit_extent_tree() - rebuild the extent tree from scratch.
 *
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 *
 * Filesystems with the MIXED_GROUPS incompat flag are refused with a
 * message. Otherwise the steps are:
 *  1) pin_metadata_blocks() pins the metadata currently in use;
 *  2) the block groups are freed and recreated by reset_block_groups();
 *  3) the extent root is reinitialised with btrfs_fsck_reinit_root();
 *  4) every in-memory block group returned by
 *     btrfs_lookup_first_block_group() has its item inserted into the new
 *     extent tree, followed by btrfs_extent_post_op();
 *  5) reset_balance() clears a pending balance.
 *
 * Each failing step is reported on stderr.
 */
12103 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
12104 struct btrfs_fs_info *fs_info)
12110 * The only reason we don't do this is because right now we're just
12111 * walking the trees we find and pinning down their bytes, we don't look
12112 * at any of the leaves. In order to do mixed groups we'd have to check
12113 * the leaves of any fs roots and pin down the bytes for any file
12114 * extents we find. Not hard but why do it if we don't have to?
12116 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
12117 fprintf(stderr, "We don't support re-initing the extent tree "
12118 "for mixed block groups yet, please notify a btrfs "
12119 "developer you want to do this so they can add this "
12120 "functionality.\n");
12125 * first we need to walk all of the trees except the extent tree and pin
12126 * down the bytes that are in use so we don't overwrite any existing
12129 ret = pin_metadata_blocks(fs_info);
12131 fprintf(stderr, "error pinning down used bytes\n");
12136 * Need to drop all the block groups since we're going to recreate all
12139 btrfs_free_block_groups(fs_info);
12140 ret = reset_block_groups(fs_info);
12142 fprintf(stderr, "error resetting the block groups\n");
12146 /* Ok we can allocate now, reinit the extent root */
12147 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
12149 fprintf(stderr, "extent root initialization failed\n");
12151 * When the transaction code is updated we should end the
12152 * transaction, but for now progs only knows about commit so
12153 * just return an error.
12159 * Now we have all the in-memory block groups setup so we can make
12160 * allocations properly, and the metadata we care about is safe since we
12161 * pinned all of it above.
12164 struct btrfs_block_group_cache *cache;
12166 cache = btrfs_lookup_first_block_group(fs_info, start);
12169 start = cache->key.objectid + cache->key.offset;
12170 ret = btrfs_insert_item(trans, fs_info->extent_root,
12171 &cache->key, &cache->item,
12172 sizeof(cache->item));
12174 fprintf(stderr, "Error adding block group\n");
12177 btrfs_extent_post_op(trans, fs_info->extent_root);
12180 ret = reset_balance(trans, fs_info);
12182 fprintf(stderr, "error resetting the pending balance\n");
/*
 * recow_extent_buffer() - force tree block @eb to be COWed.
 *
 * @root: any root of the filesystem; replaced locally by the owner root
 * @eb:   metadata block to rewrite
 *
 * The owner root is looked up from the header owner of @eb; a failure is
 * reported and PTR_ERR() of the root is returned. A transaction is started,
 * path.lowest_level is set to the level of @eb, and the tree is searched
 * with cow = 1 for the first key of the block (node key for a node, item
 * key for a leaf). The transaction is then committed and the path released.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not assigned on
 * the visible line -- confirm whether a commit failure should be returned.
 */
12187 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12189 struct btrfs_path path;
12190 struct btrfs_trans_handle *trans;
12191 struct btrfs_key key;
12194 printf("Recowing metadata block %llu\n", eb->start);
12195 key.objectid = btrfs_header_owner(eb);
12196 key.type = BTRFS_ROOT_ITEM_KEY;
12197 key.offset = (u64)-1;
12199 root = btrfs_read_fs_root(root->fs_info, &key);
12200 if (IS_ERR(root)) {
12201 fprintf(stderr, "Couldn't find owner root %llu\n",
12203 return PTR_ERR(root);
12206 trans = btrfs_start_transaction(root, 1);
12208 return PTR_ERR(trans);
12210 btrfs_init_path(&path);
12211 path.lowest_level = btrfs_header_level(eb);
12212 if (path.lowest_level)
12213 btrfs_node_key_to_cpu(eb, &key, 0);
12215 btrfs_item_key_to_cpu(eb, &key, 0);
12217 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12218 btrfs_commit_transaction(trans, root);
12219 btrfs_release_path(&path);
/*
 * delete_bad_item() - delete the item described by @bad from its tree.
 *
 * @root: any root of the filesystem; replaced locally by the root named in
 *        bad->root_id
 * @bad:  key and owning root id of the item to remove
 *
 * The item key is printed, the owning root is read (a failure is reported
 * and PTR_ERR() of the root is returned), a transaction is started and the
 * key is searched with ins_len = -1 and cow = 1. The item is removed with
 * btrfs_del_item(), the transaction is committed and the path released.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not assigned on
 * the visible line -- confirm whether a commit failure should be returned.
 */
12223 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12225 struct btrfs_path path;
12226 struct btrfs_trans_handle *trans;
12227 struct btrfs_key key;
12230 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12231 bad->key.type, bad->key.offset);
12232 key.objectid = bad->root_id;
12233 key.type = BTRFS_ROOT_ITEM_KEY;
12234 key.offset = (u64)-1;
12236 root = btrfs_read_fs_root(root->fs_info, &key);
12237 if (IS_ERR(root)) {
12238 fprintf(stderr, "Couldn't find owner root %llu\n",
12240 return PTR_ERR(root);
12243 trans = btrfs_start_transaction(root, 1);
12245 return PTR_ERR(trans);
12247 btrfs_init_path(&path);
12248 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12254 ret = btrfs_del_item(trans, root, &path);
12256 btrfs_commit_transaction(trans, root);
12257 btrfs_release_path(&path);
12261 static int zero_log_tree(struct btrfs_root *root)
12263 struct btrfs_trans_handle *trans;
12266 trans = btrfs_start_transaction(root, 1);
12267 if (IS_ERR(trans)) {
12268 ret = PTR_ERR(trans);
12271 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12272 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12273 ret = btrfs_commit_transaction(trans, root);
/*
 * Checksum the data range that begins at @start and insert the checksums into
 * @csum_root.
 *
 * The range is walked in steps of sectorsize: each step reads the data at
 * start + offset into @buf with read_extent_data() and hands that buffer to
 * btrfs_csum_file_block().  @buf is supplied by the caller; the callers in
 * this file allocate it with a size of fs_info->sectorsize.
 */
12277 static int populate_csum(struct btrfs_trans_handle *trans,
12278 struct btrfs_root *csum_root, char *buf, u64 start,
12281 struct btrfs_fs_info *fs_info = csum_root->fs_info;
/* offset is the progress inside the range, len bounds the walk */
12286 while (offset < len) {
/*
 * sectorsize is reloaded on every pass before it is used as the step size.
 * NOTE(review): presumably because read_extent_data() may update it - confirm.
 */
12287 sectorsize = fs_info->sectorsize;
12288 ret = read_extent_data(fs_info, buf, start + offset,
/* start + len is the end of the whole range, start + offset the current block */
12292 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12293 start + offset, buf, sectorsize);
12296 offset += sectorsize;
/*
 * Insert checksums for the regular file extents referenced by one
 * fs/subvolume tree.
 *
 * The items of @cur_root are visited through btrfs_next_item().  For every
 * BTRFS_EXTENT_DATA_KEY item whose extent type is BTRFS_FILE_EXTENT_REG, the
 * on-disk range (disk_bytenr, disk_num_bytes) is passed to populate_csum()
 * together with a scratch buffer of fs_info->sectorsize bytes.
 */
12301 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12302 struct btrfs_root *csum_root,
12303 struct btrfs_root *cur_root)
12305 struct btrfs_path path;
12306 struct btrfs_key key;
12307 struct extent_buffer *node;
12308 struct btrfs_file_extent_item *fi;
/* Scratch buffer that populate_csum() reads the data into */
12315 buf = malloc(cur_root->fs_info->sectorsize);
12319 btrfs_init_path(&path);
/* Read-only search: no transaction handle, no insertion, no COW */
12323 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12326 /* Iterate all regular file extents and fill its csum */
12328 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12330 if (key.type != BTRFS_EXTENT_DATA_KEY)
12332 node = path.nodes[0];
12333 slot = path.slots[0];
12334 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12335 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* The checksummed range is the extent as stored on disk */
12337 start = btrfs_file_extent_disk_bytenr(node, fi);
12338 len = btrfs_file_extent_disk_num_bytes(node, fi);
12340 ret = populate_csum(trans, csum_root, buf, start, len);
/*
 * NOTE(review): -EEXIST is special-cased, presumably because an extent that is
 * referenced by several file extent items gets checksummed more than once -
 * confirm.
 */
12341 if (ret == -EEXIST)
12347 * TODO: if next leaf is corrupted, jump to nearest next valid
12350 ret = btrfs_next_item(cur_root, &path);
12360 btrfs_release_path(&path);
12365 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12366 struct btrfs_root *csum_root)
12368 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12369 struct btrfs_path path;
12370 struct btrfs_root *tree_root = fs_info->tree_root;
12371 struct btrfs_root *cur_root;
12372 struct extent_buffer *node;
12373 struct btrfs_key key;
12377 btrfs_init_path(&path);
12378 key.objectid = BTRFS_FS_TREE_OBJECTID;
12380 key.type = BTRFS_ROOT_ITEM_KEY;
12381 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12390 node = path.nodes[0];
12391 slot = path.slots[0];
12392 btrfs_item_key_to_cpu(node, &key, slot);
12393 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12395 if (key.type != BTRFS_ROOT_ITEM_KEY)
12397 if (!is_fstree(key.objectid))
12399 key.offset = (u64)-1;
12401 cur_root = btrfs_read_fs_root(fs_info, &key);
12402 if (IS_ERR(cur_root) || !cur_root) {
12403 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12407 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12412 ret = btrfs_next_item(tree_root, &path);
12422 btrfs_release_path(&path);
/*
 * Fill the csum tree from the data extents recorded in the extent tree.
 *
 * The extent tree is walked leaf by leaf.  Items that are not
 * BTRFS_EXTENT_ITEM_KEY, and extent items without BTRFS_EXTENT_FLAG_DATA, are
 * filtered out by the two checks below; for the remaining items
 * populate_csum() is called with key.objectid as the start of the range and a
 * scratch buffer of fs_info->sectorsize bytes.
 */
12426 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12427 struct btrfs_root *csum_root)
12429 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12430 struct btrfs_path path;
12431 struct btrfs_extent_item *ei;
12432 struct extent_buffer *leaf;
12434 struct btrfs_key key;
12437 btrfs_init_path(&path);
12439 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Read-only search: no transaction handle, no insertion, no COW */
12441 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12443 btrfs_release_path(&path);
/* Scratch buffer that populate_csum() reads the data into */
12447 buf = malloc(csum_root->fs_info->sectorsize);
12449 btrfs_release_path(&path);
/* Past the last item of this leaf: move on to the next leaf */
12454 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12455 ret = btrfs_next_leaf(extent_root, &path);
12463 leaf = path.nodes[0];
12465 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12466 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12471 ei = btrfs_item_ptr(leaf, path.slots[0],
12472 struct btrfs_extent_item);
/* Only data extents carry checksums */
12473 if (!(btrfs_extent_flags(leaf, ei) &
12474 BTRFS_EXTENT_FLAG_DATA)) {
12479 ret = populate_csum(trans, csum_root, buf, key.objectid,
12486 btrfs_release_path(&path);
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes out all extent information, so in that
 * case the extent tree cannot be used as the source and the fs/subvolume
 * trees are walked instead.  @search_fs_tree selects the source: non-zero
 * uses the fs/subvolume trees, zero uses the extent tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
12508 static void free_roots_info_cache(void)
12510 if (!roots_info_cache)
12513 while (!cache_tree_empty(roots_info_cache)) {
12514 struct cache_extent *entry;
12515 struct root_item_info *rii;
12517 entry = first_cache_extent(roots_info_cache);
12520 remove_cache_extent(roots_info_cache, entry);
12521 rii = container_of(entry, struct root_item_info, cache_extent);
12525 free(roots_info_cache);
12526 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, per root id, the highest tree block found
 * for that root.
 *
 * Results are stored in the global roots_info_cache, one root_item_info per
 * root id (cache_extent.start is the root id, cache_extent.size is 1).  Each
 * entry keeps the level, bytenr and generation of the highest-level block seen
 * so far and a node_count for that level; maybe_repair_root_item() only
 * accepts entries whose node_count is 1.
 */
12529 static int build_roots_info_cache(struct btrfs_fs_info *info)
12532 struct btrfs_key key;
12533 struct extent_buffer *leaf;
12534 struct btrfs_path path;
/* The cache tree is allocated lazily on first use */
12536 if (!roots_info_cache) {
12537 roots_info_cache = malloc(sizeof(*roots_info_cache));
12538 if (!roots_info_cache)
12540 cache_tree_init(roots_info_cache);
12543 btrfs_init_path(&path);
12545 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Read-only search: no transaction handle, no insertion, no COW */
12547 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12550 leaf = path.nodes[0];
12553 struct btrfs_key found_key;
12554 struct btrfs_extent_item *ei;
12555 struct btrfs_extent_inline_ref *iref;
12556 int slot = path.slots[0];
12561 struct cache_extent *entry;
12562 struct root_item_info *rii;
/* Past the last item of this leaf: move on to the next leaf */
12564 if (slot >= btrfs_header_nritems(leaf)) {
12565 ret = btrfs_next_leaf(info->extent_root, &path);
12572 leaf = path.nodes[0];
12573 slot = path.slots[0];
12576 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only extent items and metadata items are of interest */
12578 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12579 found_key.type != BTRFS_METADATA_ITEM_KEY)
12582 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12583 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM without the TREE_BLOCK flag is not a tree block */
12585 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12586 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM: the inline ref directly follows the extent item and the
 * level is taken from the key offset.  Otherwise a btrfs_tree_block_info sits
 * between the extent item and the inline ref and supplies the level.
 */
12589 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12590 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12591 level = found_key.offset;
12593 struct btrfs_tree_block_info *binfo;
12595 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12596 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12597 level = btrfs_tree_block_level(leaf, binfo);
12601 * For a root extent, it must be of the following type and the
12602 * first (and only one) iref in the item.
12604 type = btrfs_extent_inline_ref_type(leaf, iref);
12605 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* For this ref type the inline ref offset is used as the owning root id */
12608 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12609 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* A new entry starts with level (u8)-1, meaning "no block recorded yet" */
12611 rii = malloc(sizeof(struct root_item_info));
12616 rii->cache_extent.start = root_id;
12617 rii->cache_extent.size = 1;
12618 rii->level = (u8)-1;
12619 entry = &rii->cache_extent;
12620 ret = insert_cache_extent(roots_info_cache, entry);
12623 rii = container_of(entry, struct root_item_info,
12627 ASSERT(rii->cache_extent.start == root_id);
12628 ASSERT(rii->cache_extent.size == 1);
/*
 * A higher block, or the first block seen for this root, replaces the record
 * and resets node_count to 1.
 * NOTE(review): the equal-level branch presumably bumps node_count so that
 * several candidates at the top level can be detected - confirm.
 */
12630 if (level > rii->level || rii->level == (u8)-1) {
12631 rii->level = level;
12632 rii->bytenr = found_key.objectid;
12633 rii->gen = btrfs_extent_generation(leaf, ei);
12634 rii->node_count = 1;
12635 } else if (level == rii->level) {
12643 btrfs_release_path(&path);
/*
 * Compare the root item that @path points to with the information collected
 * in roots_info_cache and, unless @read_only_mode is set, rewrite it.
 *
 * @path:           points at the root item in its leaf (nodes[0]/slots[0])
 * @root_key:       key of the root item; its objectid is the root id
 * @read_only_mode: when non-zero only report, do not modify the leaf
 *
 * The root item is considered outdated when its bytenr, level or generation
 * differs from the cached values.  In that case the three fields are replaced
 * in a local copy, which is written back into the leaf in place.
 *
 * NOTE(review): the caller prints the return value as a number of roots, so a
 * positive return presumably means "needs fixing / fixed" - confirm.
 */
12648 static int maybe_repair_root_item(struct btrfs_path *path,
12649 const struct btrfs_key *root_key,
12650 const int read_only_mode)
12652 const u64 root_id = root_key->objectid;
12653 struct cache_extent *entry;
12654 struct root_item_info *rii;
12655 struct btrfs_root_item ri;
12656 unsigned long offset;
12658 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12661 "Error: could not find extent items for root %llu\n",
12662 root_key->objectid);
12666 rii = container_of(entry, struct root_item_info, cache_extent);
12667 ASSERT(rii->cache_extent.start == root_id);
12668 ASSERT(rii->cache_extent.size == 1);
/* Anything but exactly one block at the top level is rejected */
12670 if (rii->node_count != 1) {
12672 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the on-disk root item */
12677 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12678 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12680 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12681 btrfs_root_level(&ri) != rii->level ||
12682 btrfs_root_generation(&ri) != rii->gen) {
12685 * If we're in repair mode but our caller told us to not update
12686 * the root item, i.e. just check if it needs to be updated, don't
12687 * print this message, since the caller will call us again shortly
12688 * for the same root item without read only mode (the caller will
12689 * open a transaction first).
12691 if (!(read_only_mode && repair))
12693 "%sroot item for root %llu,"
12694 " current bytenr %llu, current gen %llu, current level %u,"
12695 " new bytenr %llu, new gen %llu, new level %u\n",
12696 (read_only_mode ? "" : "fixing "),
12698 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12699 btrfs_root_level(&ri),
12700 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the node that was found is reported separately */
12702 if (btrfs_root_generation(&ri) > rii->gen) {
12704 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12705 root_id, btrfs_root_generation(&ri), rii->gen);
/* Patch the three fields and copy the item back into the leaf */
12709 if (!read_only_mode) {
12710 btrfs_set_root_bytenr(&ri, rii->bytenr);
12711 btrfs_set_root_level(&ri, rii->level);
12712 btrfs_set_root_generation(&ri, rii->gen);
12713 write_extent_buffer(path->nodes[0], &ri,
12714 offset, sizeof(ri));
12724 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12725 * caused read-only snapshots to be corrupted if they were created at a moment
12726 * when the source subvolume/snapshot had orphan items. The issue was that the
12727 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12728 * node instead of the post orphan cleanup root node.
12729 * So this function, and its callees, just detects and fixes those cases. Even
12730 * though the regression was for read-only snapshots, this function applies to
12731 * any snapshot/subvolume root.
12732 * This must be run before any other repair code; otherwise other repair code
12733 * would delete or modify backrefs in the extent tree, for example, which
12734 * will result in an inconsistent fs after repairing the root items.
/*
 * Walk the root items in the tree root and check each one, except the one of
 * BTRFS_TREE_RELOC_OBJECTID, with maybe_repair_root_item().
 *
 * The information to compare against is collected first by
 * build_roots_info_cache() and released again through free_roots_info_cache().
 * While no transaction is open, maybe_repair_root_item() is called in
 * read-only mode; with an open transaction it may modify the leaf.
 *
 * NOTE(review): cmd_check() prints a positive return value as the number of
 * fixed or outdated roots - confirm against the return statements.
 */
12736 static int repair_root_items(struct btrfs_fs_info *info)
12738 struct btrfs_path path;
12739 struct btrfs_key key;
12740 struct extent_buffer *leaf;
12741 struct btrfs_trans_handle *trans = NULL;
12744 int need_trans = 0;
12746 btrfs_init_path(&path);
12748 ret = build_roots_info_cache(info);
/* Start at the first objectid available to subvolume roots */
12752 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12753 key.type = BTRFS_ROOT_ITEM_KEY;
12758 * Avoid opening and committing transactions if a leaf doesn't have
12759 * any root items that need to be fixed, so that we avoid rotating
12760 * backup roots unnecessarily.
12763 trans = btrfs_start_transaction(info->tree_root, 1);
12764 if (IS_ERR(trans)) {
12765 ret = PTR_ERR(trans);
12770 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12774 leaf = path.nodes[0];
12777 struct btrfs_key found_key;
/* End of the leaf: look up the next key and drop the path */
12779 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12780 int no_more_keys = find_next_key(&path, &key);
12782 btrfs_release_path(&path);
12784 ret = btrfs_commit_transaction(trans,
12796 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12798 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12800 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Without an open transaction this is a check only (read-only mode) */
12803 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
/*
 * NOTE(review): presumably the item needs fixing but no transaction is open
 * yet, so the path is dropped to redo the search inside a transaction -
 * confirm.
 */
12807 if (!trans && repair) {
12810 btrfs_release_path(&path);
12820 free_roots_info_cache();
12821 btrfs_release_path(&path);
12823 btrfs_commit_transaction(trans, info->tree_root);
12830 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12832 struct btrfs_trans_handle *trans;
12833 struct btrfs_block_group_cache *bg_cache;
12837 /* Clear all free space cache inodes and its extent data */
12839 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12842 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12845 current = bg_cache->key.objectid + bg_cache->key.offset;
12848 /* Don't forget to set cache_generation to -1 */
12849 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12850 if (IS_ERR(trans)) {
12851 error("failed to update super block cache generation");
12852 return PTR_ERR(trans);
12854 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12855 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Clear the free space cache of the requested version and report progress on
 * stdout.
 *
 * clear_version 1: uses clear_free_space_cache(); when the FREE_SPACE_TREE
 *                  compat_ro bit is set a message pointing to
 *                  --clear-space-cache v2 is emitted instead.
 * clear_version 2: uses btrfs_clear_free_space_tree(); when the
 *                  FREE_SPACE_TREE compat_ro bit is not set there is nothing
 *                  to clear and only a message is printed.
 */
12860 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
12865 if (clear_version == 1) {
/* A filesystem using the free space tree has no v1 cache to clear */
12866 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12868 "free space cache v2 detected, use --clear-space-cache v2");
12872 printf("Clearing free space cache\n");
12873 ret = clear_free_space_cache(fs_info);
12875 error("failed to clear free space cache");
12878 printf("Free space cache cleared\n");
12880 } else if (clear_version == 2) {
12881 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
12882 printf("no free space cache v2 to clear\n");
12886 printf("Clear free space cache v2\n");
12887 ret = btrfs_clear_free_space_tree(fs_info);
12889 error("failed to clear free space cache v2: %d", ret);
12892 printf("free space cache v2 cleared\n");
/*
 * Help text for "btrfs check": the synopsis comes first, followed by the
 * description and the option lines.  cmd_check() passes this table to usage()
 * when the command line is invalid.
 *
 * NOTE(review): the two "Check structural integrity ..." lines below read like
 * a short and a long description of the same thing - confirm that both are
 * intended.
 */
12899 const char * const cmd_check_usage[] = {
12900 "btrfs check [options] <device>",
12901 "Check structural integrity of a filesystem (unmounted).",
12902 "Check structural integrity of an unmounted filesystem. Verify internal",
12903 "trees' consistency and item connectivity. In the repair mode try to",
12904 "fix the problems found. ",
12905 "WARNING: the repair mode is considered dangerous",
12907 "-s|--super <superblock> use this superblock copy",
12908 "-b|--backup use the first valid backup root copy",
12909 "--force skip mount checks, repair is not possible",
12910 "--repair try to repair the filesystem",
12911 "--readonly run in read-only mode (default)",
12912 "--init-csum-tree create a new CRC tree",
12913 "--init-extent-tree create a new extent tree",
12914 "--mode <MODE> allows choice of memory/IO trade-offs",
12915 " where MODE is one of:",
12916 " original - read inodes and extents to memory (requires",
12917 " more memory, does less IO)",
12918 " lowmem - try to use less memory but read blocks again",
12920 "--check-data-csum verify checksums of data blocks",
12921 "-Q|--qgroup-report print a report on qgroup consistency",
12922 "-E|--subvol-extents <subvolid>",
12923 " print subvolume extents and sharing state",
12924 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12925 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12926 "-p|--progress indicate progress",
12927 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check".
 *
 * Parses the command line, opens the filesystem named by the single
 * positional argument and then runs, in this order: the optional one-shot
 * actions (clearing the space cache, zeroing the log tree in repair mode, the
 * qgroup report, the subvolume extent report, re-initialising the extent
 * and/or csum tree), the chunk/extent check, the root item repair, the free
 * space check, the fs roots check, the csum check, the root refs check and the
 * quota group check.  A summary of the collected byte counters is printed at
 * the end.
 */
12931 int cmd_check(int argc, char **argv)
12933 struct cache_tree root_cache;
12934 struct btrfs_root *root;
12935 struct btrfs_fs_info *info;
12938 u64 tree_root_bytenr = 0;
12939 u64 chunk_root_bytenr = 0;
12940 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12944 int init_csum_tree = 0;
12946 int clear_space_cache = 0;
12947 int qgroup_report = 0;
12948 int qgroups_repaired = 0;
12949 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Long-only options get values from 257 upwards, above any single char */
12954 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12955 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12956 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12957 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
12958 GETOPT_VAL_FORCE };
12959 static const struct option long_options[] = {
12960 { "super", required_argument, NULL, 's' },
12961 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12962 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12963 { "init-csum-tree", no_argument, NULL,
12964 GETOPT_VAL_INIT_CSUM },
12965 { "init-extent-tree", no_argument, NULL,
12966 GETOPT_VAL_INIT_EXTENT },
12967 { "check-data-csum", no_argument, NULL,
12968 GETOPT_VAL_CHECK_CSUM },
12969 { "backup", no_argument, NULL, 'b' },
12970 { "subvol-extents", required_argument, NULL, 'E' },
12971 { "qgroup-report", no_argument, NULL, 'Q' },
12972 { "tree-root", required_argument, NULL, 'r' },
12973 { "chunk-root", required_argument, NULL,
12974 GETOPT_VAL_CHUNK_TREE },
12975 { "progress", no_argument, NULL, 'p' },
12976 { "mode", required_argument, NULL,
12978 { "clear-space-cache", required_argument, NULL,
12979 GETOPT_VAL_CLEAR_SPACE_CACHE},
12980 { "force", no_argument, NULL, GETOPT_VAL_FORCE },
12981 { NULL, 0, NULL, 0}
/*
 * NOTE(review): 'E' has no ':' in the short option string, although the long
 * option "subvol-extents" is required_argument and the usage text documents
 * "-E <subvolid>".  With the short form getopt leaves optarg NULL, which is
 * then passed to arg_strtou64() - the string should probably be
 * "as:br:pE:Q"; confirm.
 */
12984 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
12988 case 'a': /* ignored */ break;
12990 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* The super block copy is selected by mirror number, not by byte offset */
12993 num = arg_strtou64(optarg);
12994 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12996 "super mirror should be less than %d",
12997 BTRFS_SUPER_MIRROR_MAX);
13000 bytenr = btrfs_sb_offset(((int)num));
13001 printf("using SB copy %llu, bytenr %llu\n", num,
13002 (unsigned long long)bytenr);
13008 subvolid = arg_strtou64(optarg);
13011 tree_root_bytenr = arg_strtou64(optarg);
13013 case GETOPT_VAL_CHUNK_TREE:
13014 chunk_root_bytenr = arg_strtou64(optarg);
13017 ctx.progress_enabled = true;
13021 usage(cmd_check_usage);
13022 case GETOPT_VAL_REPAIR:
13023 printf("enabling repair mode\n");
13025 ctree_flags |= OPEN_CTREE_WRITES;
13027 case GETOPT_VAL_READONLY:
13030 case GETOPT_VAL_INIT_CSUM:
13031 printf("Creating a new CRC tree\n");
13032 init_csum_tree = 1;
13034 ctree_flags |= OPEN_CTREE_WRITES;
13036 case GETOPT_VAL_INIT_EXTENT:
13037 init_extent_tree = 1;
13038 ctree_flags |= (OPEN_CTREE_WRITES |
13039 OPEN_CTREE_NO_BLOCK_GROUPS);
13042 case GETOPT_VAL_CHECK_CSUM:
13043 check_data_csum = 1;
13045 case GETOPT_VAL_MODE:
13046 check_mode = parse_check_mode(optarg);
13047 if (check_mode == CHECK_MODE_UNKNOWN) {
13048 error("unknown mode: %s", optarg);
13052 case GETOPT_VAL_CLEAR_SPACE_CACHE:
13053 if (strcmp(optarg, "v1") == 0) {
13054 clear_space_cache = 1;
13055 } else if (strcmp(optarg, "v2") == 0) {
13056 clear_space_cache = 2;
13057 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
13060 "invalid argument to --clear-space-cache, must be v1 or v2");
13063 ctree_flags |= OPEN_CTREE_WRITES;
13065 case GETOPT_VAL_FORCE:
/* Exactly one positional argument, the device, is accepted */
13071 if (check_argc_exact(argc - optind, 1))
13072 usage(cmd_check_usage);
/* Progress reporting runs as a separate task set up here */
13074 if (ctx.progress_enabled) {
13075 ctx.tp = TASK_NOTHING;
13076 ctx.info = task_init(print_status_check, print_status_return, &ctx);
13079 /* This check is the only reason for --readonly to exist */
13080 if (readonly && repair) {
13081 error("repair options are not compatible with --readonly");
13086 * Not supported yet
13088 if (repair && check_mode == CHECK_MODE_LOWMEM) {
13089 error("low memory mode doesn't support repair yet");
13094 cache_tree_init(&root_cache);
/* Mount status check; the messages below show how --force relaxes it */
13096 ret = check_mounted(argv[optind]);
13099 error("could not check mount status: %s",
13105 "%s is currently mounted, use --force if you really intend to check the filesystem",
13113 error("repair and --force is not yet supported");
13120 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
13124 "filesystem mounted, continuing because of --force");
13128 /* only allow partial opening under repair mode */
13130 ctree_flags |= OPEN_CTREE_PARTIAL;
13132 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
13133 chunk_root_bytenr, ctree_flags);
13135 error("cannot open file system");
/* global_info makes the opened filesystem reachable from file-scope helpers */
13141 global_info = info;
13142 root = info->fs_root;
13143 uuid_unparse(info->super_copy->fsid, uuidbuf);
13145 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
13148 * Check the bare minimum before starting anything else that could rely
13149 * on it, namely the tree roots, any local consistency checks
13151 if (!extent_buffer_uptodate(info->tree_root->node) ||
13152 !extent_buffer_uptodate(info->dev_root->node) ||
13153 !extent_buffer_uptodate(info->chunk_root->node)) {
13154 error("critical roots corrupted, unable to check the filesystem");
/* One-shot action: clear the requested space cache version */
13160 if (clear_space_cache) {
13161 ret = do_clear_free_space_cache(info, clear_space_cache);
13167 * repair mode will force us to commit transaction which
13168 * will make us fail to load log tree when mounting.
13170 if (repair && btrfs_super_log_root(info->super_copy)) {
13171 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
13177 ret = zero_log_tree(root);
13180 error("failed to zero log tree: %d", ret);
/* One-shot action: qgroup report */
13185 if (qgroup_report) {
13186 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
13188 ret = qgroup_verify_all(info);
13195 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
13196 subvolid, argv[optind], uuidbuf);
13197 ret = print_extent_state(info, subvolid);
/* Re-initialise the extent and/or csum tree inside a single transaction */
13202 if (init_extent_tree || init_csum_tree) {
13203 struct btrfs_trans_handle *trans;
13205 trans = btrfs_start_transaction(info->extent_root, 0);
13206 if (IS_ERR(trans)) {
13207 error("error starting transaction");
13208 ret = PTR_ERR(trans);
13213 if (init_extent_tree) {
13214 printf("Creating a new extent tree\n");
13215 ret = reinit_extent_tree(trans, info);
13221 if (init_csum_tree) {
13222 printf("Reinitialize checksum tree\n");
13223 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
13225 error("checksum tree initialization failed: %d",
13232 ret = fill_csum_tree(trans, info->csum_root,
13236 error("checksum tree refilling failed: %d", ret);
13241 * Ok now we commit and run the normal fsck, which will add
13242 * extent entries for all of the items it finds.
13244 ret = btrfs_commit_transaction(trans, info->extent_root);
/* The extent and csum roots must be readable for the checks that follow */
13249 if (!extent_buffer_uptodate(info->extent_root->node)) {
13250 error("critical: extent_root, unable to check the filesystem");
13255 if (!extent_buffer_uptodate(info->csum_root->node)) {
13256 error("critical: csum_root, unable to check the filesystem");
13262 ret = do_check_chunks_and_extents(info);
13266 "errors found in extent allocation tree or chunk allocation");
/* See the comment above repair_root_items() for why it runs this early */
13268 ret = repair_root_items(info);
13271 error("failed to repair root items: %s", strerror(-ret));
13275 fprintf(stderr, "Fixed %d roots.\n", ret);
13277 } else if (ret > 0) {
13279 "Found %d roots with an outdated root item.\n",
13282 "Please run a filesystem check with the option --repair to fix them.\n");
/* The step banner is only printed when the progress task is not active */
13288 if (!ctx.progress_enabled) {
13289 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13290 fprintf(stderr, "checking free space tree\n");
13292 fprintf(stderr, "checking free space cache\n");
13294 ret = check_space_cache(root);
13297 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13298 error("errors found in free space tree");
13300 error("errors found in free space cache");
13305 * We used to have to have these hole extents in between our real
13306 * extents so if we don't have this flag set we need to make sure there
13307 * are no gaps in the file extents for inodes, otherwise we can just
13308 * ignore it when this happens.
13310 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13311 ret = do_check_fs_roots(info, &root_cache);
13314 error("errors found in fs roots");
13318 fprintf(stderr, "checking csums\n");
13319 ret = check_csums(root);
13322 error("errors found in csum tree");
13326 fprintf(stderr, "checking root refs\n");
13327 /* For low memory mode, check_fs_roots_v2 handles root refs */
13328 if (check_mode != CHECK_MODE_LOWMEM) {
13329 ret = check_root_refs(root, &root_cache);
13332 error("errors found in root refs");
/* Repair mode: recow every extent buffer queued on fs_info->recow_ebs */
13337 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13338 struct extent_buffer *eb;
13340 eb = list_first_entry(&root->fs_info->recow_ebs,
13341 struct extent_buffer, recow);
13342 list_del_init(&eb->recow);
13343 ret = recow_extent_buffer(root, eb);
13346 error("fails to fix transid errors");
/* Drain the file-scope delete_items list, deleting each queued item */
13351 while (!list_empty(&delete_items)) {
13352 struct bad_item *bad;
13354 bad = list_first_entry(&delete_items, struct bad_item, list);
13355 list_del_init(&bad->list);
13357 ret = delete_bad_item(root, bad);
13363 if (info->quota_enabled) {
13364 fprintf(stderr, "checking quota groups\n");
13365 ret = qgroup_verify_all(info);
13368 error("failed to check quota groups");
13372 ret = repair_qgroups(info, &qgroups_repaired);
13375 error("failed to repair quota groups");
/* Buffers still queued for recow at this point are reported as errors */
13381 if (!list_empty(&root->fs_info->recow_ebs)) {
13382 error("transid errors in file system");
/* Summary built from the file-scope byte counters */
13387 printf("found %llu bytes used, ",
13388 (unsigned long long)bytes_used);
13390 printf("error(s) found\n");
13392 printf("no error found\n");
13393 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13394 printf("total tree bytes: %llu\n",
13395 (unsigned long long)total_btree_bytes);
13396 printf("total fs tree bytes: %llu\n",
13397 (unsigned long long)total_fs_tree_bytes);
13398 printf("total extent tree bytes: %llu\n",
13399 (unsigned long long)total_extent_tree_bytes);
13400 printf("btree space waste bytes: %llu\n",
13401 (unsigned long long)btree_space_waste);
13402 printf("file data blocks allocated: %llu\n referenced %llu\n",
13403 (unsigned long long)data_bytes_allocated,
13404 (unsigned long long)data_bytes_referenced);
/* Release the per-run caches and stop the progress task */
13406 free_qgroup_counts();
13407 free_root_recs_tree(&root_cache);
13411 if (ctx.progress_enabled)
13412 task_deinit(ctx.info);