2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
79 enum btrfs_check_mode {
83 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
88 struct extent_backref {
89 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
102 struct data_backref {
103 struct extent_backref node;
/*
 * Error flags, one distinct bit per condition, so that several of them can
 * be combined in a single bitmask value.
 * NOTE(review): no macro in this group uses bit 0 (the first value is
 * 1<<1), and the code that sets or tests these flags is not visible in
 * this part of the file -- confirm both against the callers.
 */
117 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
118 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
119 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but does not match */
120 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
121 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
122 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but does not match */
123 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
124 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
125 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
126 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
127 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
128 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
129 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM has no reference */
130 #define NO_INODE_ITEM (1<<14) /* no inode_item */
131 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
132 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
133 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but does not match */
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
141 * Much like data_backref, but with the undetermined members removed
142 * and changed to use list_head.
143 * During extent scan, it is stored in root->orphan_data_extent.
144 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
146 struct orphan_data_extent {
147 struct list_head list;
155 struct tree_backref {
156 struct extent_backref node;
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
168 /* Explicit initialization for extent_record::flag_block_full_backref */
169 enum { FLAG_UNSET = 2 };
171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
181 u64 extent_item_refs;
183 u64 parent_generation;
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
203 struct inode_backref {
204 struct list_head list;
205 unsigned int found_dir_item:1;
206 unsigned int found_dir_index:1;
207 unsigned int found_inode_ref:1;
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
222 struct root_item_record {
223 struct list_head list;
230 struct btrfs_key drop_key;
/*
 * Backref error flags.  They are OR-ed into a backref's 'errors' field
 * (see add_inode_backref() and maybe_free_inode_rec()) and turned into
 * text by print_ref_error(), which has one message per flag.  The short
 * description next to each flag repeats that message.
 */
233 #define REF_ERR_NO_DIR_ITEM (1 << 0) /* no dir item */
234 #define REF_ERR_NO_DIR_INDEX (1 << 1) /* no dir index */
235 #define REF_ERR_NO_INODE_REF (1 << 2) /* no inode ref */
236 #define REF_ERR_DUP_DIR_ITEM (1 << 3) /* dup dir item */
237 #define REF_ERR_DUP_DIR_INDEX (1 << 4) /* dup dir index */
238 #define REF_ERR_DUP_INODE_REF (1 << 5) /* dup inode ref */
239 #define REF_ERR_INDEX_UNMATCH (1 << 6) /* index mismatch */
240 #define REF_ERR_FILETYPE_UNMATCH (1 << 7) /* filetype mismatch */
241 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* name too long; value is 0x100 */
242 #define REF_ERR_NO_ROOT_REF (1 << 9) /* no root ref */
243 #define REF_ERR_NO_ROOT_BACKREF (1 << 10) /* no root backref */
244 #define REF_ERR_DUP_ROOT_REF (1 << 11) /* dup root ref */
245 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12) /* dup root backref */
247 struct file_extent_hole {
253 struct inode_record {
254 struct list_head backrefs;
255 unsigned int checked:1;
256 unsigned int merging:1;
257 unsigned int found_inode_item:1;
258 unsigned int found_dir_item:1;
259 unsigned int found_file_extent:1;
260 unsigned int found_csum_item:1;
261 unsigned int some_csum_missing:1;
262 unsigned int nodatasum:1;
275 struct rb_root holes;
276 struct list_head orphan_extents;
/*
 * Inode error flags.  They are OR-ed into the 'errors' field of an inode
 * record (e.g. in maybe_free_inode_rec() and process_inode_item()) and
 * reported by print_inode_error(), which has one message per flag.  The
 * short description next to each flag repeats that message.
 */
281 #define I_ERR_NO_INODE_ITEM (1 << 0) /* no inode item */
282 #define I_ERR_NO_ORPHAN_ITEM (1 << 1) /* no orphan item */
283 #define I_ERR_DUP_INODE_ITEM (1 << 2) /* dup inode item */
284 #define I_ERR_DUP_DIR_INDEX (1 << 3) /* dup dir index */
285 #define I_ERR_ODD_DIR_ITEM (1 << 4) /* odd dir item */
286 #define I_ERR_ODD_FILE_EXTENT (1 << 5) /* odd file extent */
287 #define I_ERR_BAD_FILE_EXTENT (1 << 6) /* bad file extent */
288 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7) /* file extent overlap */
289 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* file extent discount; value is 0x100 */
290 #define I_ERR_DIR_ISIZE_WRONG (1 << 9) /* dir isize wrong */
291 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* nbytes wrong; value is 0x400 */
292 #define I_ERR_ODD_CSUM_ITEM (1 << 11) /* odd csum item */
293 #define I_ERR_SOME_CSUM_MISSING (1 << 12) /* some csum missing */
294 #define I_ERR_LINK_COUNT_WRONG (1 << 13) /* link count wrong */
295 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14) /* orphan file extent */
297 struct root_backref {
298 struct list_head list;
299 unsigned int found_dir_item:1;
300 unsigned int found_dir_index:1;
301 unsigned int found_back_ref:1;
302 unsigned int found_forward_ref:1;
303 unsigned int reachable:1;
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
314 return list_entry(entry, struct root_backref, list);
318 struct list_head backrefs;
319 struct cache_extent cache;
320 unsigned int found_root_item:1;
326 struct cache_extent cache;
331 struct cache_extent cache;
332 struct cache_tree root_cache;
333 struct cache_tree inode_cache;
334 struct inode_record *current;
343 struct walk_control {
344 struct cache_tree shared;
345 struct shared_node *nodes[BTRFS_MAX_LEVEL];
351 struct btrfs_key key;
353 struct list_head list;
356 struct extent_entry {
361 struct list_head list;
364 struct root_item_info {
365 /* level of the root */
367 /* number of nodes at this level, must be 1 for a root */
371 struct cache_extent cache_extent;
375 * Error bit for low memory mode check.
377 * Currently no caller cares about it yet. Just internal use for error
380 #define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
381 #define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
382 #define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
383 #define REFERENCER_MISSING (1 << 3) /* Referencer not found */
384 #define REFERENCER_MISMATCH (1 << 4) /* Referencer found but does not match */
/*
 * NOTE(review): CROSSING_STRIPE_BOUNDARY has the same value (1 << 4) as
 * REFERENCER_MISMATCH above, so the two conditions cannot be told apart
 * once they are OR-ed into one error mask.  Confirm whether this is
 * intentional; if not, it needs a bit of its own.
 */
385 #define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
386 #define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */
387 #define UNKNOWN_TYPE (1 << 6) /* Unknown type */
388 #define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */
389 #define CHUNK_TYPE_MISMATCH (1 << 8) /* presumably: chunk type does not match -- TODO confirm */
391 static void *print_status_check(void *p)
393 struct task_ctx *priv = p;
394 const char work_indicator[] = { '.', 'o', 'O', 'o' };
396 static char *task_position_string[] = {
398 "checking free space cache",
402 task_period_start(priv->info, 1000 /* 1s */);
404 if (priv->tp == TASK_NOTHING)
408 printf("%s [%c]\r", task_position_string[priv->tp],
409 work_indicator[count % 4]);
412 task_period_wait(priv->info);
417 static int print_status_return(void *p)
425 static enum btrfs_check_mode parse_check_mode(const char *str)
427 if (strcmp(str, "lowmem") == 0)
428 return CHECK_MODE_LOWMEM;
429 if (strcmp(str, "orig") == 0)
430 return CHECK_MODE_ORIGINAL;
431 if (strcmp(str, "original") == 0)
432 return CHECK_MODE_ORIGINAL;
434 return CHECK_MODE_UNKNOWN;
437 /* Compatibility function to allow reuse of old code */
438 static u64 first_extent_gap(struct rb_root *holes)
440 struct file_extent_hole *hole;
442 if (RB_EMPTY_ROOT(holes))
445 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
451 struct file_extent_hole *hole1;
452 struct file_extent_hole *hole2;
454 hole1 = rb_entry(node1, struct file_extent_hole, node);
455 hole2 = rb_entry(node2, struct file_extent_hole, node);
457 if (hole1->start > hole2->start)
459 if (hole1->start < hole2->start)
461 /* Now hole1->start == hole2->start */
462 if (hole1->len >= hole2->len)
464 * Hole 1 will be the merge center
465 * Identical holes will be merged later
468 /* Hole 2 will be the merge center */
473 * Add a hole to the record
475 * This also merges holes, as needed by copy_file_extent_holes(),
476 * which ensures that no adjacent or overlapping holes remain.
478 static int add_file_extent_hole(struct rb_root *holes,
481 struct file_extent_hole *hole;
482 struct file_extent_hole *prev = NULL;
483 struct file_extent_hole *next = NULL;
485 hole = malloc(sizeof(*hole));
490 /* Since compare will not return 0, no -EEXIST will happen */
491 rb_insert(holes, &hole->node, compare_hole);
493 /* simple merge with previous hole */
494 if (rb_prev(&hole->node))
495 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
497 if (prev && prev->start + prev->len >= hole->start) {
498 hole->len = hole->start + hole->len - prev->start;
499 hole->start = prev->start;
500 rb_erase(&prev->node, holes);
505 /* iterate merge with next holes */
507 if (!rb_next(&hole->node))
509 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
511 if (hole->start + hole->len >= next->start) {
512 if (hole->start + hole->len <= next->start + next->len)
513 hole->len = next->start + next->len -
515 rb_erase(&next->node, holes);
524 static int compare_hole_range(struct rb_node *node, void *data)
526 struct file_extent_hole *hole;
529 hole = (struct file_extent_hole *)data;
532 hole = rb_entry(node, struct file_extent_hole, node);
533 if (start < hole->start)
535 if (start >= hole->start && start < hole->start + hole->len)
541 * Delete a hole in the record
543 * This will do the hole split and is much stricter than add.
545 static int del_file_extent_hole(struct rb_root *holes,
548 struct file_extent_hole *hole;
549 struct file_extent_hole tmp;
554 struct rb_node *node;
561 node = rb_search(holes, &tmp, compare_hole_range, NULL);
564 hole = rb_entry(node, struct file_extent_hole, node);
565 if (start + len > hole->start + hole->len)
569 * Now there will be no overlap, delete the hole and re-add the
570 * split(s) if they exist.
572 if (start > hole->start) {
573 prev_start = hole->start;
574 prev_len = start - hole->start;
577 if (hole->start + hole->len > start + len) {
578 next_start = start + len;
579 next_len = hole->start + hole->len - start - len;
582 rb_erase(node, holes);
585 ret = add_file_extent_hole(holes, prev_start, prev_len);
590 ret = add_file_extent_hole(holes, next_start, next_len);
597 static int copy_file_extent_holes(struct rb_root *dst,
600 struct file_extent_hole *hole;
601 struct rb_node *node;
604 node = rb_first(src);
606 hole = rb_entry(node, struct file_extent_hole, node);
607 ret = add_file_extent_hole(dst, hole->start, hole->len);
610 node = rb_next(node);
615 static void free_file_extent_holes(struct rb_root *holes)
617 struct rb_node *node;
618 struct file_extent_hole *hole;
620 node = rb_first(holes);
622 hole = rb_entry(node, struct file_extent_hole, node);
623 rb_erase(node, holes);
625 node = rb_first(holes);
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632 struct btrfs_root *root)
634 if (root->last_trans != trans->transid) {
635 root->track_dirty = 1;
636 root->last_trans = trans->transid;
637 root->commit_root = root->node;
638 extent_buffer_get(root->node);
642 static u8 imode_to_type(u32 imode)
645 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
647 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
648 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
649 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
650 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
651 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
652 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
655 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
661 struct device_record *rec1;
662 struct device_record *rec2;
664 rec1 = rb_entry(node1, struct device_record, node);
665 rec2 = rb_entry(node2, struct device_record, node);
666 if (rec1->devid > rec2->devid)
668 else if (rec1->devid < rec2->devid)
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
676 struct inode_record *rec;
677 struct inode_backref *backref;
678 struct inode_backref *orig;
679 struct inode_backref *tmp;
680 struct orphan_data_extent *src_orphan;
681 struct orphan_data_extent *dst_orphan;
686 rec = malloc(sizeof(*rec));
688 return ERR_PTR(-ENOMEM);
689 memcpy(rec, orig_rec, sizeof(*rec));
691 INIT_LIST_HEAD(&rec->backrefs);
692 INIT_LIST_HEAD(&rec->orphan_extents);
693 rec->holes = RB_ROOT;
695 list_for_each_entry(orig, &orig_rec->backrefs, list) {
696 size = sizeof(*orig) + orig->namelen + 1;
697 backref = malloc(size);
702 memcpy(backref, orig, size);
703 list_add_tail(&backref->list, &rec->backrefs);
705 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706 dst_orphan = malloc(sizeof(*dst_orphan));
711 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
714 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
721 rb = rb_first(&rec->holes);
723 struct file_extent_hole *hole;
725 hole = rb_entry(rb, struct file_extent_hole, node);
731 if (!list_empty(&rec->backrefs))
732 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733 list_del(&orig->list);
737 if (!list_empty(&rec->orphan_extents))
738 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739 list_del(&orig->list);
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
751 struct orphan_data_extent *orphan;
753 if (list_empty(orphan_extents))
755 printf("The following data extent is lost in tree %llu:\n",
757 list_for_each_entry(orphan, orphan_extents, list) {
758 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759 orphan->objectid, orphan->offset, orphan->disk_bytenr,
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
766 u64 root_objectid = root->root_key.objectid;
767 int errors = rec->errors;
771 /* For reloc root errors, print the objectid of the corresponding fs root */
772 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773 root_objectid = root->root_key.offset;
774 fprintf(stderr, "reloc");
776 fprintf(stderr, "root %llu inode %llu errors %x",
777 (unsigned long long) root_objectid,
778 (unsigned long long) rec->ino, rec->errors);
780 if (errors & I_ERR_NO_INODE_ITEM)
781 fprintf(stderr, ", no inode item");
782 if (errors & I_ERR_NO_ORPHAN_ITEM)
783 fprintf(stderr, ", no orphan item");
784 if (errors & I_ERR_DUP_INODE_ITEM)
785 fprintf(stderr, ", dup inode item");
786 if (errors & I_ERR_DUP_DIR_INDEX)
787 fprintf(stderr, ", dup dir index");
788 if (errors & I_ERR_ODD_DIR_ITEM)
789 fprintf(stderr, ", odd dir item");
790 if (errors & I_ERR_ODD_FILE_EXTENT)
791 fprintf(stderr, ", odd file extent");
792 if (errors & I_ERR_BAD_FILE_EXTENT)
793 fprintf(stderr, ", bad file extent");
794 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795 fprintf(stderr, ", file extent overlap");
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797 fprintf(stderr, ", file extent discount");
798 if (errors & I_ERR_DIR_ISIZE_WRONG)
799 fprintf(stderr, ", dir isize wrong");
800 if (errors & I_ERR_FILE_NBYTES_WRONG)
801 fprintf(stderr, ", nbytes wrong");
802 if (errors & I_ERR_ODD_CSUM_ITEM)
803 fprintf(stderr, ", odd csum item");
804 if (errors & I_ERR_SOME_CSUM_MISSING)
805 fprintf(stderr, ", some csum missing");
806 if (errors & I_ERR_LINK_COUNT_WRONG)
807 fprintf(stderr, ", link count wrong");
808 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809 fprintf(stderr, ", orphan file extent");
810 fprintf(stderr, "\n");
811 /* Print the orphan extents if needed */
812 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
815 /* Print the holes if needed */
816 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817 struct file_extent_hole *hole;
818 struct rb_node *node;
821 node = rb_first(&rec->holes);
822 fprintf(stderr, "Found file extent holes:\n");
825 hole = rb_entry(node, struct file_extent_hole, node);
826 fprintf(stderr, "\tstart: %llu, len: %llu\n",
827 hole->start, hole->len);
828 node = rb_next(node);
831 fprintf(stderr, "\tstart: 0, len: %llu\n",
833 root->fs_info->sectorsize));
837 static void print_ref_error(int errors)
839 if (errors & REF_ERR_NO_DIR_ITEM)
840 fprintf(stderr, ", no dir item");
841 if (errors & REF_ERR_NO_DIR_INDEX)
842 fprintf(stderr, ", no dir index");
843 if (errors & REF_ERR_NO_INODE_REF)
844 fprintf(stderr, ", no inode ref");
845 if (errors & REF_ERR_DUP_DIR_ITEM)
846 fprintf(stderr, ", dup dir item");
847 if (errors & REF_ERR_DUP_DIR_INDEX)
848 fprintf(stderr, ", dup dir index");
849 if (errors & REF_ERR_DUP_INODE_REF)
850 fprintf(stderr, ", dup inode ref");
851 if (errors & REF_ERR_INDEX_UNMATCH)
852 fprintf(stderr, ", index mismatch");
853 if (errors & REF_ERR_FILETYPE_UNMATCH)
854 fprintf(stderr, ", filetype mismatch");
855 if (errors & REF_ERR_NAME_TOO_LONG)
856 fprintf(stderr, ", name too long");
857 if (errors & REF_ERR_NO_ROOT_REF)
858 fprintf(stderr, ", no root ref");
859 if (errors & REF_ERR_NO_ROOT_BACKREF)
860 fprintf(stderr, ", no root backref");
861 if (errors & REF_ERR_DUP_ROOT_REF)
862 fprintf(stderr, ", dup root ref");
863 if (errors & REF_ERR_DUP_ROOT_BACKREF)
864 fprintf(stderr, ", dup root backref");
865 fprintf(stderr, "\n");
868 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
871 struct ptr_node *node;
872 struct cache_extent *cache;
873 struct inode_record *rec = NULL;
876 cache = lookup_cache_extent(inode_cache, ino, 1);
878 node = container_of(cache, struct ptr_node, cache);
880 if (mod && rec->refs > 1) {
881 node->data = clone_inode_rec(rec);
882 if (IS_ERR(node->data))
888 rec = calloc(1, sizeof(*rec));
890 return ERR_PTR(-ENOMEM);
892 rec->extent_start = (u64)-1;
894 INIT_LIST_HEAD(&rec->backrefs);
895 INIT_LIST_HEAD(&rec->orphan_extents);
896 rec->holes = RB_ROOT;
898 node = malloc(sizeof(*node));
901 return ERR_PTR(-ENOMEM);
903 node->cache.start = ino;
904 node->cache.size = 1;
907 if (ino == BTRFS_FREE_INO_OBJECTID)
910 ret = insert_cache_extent(inode_cache, &node->cache);
912 return ERR_PTR(-EEXIST);
917 static void free_orphan_data_extents(struct list_head *orphan_extents)
919 struct orphan_data_extent *orphan;
921 while (!list_empty(orphan_extents)) {
922 orphan = list_entry(orphan_extents->next,
923 struct orphan_data_extent, list);
924 list_del(&orphan->list);
929 static void free_inode_rec(struct inode_record *rec)
931 struct inode_backref *backref;
936 while (!list_empty(&rec->backrefs)) {
937 backref = to_inode_backref(rec->backrefs.next);
938 list_del(&backref->list);
941 free_orphan_data_extents(&rec->orphan_extents);
942 free_file_extent_holes(&rec->holes);
946 static int can_free_inode_rec(struct inode_record *rec)
948 if (!rec->errors && rec->checked && rec->found_inode_item &&
949 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
954 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
955 struct inode_record *rec)
957 struct cache_extent *cache;
958 struct inode_backref *tmp, *backref;
959 struct ptr_node *node;
962 if (!rec->found_inode_item)
965 filetype = imode_to_type(rec->imode);
966 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
967 if (backref->found_dir_item && backref->found_dir_index) {
968 if (backref->filetype != filetype)
969 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
970 if (!backref->errors && backref->found_inode_ref &&
971 rec->nlink == rec->found_link) {
972 list_del(&backref->list);
978 if (!rec->checked || rec->merging)
981 if (S_ISDIR(rec->imode)) {
982 if (rec->found_size != rec->isize)
983 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
984 if (rec->found_file_extent)
985 rec->errors |= I_ERR_ODD_FILE_EXTENT;
986 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
987 if (rec->found_dir_item)
988 rec->errors |= I_ERR_ODD_DIR_ITEM;
989 if (rec->found_size != rec->nbytes)
990 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
991 if (rec->nlink > 0 && !no_holes &&
992 (rec->extent_end < rec->isize ||
993 first_extent_gap(&rec->holes) < rec->isize))
994 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
997 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
998 if (rec->found_csum_item && rec->nodatasum)
999 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1000 if (rec->some_csum_missing && !rec->nodatasum)
1001 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1004 BUG_ON(rec->refs != 1);
1005 if (can_free_inode_rec(rec)) {
1006 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1007 node = container_of(cache, struct ptr_node, cache);
1008 BUG_ON(node->data != rec);
1009 remove_cache_extent(inode_cache, &node->cache);
1011 free_inode_rec(rec);
1015 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1017 struct btrfs_path path;
1018 struct btrfs_key key;
1021 key.objectid = BTRFS_ORPHAN_OBJECTID;
1022 key.type = BTRFS_ORPHAN_ITEM_KEY;
1025 btrfs_init_path(&path);
1026 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1027 btrfs_release_path(&path);
1033 static int process_inode_item(struct extent_buffer *eb,
1034 int slot, struct btrfs_key *key,
1035 struct shared_node *active_node)
1037 struct inode_record *rec;
1038 struct btrfs_inode_item *item;
1040 rec = active_node->current;
1041 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1042 if (rec->found_inode_item) {
1043 rec->errors |= I_ERR_DUP_INODE_ITEM;
1046 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1047 rec->nlink = btrfs_inode_nlink(eb, item);
1048 rec->isize = btrfs_inode_size(eb, item);
1049 rec->nbytes = btrfs_inode_nbytes(eb, item);
1050 rec->imode = btrfs_inode_mode(eb, item);
1051 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1053 rec->found_inode_item = 1;
1054 if (rec->nlink == 0)
1055 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1056 maybe_free_inode_rec(&active_node->inode_cache, rec);
1060 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1062 int namelen, u64 dir)
1064 struct inode_backref *backref;
1066 list_for_each_entry(backref, &rec->backrefs, list) {
1067 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1069 if (backref->dir != dir || backref->namelen != namelen)
1071 if (memcmp(name, backref->name, namelen))
1076 backref = malloc(sizeof(*backref) + namelen + 1);
1079 memset(backref, 0, sizeof(*backref));
1081 backref->namelen = namelen;
1082 memcpy(backref->name, name, namelen);
1083 backref->name[namelen] = '\0';
1084 list_add_tail(&backref->list, &rec->backrefs);
1088 static int add_inode_backref(struct cache_tree *inode_cache,
1089 u64 ino, u64 dir, u64 index,
1090 const char *name, int namelen,
1091 u8 filetype, u8 itemtype, int errors)
1093 struct inode_record *rec;
1094 struct inode_backref *backref;
1096 rec = get_inode_rec(inode_cache, ino, 1);
1097 BUG_ON(IS_ERR(rec));
1098 backref = get_inode_backref(rec, name, namelen, dir);
1101 backref->errors |= errors;
1102 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1103 if (backref->found_dir_index)
1104 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1105 if (backref->found_inode_ref && backref->index != index)
1106 backref->errors |= REF_ERR_INDEX_UNMATCH;
1107 if (backref->found_dir_item && backref->filetype != filetype)
1108 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1110 backref->index = index;
1111 backref->filetype = filetype;
1112 backref->found_dir_index = 1;
1113 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1115 if (backref->found_dir_item)
1116 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1117 if (backref->found_dir_index && backref->filetype != filetype)
1118 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1120 backref->filetype = filetype;
1121 backref->found_dir_item = 1;
1122 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1123 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1124 if (backref->found_inode_ref)
1125 backref->errors |= REF_ERR_DUP_INODE_REF;
1126 if (backref->found_dir_index && backref->index != index)
1127 backref->errors |= REF_ERR_INDEX_UNMATCH;
1129 backref->index = index;
1131 backref->ref_type = itemtype;
1132 backref->found_inode_ref = 1;
1137 maybe_free_inode_rec(inode_cache, rec);
1141 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1142 struct cache_tree *dst_cache)
1144 struct inode_backref *backref;
1149 list_for_each_entry(backref, &src->backrefs, list) {
1150 if (backref->found_dir_index) {
1151 add_inode_backref(dst_cache, dst->ino, backref->dir,
1152 backref->index, backref->name,
1153 backref->namelen, backref->filetype,
1154 BTRFS_DIR_INDEX_KEY, backref->errors);
1156 if (backref->found_dir_item) {
1158 add_inode_backref(dst_cache, dst->ino,
1159 backref->dir, 0, backref->name,
1160 backref->namelen, backref->filetype,
1161 BTRFS_DIR_ITEM_KEY, backref->errors);
1163 if (backref->found_inode_ref) {
1164 add_inode_backref(dst_cache, dst->ino,
1165 backref->dir, backref->index,
1166 backref->name, backref->namelen, 0,
1167 backref->ref_type, backref->errors);
1171 if (src->found_dir_item)
1172 dst->found_dir_item = 1;
1173 if (src->found_file_extent)
1174 dst->found_file_extent = 1;
1175 if (src->found_csum_item)
1176 dst->found_csum_item = 1;
1177 if (src->some_csum_missing)
1178 dst->some_csum_missing = 1;
1179 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1180 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1185 BUG_ON(src->found_link < dir_count);
1186 dst->found_link += src->found_link - dir_count;
1187 dst->found_size += src->found_size;
1188 if (src->extent_start != (u64)-1) {
1189 if (dst->extent_start == (u64)-1) {
1190 dst->extent_start = src->extent_start;
1191 dst->extent_end = src->extent_end;
1193 if (dst->extent_end > src->extent_start)
1194 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1195 else if (dst->extent_end < src->extent_start) {
1196 ret = add_file_extent_hole(&dst->holes,
1198 src->extent_start - dst->extent_end);
1200 if (dst->extent_end < src->extent_end)
1201 dst->extent_end = src->extent_end;
1205 dst->errors |= src->errors;
1206 if (src->found_inode_item) {
1207 if (!dst->found_inode_item) {
1208 dst->nlink = src->nlink;
1209 dst->isize = src->isize;
1210 dst->nbytes = src->nbytes;
1211 dst->imode = src->imode;
1212 dst->nodatasum = src->nodatasum;
1213 dst->found_inode_item = 1;
1215 dst->errors |= I_ERR_DUP_INODE_ITEM;
1223 static int splice_shared_node(struct shared_node *src_node,
1224 struct shared_node *dst_node)
1226 struct cache_extent *cache;
1227 struct ptr_node *node, *ins;
1228 struct cache_tree *src, *dst;
1229 struct inode_record *rec, *conflict;
1230 u64 current_ino = 0;
1234 if (--src_node->refs == 0)
1236 if (src_node->current)
1237 current_ino = src_node->current->ino;
1239 src = &src_node->root_cache;
1240 dst = &dst_node->root_cache;
1242 cache = search_cache_extent(src, 0);
1244 node = container_of(cache, struct ptr_node, cache);
1246 cache = next_cache_extent(cache);
1249 remove_cache_extent(src, &node->cache);
1252 ins = malloc(sizeof(*ins));
1254 ins->cache.start = node->cache.start;
1255 ins->cache.size = node->cache.size;
1259 ret = insert_cache_extent(dst, &ins->cache);
1260 if (ret == -EEXIST) {
1261 conflict = get_inode_rec(dst, rec->ino, 1);
1262 BUG_ON(IS_ERR(conflict));
1263 merge_inode_recs(rec, conflict, dst);
1265 conflict->checked = 1;
1266 if (dst_node->current == conflict)
1267 dst_node->current = NULL;
1269 maybe_free_inode_rec(dst, conflict);
1270 free_inode_rec(rec);
1277 if (src == &src_node->root_cache) {
1278 src = &src_node->inode_cache;
1279 dst = &dst_node->inode_cache;
1283 if (current_ino > 0 && (!dst_node->current ||
1284 current_ino > dst_node->current->ino)) {
1285 if (dst_node->current) {
1286 dst_node->current->checked = 1;
1287 maybe_free_inode_rec(dst, dst_node->current);
1289 dst_node->current = get_inode_rec(dst, current_ino, 1);
1290 BUG_ON(IS_ERR(dst_node->current));
1295 static void free_inode_ptr(struct cache_extent *cache)
1297 struct ptr_node *node;
1298 struct inode_record *rec;
1300 node = container_of(cache, struct ptr_node, cache);
1302 free_inode_rec(rec);
1306 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1308 static struct shared_node *find_shared_node(struct cache_tree *shared,
1311 struct cache_extent *cache;
1312 struct shared_node *node;
1314 cache = lookup_cache_extent(shared, bytenr, 1);
1316 node = container_of(cache, struct shared_node, cache);
1322 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1325 struct shared_node *node;
1327 node = calloc(1, sizeof(*node));
1330 node->cache.start = bytenr;
1331 node->cache.size = 1;
1332 cache_tree_init(&node->root_cache);
1333 cache_tree_init(&node->inode_cache);
1336 ret = insert_cache_extent(shared, &node->cache);
1341 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1342 struct walk_control *wc, int level)
1344 struct shared_node *node;
1345 struct shared_node *dest;
1348 if (level == wc->active_node)
1351 BUG_ON(wc->active_node <= level);
1352 node = find_shared_node(&wc->shared, bytenr);
1354 ret = add_shared_node(&wc->shared, bytenr, refs);
1356 node = find_shared_node(&wc->shared, bytenr);
1357 wc->nodes[level] = node;
1358 wc->active_node = level;
1362 if (wc->root_level == wc->active_node &&
1363 btrfs_root_refs(&root->root_item) == 0) {
1364 if (--node->refs == 0) {
1365 free_inode_recs_tree(&node->root_cache);
1366 free_inode_recs_tree(&node->inode_cache);
1367 remove_cache_extent(&wc->shared, &node->cache);
1373 dest = wc->nodes[wc->active_node];
1374 splice_shared_node(node, dest);
1375 if (node->refs == 0) {
1376 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): called when the walk moves back up
 * past the active shared node on the given level.
 *
 * Nothing visible is done when level is the root level. Otherwise a higher
 * level is searched for (the loop condition body is not shown), the current
 * active slot in wc->nodes[] is cleared and that higher level becomes the
 * active node. The records of the node being left are merged into the new
 * active node when that node is below the root level or the root still has
 * references.
 */
1382 static int leave_shared_node(struct btrfs_root *root,
1383 struct walk_control *wc, int level)
1385 struct shared_node *node;
1386 struct shared_node *dest;
1389 if (level == wc->root_level)
/* Look for the next tracked level above the one being left */
1392 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1396 BUG_ON(i >= BTRFS_MAX_LEVEL);
1398 node = wc->nodes[wc->active_node];
1399 wc->nodes[wc->active_node] = NULL;
1400 wc->active_node = i;
1402 dest = wc->nodes[wc->active_node];
1403 if (wc->active_node < wc->root_level ||
1404 btrfs_root_refs(&root->root_item) > 0) {
/* A node that is merged upwards must still be referenced more than once */
1405 BUG_ON(node->refs <= 1);
1406 splice_shared_node(node, dest);
1408 BUG_ON(node->refs < 2);
1417 * 1 - if the root with id child_root_id is a child of root parent_root_id
1418 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1419 * has other root(s) as parent(s)
1420 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determine how child_root_id relates to parent_root_id using the tree of
 * tree roots (root->fs_info->tree_root).
 *
 * Step 1 searches for the (parent_root_id, ROOT_REF, child_root_id) item.
 * Step 2 walks the ROOT_BACKREF items of child_root_id, moving to the next
 * leaf when the slot runs past the current one, and stops at the first key
 * that is not a ROOT_BACKREF of the child. A backref whose offset equals
 * parent_root_id ends the walk early.
 *
 * Returns 0 or 2 from the final statement depending on whether any parent
 * was seen; other return paths are governed by statements not shown here.
 */
1422 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1425 struct btrfs_path path;
1426 struct btrfs_key key;
1427 struct extent_buffer *leaf;
1431 btrfs_init_path(&path);
/* Forward reference: parent -> child */
1433 key.objectid = parent_root_id;
1434 key.type = BTRFS_ROOT_REF_KEY;
1435 key.offset = child_root_id;
1436 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1440 btrfs_release_path(&path);
/* Back references: scan every ROOT_BACKREF item of the child */
1444 key.objectid = child_root_id;
1445 key.type = BTRFS_ROOT_BACKREF_KEY;
1447 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1453 leaf = path.nodes[0];
1454 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1455 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1458 leaf = path.nodes[0];
1461 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1462 if (key.objectid != child_root_id ||
1463 key.type != BTRFS_ROOT_BACKREF_KEY)
/* For ROOT_BACKREF items the key offset is compared against the parent id */
1468 if (key.offset == parent_root_id) {
1469 btrfs_release_path(&path);
1476 btrfs_release_path(&path);
1479 return has_parent ? 0 : 2;
/*
 * Record every directory entry packed into one DIR_ITEM or DIR_INDEX item.
 *
 * eb/slot:     leaf and slot holding the item
 * key:         key of the item (objectid and offset are forwarded to
 *              add_inode_backref() together with key->type)
 * active_node: shared node whose caches and current inode record are updated
 *
 * The current inode record is flagged as having a dir item and its
 * found_size grows by the name length of each entry. Each name is read
 * into a BTRFS_NAME_LEN sized buffer; oversized or item-crossing names are
 * reported through REF_ERR_NAME_TOO_LONG. For DIR_ITEM keys the key offset
 * must equal btrfs_name_hash() of the name, otherwise I_ERR_ODD_DIR_ITEM is
 * set. Entries are filed by location type: INODE_ITEM into the inode cache,
 * ROOT_ITEM into the root cache, anything else under
 * BTRFS_MULTIPLE_OBJECTIDS after a diagnostic. A DIR_INDEX item carrying
 * more than one entry sets I_ERR_DUP_DIR_INDEX.
 *
 * NOTE(review): namebuf is printed with %s in the hash mismatch message,
 * but no terminator write is visible - confirm it cannot be read past len.
 * NOTE(review): found_size is increased before name_len is validated.
 */
1482 static int process_dir_item(struct extent_buffer *eb,
1483 int slot, struct btrfs_key *key,
1484 struct shared_node *active_node)
1494 struct btrfs_dir_item *di;
1495 struct inode_record *rec;
1496 struct cache_tree *root_cache;
1497 struct cache_tree *inode_cache;
1498 struct btrfs_key location;
1499 char namebuf[BTRFS_NAME_LEN];
1501 root_cache = &active_node->root_cache;
1502 inode_cache = &active_node->inode_cache;
1503 rec = active_node->current;
1504 rec->found_dir_item = 1;
1506 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1507 total = btrfs_item_size_nr(eb, slot);
/* One item may hold several btrfs_dir_item entries back to back */
1508 while (cur < total) {
1510 btrfs_dir_item_key_to_cpu(eb, di, &location);
1511 name_len = btrfs_dir_name_len(eb, di);
1512 data_len = btrfs_dir_data_len(eb, di);
1513 filetype = btrfs_dir_type(eb, di);
1515 rec->found_size += name_len;
/* Header plus name must fit in the item and the name in namebuf */
1516 if (cur + sizeof(*di) + name_len > total ||
1517 name_len > BTRFS_NAME_LEN) {
1518 error = REF_ERR_NAME_TOO_LONG;
1520 if (cur + sizeof(*di) > total)
1522 len = min_t(u32, total - cur - sizeof(*di),
1529 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* DIR_ITEM keys are indexed by name hash, so the offset must match it */
1531 if (key->type == BTRFS_DIR_ITEM_KEY &&
1532 key->offset != btrfs_name_hash(namebuf, len)) {
1533 rec->errors |= I_ERR_ODD_DIR_ITEM;
1534 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1535 key->objectid, key->offset, namebuf, len, filetype,
1536 key->offset, btrfs_name_hash(namebuf, len));
1539 if (location.type == BTRFS_INODE_ITEM_KEY) {
1540 add_inode_backref(inode_cache, location.objectid,
1541 key->objectid, key->offset, namebuf,
1542 len, filetype, key->type, error);
1543 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1544 add_inode_backref(root_cache, location.objectid,
1545 key->objectid, key->offset,
1546 namebuf, len, filetype,
1549 fprintf(stderr, "invalid location in dir item %u\n",
1551 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1552 key->objectid, key->offset, namebuf,
1553 len, filetype, key->type, error);
/* Step over header, name and trailing data to the next packed entry */
1556 len = sizeof(*di) + name_len + data_len;
1557 di = (struct btrfs_dir_item *)((char *)di + len);
1560 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1561 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every btrfs_inode_ref packed into one INODE_REF item.
 *
 * For each entry the name length and directory index are read from the
 * leaf. A name that would cross the item boundary or exceed BTRFS_NAME_LEN
 * is flagged with REF_ERR_NAME_TOO_LONG and only the part that fits is
 * read. Each entry is then recorded in the inode cache of active_node via
 * add_inode_backref(), passing key->objectid, key->offset, the index, the
 * name and key->type.
 */
1566 static int process_inode_ref(struct extent_buffer *eb,
1567 int slot, struct btrfs_key *key,
1568 struct shared_node *active_node)
1576 struct cache_tree *inode_cache;
1577 struct btrfs_inode_ref *ref;
1578 char namebuf[BTRFS_NAME_LEN];
1580 inode_cache = &active_node->inode_cache;
1582 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1583 total = btrfs_item_size_nr(eb, slot);
1584 while (cur < total) {
1585 name_len = btrfs_inode_ref_name_len(eb, ref);
1586 index = btrfs_inode_ref_index(eb, ref);
1588 /* inode_ref + namelen should not cross item boundary */
1589 if (cur + sizeof(*ref) + name_len > total ||
1590 name_len > BTRFS_NAME_LEN) {
/* Not even the fixed-size header fits in what is left of the item */
1591 if (total < cur + sizeof(*ref))
1594 /* Still try to read out the remaining part */
1595 len = min_t(u32, total - cur - sizeof(*ref),
1597 error = REF_ERR_NAME_TOO_LONG;
1603 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1604 add_inode_backref(inode_cache, key->objectid, key->offset,
1605 index, namebuf, len, 0, key->type, error);
/* Advance by the on-disk name length, not by the clamped read length */
1607 len = sizeof(*ref) + name_len;
1608 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every btrfs_inode_extref packed into one INODE_EXTREF item.
 *
 * Unlike a plain inode ref, the parent directory is stored inside each
 * entry and is read with btrfs_inode_extref_parent(). Names longer than
 * BTRFS_NAME_LEN are truncated to BTRFS_NAME_LEN and flagged with
 * REF_ERR_NAME_TOO_LONG. Each entry is recorded in the inode cache of
 * active_node through add_inode_backref().
 *
 * NOTE(review): unlike process_inode_ref(), no check that an entry stays
 * inside the item boundary is visible here - confirm whether one is needed.
 */
1614 static int process_inode_extref(struct extent_buffer *eb,
1615 int slot, struct btrfs_key *key,
1616 struct shared_node *active_node)
1625 struct cache_tree *inode_cache;
1626 struct btrfs_inode_extref *extref;
1627 char namebuf[BTRFS_NAME_LEN];
1629 inode_cache = &active_node->inode_cache;
1631 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1632 total = btrfs_item_size_nr(eb, slot);
1633 while (cur < total) {
1634 name_len = btrfs_inode_extref_name_len(eb, extref);
1635 index = btrfs_inode_extref_index(eb, extref);
1636 parent = btrfs_inode_extref_parent(eb, extref);
1637 if (name_len <= BTRFS_NAME_LEN) {
/* Oversized name: read only what fits into namebuf and flag the entry */
1641 len = BTRFS_NAME_LEN;
1642 error = REF_ERR_NAME_TOO_LONG;
1644 read_extent_buffer(eb, namebuf,
1645 (unsigned long)(extref + 1), len);
1646 add_inode_backref(inode_cache, key->objectid, parent,
1647 index, namebuf, len, 0, key->type, error);
1649 len = sizeof(*extref) + name_len;
1650 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the checksum tree (root->fs_info->csum_root) over the byte range
 * [start, start + len) and report through found how much of it is covered
 * by checksum items.
 *
 * The search uses (BTRFS_EXTENT_CSUM_OBJECTID, BTRFS_EXTENT_CSUM_KEY, ...).
 * When the search does not hit exactly and an earlier slot exists, the
 * previous item is inspected as well, since it may still cover start.
 * Each csum item covers (item size / csum size) sectors beginning at its
 * key offset. The walk ends at the first non-csum key or at a key whose
 * offset is at or beyond start + len.
 *
 * NOTE(review): the statements that advance start/len and accumulate into
 * found are not shown here - confirm the exact accounting.
 */
1657 static int count_csum_range(struct btrfs_root *root, u64 start,
1658 u64 len, u64 *found)
1660 struct btrfs_key key;
1661 struct btrfs_path path;
1662 struct extent_buffer *leaf;
1667 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1669 btrfs_init_path(&path);
1671 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1673 key.type = BTRFS_EXTENT_CSUM_KEY;
1675 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* No exact hit: the preceding csum item may still overlap the range */
1679 if (ret > 0 && path.slots[0] > 0) {
1680 leaf = path.nodes[0];
1681 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1682 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1683 key.type == BTRFS_EXTENT_CSUM_KEY)
1688 leaf = path.nodes[0];
1689 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1690 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1695 leaf = path.nodes[0];
1698 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1699 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1700 key.type != BTRFS_EXTENT_CSUM_KEY)
1703 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1704 if (key.offset >= start + len)
1707 if (key.offset > start)
/* Translate the number of checksums in the item into covered bytes */
1710 size = btrfs_item_size_nr(leaf, path.slots[0]);
1711 csum_end = key.offset + (size / csum_size) *
1712 root->fs_info->sectorsize;
1713 if (csum_end > start) {
1714 size = min(csum_end - start, len);
1723 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item and fold it into the inode record that is
 * currently being built in active_node.
 *
 * Visible checks and bookkeeping:
 *  - the record must belong to key->objectid and have at most one ref;
 *  - extent_start/extent_end track the covered file range: an item that
 *    starts before extent_end sets I_ERR_FILE_EXTENT_OVERLAP, a gap is
 *    recorded through add_file_extent_hole();
 *  - inline extents add their inline length to found_size and are rounded
 *    up to the sector size for the range tracking;
 *  - regular and preallocated extents must have a non-zero, sector aligned
 *    num_bytes, must not exceed ram_bytes, and preallocated ones must not
 *    carry compression, encryption or other encoding; violations set
 *    I_ERR_BAD_FILE_EXTENT; found_size grows only for a non-zero
 *    disk_bytenr;
 *  - for extents with a disk_bytenr outside the data reloc tree the
 *    checksum coverage is counted with count_csum_range(); regular extents
 *    record found/missing checksums, preallocated ones can be flagged with
 *    I_ERR_ODD_CSUM_ITEM.
 */
1729 static int process_file_extent(struct btrfs_root *root,
1730 struct extent_buffer *eb,
1731 int slot, struct btrfs_key *key,
1732 struct shared_node *active_node)
1734 struct inode_record *rec;
1735 struct btrfs_file_extent_item *fi;
1737 u64 disk_bytenr = 0;
1738 u64 extent_offset = 0;
/* sectorsize - 1: used for the alignment test and for rounding up */
1739 u64 mask = root->fs_info->sectorsize - 1;
1743 rec = active_node->current;
1744 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1745 rec->found_file_extent = 1;
/* (u64)-1 marks a record that has not seen any file extent yet */
1747 if (rec->extent_start == (u64)-1) {
1748 rec->extent_start = key->offset;
1749 rec->extent_end = key->offset;
1752 if (rec->extent_end > key->offset)
1753 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1754 else if (rec->extent_end < key->offset) {
1755 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1756 key->offset - rec->extent_end);
1761 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1762 extent_type = btrfs_file_extent_type(eb, fi);
1764 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1765 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1767 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1768 rec->found_size += num_bytes;
1769 num_bytes = (num_bytes + mask) & ~mask;
1770 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1771 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1772 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1773 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1774 extent_offset = btrfs_file_extent_offset(eb, fi);
1775 if (num_bytes == 0 || (num_bytes & mask))
1776 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1777 if (num_bytes + extent_offset >
1778 btrfs_file_extent_ram_bytes(eb, fi))
1779 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1780 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1781 (btrfs_file_extent_compression(eb, fi) ||
1782 btrfs_file_extent_encryption(eb, fi) ||
1783 btrfs_file_extent_other_encoding(eb, fi)))
1784 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1785 if (disk_bytenr > 0)
1786 rec->found_size += num_bytes;
1788 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1790 rec->extent_end = key->offset + num_bytes;
1793 * The data reloc tree will copy full extents into its inode and then
1794 * copy the corresponding csums. Because the extent it copied could be
1795 * a preallocated extent that hasn't been written to yet there may be no
1796 * csums to copy, ergo we won't have csums for our file extent. This is
1797 * ok so just don't bother checking csums if the inode belongs to the
1800 if (disk_bytenr > 0 &&
1801 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1803 if (btrfs_file_extent_compression(eb, fi))
1804 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1806 disk_bytenr += extent_offset;
1808 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1811 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1813 rec->found_csum_item = 1;
1814 if (found < num_bytes)
1815 rec->some_csum_missing = 1;
1816 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1818 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of one fs-tree leaf into the inode records of the
 * currently active shared node.
 *
 * Items are visited in slot order. Keys with the free space objectid and
 * orphan item keys get special treatment before any record handling (the
 * action itself is not shown here). Whenever the key objectid moves past
 * the inode of the current record, that record is marked checked, handed
 * to maybe_free_inode_rec() and a record for the new objectid is fetched
 * with get_inode_rec(). The item is then dispatched by key type to the
 * matching process_*() helper.
 */
1824 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1825 struct walk_control *wc)
1827 struct btrfs_key key;
1831 struct cache_tree *inode_cache;
1832 struct shared_node *active_node;
/* Same condition enter_shared_node() uses for roots with zero refs */
1834 if (wc->root_level == wc->active_node &&
1835 btrfs_root_refs(&root->root_item) == 0)
1838 active_node = wc->nodes[wc->active_node];
1839 inode_cache = &active_node->inode_cache;
1840 nritems = btrfs_header_nritems(eb);
1841 for (i = 0; i < nritems; i++) {
1842 btrfs_item_key_to_cpu(eb, &key, i);
1844 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1846 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Switch records when the leaf moves on to a higher inode number */
1849 if (active_node->current == NULL ||
1850 active_node->current->ino < key.objectid) {
1851 if (active_node->current) {
1852 active_node->current->checked = 1;
1853 maybe_free_inode_rec(inode_cache,
1854 active_node->current);
1856 active_node->current = get_inode_rec(inode_cache,
1858 BUG_ON(IS_ERR(active_node->current));
1861 case BTRFS_DIR_ITEM_KEY:
1862 case BTRFS_DIR_INDEX_KEY:
1863 ret = process_dir_item(eb, i, &key, active_node);
1865 case BTRFS_INODE_REF_KEY:
1866 ret = process_inode_ref(eb, i, &key, active_node);
1868 case BTRFS_INODE_EXTREF_KEY:
1869 ret = process_inode_extref(eb, i, &key, active_node);
1871 case BTRFS_INODE_ITEM_KEY:
1872 ret = process_inode_item(eb, i, &key, active_node);
1874 case BTRFS_EXTENT_DATA_KEY:
1875 ret = process_file_extent(root, eb, i, &key,
1886 u64 bytenr[BTRFS_MAX_LEVEL];
1887 u64 refs[BTRFS_MAX_LEVEL];
1888 int need_check[BTRFS_MAX_LEVEL];
1891 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1892 struct node_refs *nrefs, u64 level);
1893 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1894 unsigned int ext_ref);
1897 * Returns >0 Found error, not fatal, should continue
1898 * Returns <0 Fatal error, must exit the whole check
1899 * Returns 0 No errors found
/*
 * Check the inodes that start in the leaf at path->nodes[0] by calling
 * check_inode_item() on them.
 *
 * The leaf is first scanned for the first INODE_ITEM key or the first
 * change of inode number. Errors reported by check_inode_item() are
 * OR-ed into a positive bitmap; LAST_ITEM in that bitmap is tested after
 * each call. When the path has moved to a different leaf, the levels from
 * the root down are compared against the block numbers cached in nrefs,
 * refreshed through update_nodes_refs() where they differ, and levels that
 * do not need checking lead to the lower path nodes being released.
 */
1901 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1902 struct node_refs *nrefs, int *level, int ext_ref)
1904 struct extent_buffer *cur = path->nodes[0];
1905 struct btrfs_key key;
1909 int root_level = btrfs_header_level(root->node);
1911 int ret = 0; /* Final return value */
1912 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on to detect a leaf switch later */
1914 cur_bytenr = cur->start;
1916 /* skip to first inode item or the first inode number change */
1917 nritems = btrfs_header_nritems(cur);
1918 for (i = 0; i < nritems; i++) {
1919 btrfs_item_key_to_cpu(cur, &key, i);
1921 first_ino = key.objectid;
1922 if (key.type == BTRFS_INODE_ITEM_KEY ||
1923 (first_ino && first_ino != key.objectid))
1927 path->slots[0] = nritems;
1933 err |= check_inode_item(root, path, ext_ref);
1935 if (err & LAST_ITEM)
1938 /* still have inode items in this leaf */
1939 if (cur->start == cur_bytenr)
1943 * we have switched to another leaf, above nodes may
1944 * have changed, here walk down the path, if a node
1945 * or leaf is shared, check whether we can skip this
1948 for (i = root_level; i >= 0; i--) {
1949 if (path->nodes[i]->start == nrefs->bytenr[i])
1952 ret = update_nodes_refs(root,
1953 path->nodes[i]->start,
1958 if (!nrefs->need_check[i]) {
1964 for (i = 0; i < *level; i++) {
1965 free_extent_buffer(path->nodes[i]);
1966 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of a tree node, starting at the given
 * slot and continuing to the last pointer in the node.
 *
 * Each child is requested with readahead_tree_block() using the block
 * pointer taken from the node and the filesystem node size.
 */
1975 static void reada_walk_down(struct btrfs_root *root,
1976 struct extent_buffer *node, int slot)
1978 struct btrfs_fs_info *fs_info = root->fs_info;
1985 level = btrfs_header_level(node);
1989 nritems = btrfs_header_nritems(node);
1990 for (i = slot; i < nritems; i++) {
1991 bytenr = btrfs_node_blockptr(node, i);
1992 ptr_gen = btrfs_node_ptr_generation(node, i);
1993 readahead_tree_block(fs_info, bytenr, fs_info->nodesize,
1999 * Check the child node/leaf against the following conditions:
2000 * 1. the first item key of the node/leaf should be the same as the one
2002 * 2. block in parent node should match the child node/leaf.
2003 * 3. generation of parent node and child's header should be consistent.
2005 * Otherwise the child node/leaf pointed to by the key in the parent is not valid.
2007 * We would like to check the leaf owner too, but since subvolumes may share
2008 * leaves, the leaf owner check is not reliable; the key check should be
2009 * sufficient for that case.
/*
 * Cross-check a child tree block against the pointer stored in slot of
 * its parent node.
 *
 * Three properties are compared, each with its own diagnostic on stderr:
 * the first key of the child against the parent key for the slot, the
 * block number in the child header against the parent block pointer, and
 * the generation in the child header against the parent pointer
 * generation.
 *
 * NOTE(review): the generation message prints the child header value as
 * "wanted" and the parent pointer value as "have", the reverse of the
 * block message above it - looks swapped, confirm.
 */
2011 static int check_child_node(struct extent_buffer *parent, int slot,
2012 struct extent_buffer *child)
2014 struct btrfs_key parent_key;
2015 struct btrfs_key child_key;
2018 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* Leaves store item keys, internal nodes store node keys */
2019 if (btrfs_header_level(child) == 0)
2020 btrfs_item_key_to_cpu(child, &child_key, 0);
2022 btrfs_node_key_to_cpu(child, &child_key, 0);
2024 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2027 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2028 parent_key.objectid, parent_key.type, parent_key.offset,
2029 child_key.objectid, child_key.type, child_key.offset);
2031 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2033 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2034 btrfs_node_blockptr(parent, slot),
2035 btrfs_header_bytenr(child));
2037 if (btrfs_node_ptr_generation(parent, slot) !=
2038 btrfs_header_generation(child)) {
2040 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2041 btrfs_header_generation(child),
2042 btrfs_node_ptr_generation(parent, slot));
2048 * If a tree node or leaf is shared, there is no need to iterate it in every
2049 * fs or file tree check. Here we find all of its root ids, and only check
2050 * it in the fs or file tree which has the smallest root id.
/*
 * Decide whether a tree block referenced by the given set of roots has to
 * be checked while walking this root.
 *
 * A block referenced by exactly one root is handled by the first test.
 * Otherwise the first entry of the ulist rb-tree is compared with
 * root->objectid; a mismatch means another root is responsible.
 * NOTE(review): the concrete return values are not shown here - presumably
 * non-zero means "check it"; confirm.
 */
2052 static int need_check(struct btrfs_root *root, struct ulist *roots)
2054 struct rb_node *node;
2055 struct ulist_node *u;
2057 if (roots->nnodes == 1)
2060 node = rb_first(&roots->root);
2061 u = rb_entry(node, struct ulist_node, rb_node);
2063 * current root id is not smallest, we skip it and let it be checked
2064 * in the fs or file tree who hash the smallest root id.
2066 if (root->objectid != u->val)
2073 * For a tree node or leaf, we record its reference count so that if we
2074 * process this node or leaf again later, we do not need to compute it again.
/*
 * Refresh the per-level cache in nrefs for the tree block at bytenr.
 *
 * Nothing visible is recomputed when the cached block number for the
 * level already equals bytenr. Otherwise the extent reference count is
 * looked up with btrfs_lookup_extent_info() and stored together with the
 * block number. The set of referencing roots is obtained with
 * btrfs_find_all_roots() and passed to need_check(); the outcome is stored
 * in need_check[level], and one visible path stores 1 unconditionally.
 * NOTE(review): the condition selecting between those two stores is not
 * shown here - confirm.
 */
2076 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2077 struct node_refs *nrefs, u64 level)
2081 struct ulist *roots;
2083 if (nrefs->bytenr[level] != bytenr) {
2084 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2085 level, 1, &refs, NULL);
2089 nrefs->bytenr[level] = bytenr;
2090 nrefs->refs[level] = refs;
2092 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2097 check = need_check(root, roots);
2099 nrefs->need_check[level] = check;
2101 nrefs->need_check[level] = 1;
/*
 * Descend from path->nodes[*level] towards the leaves, processing each
 * leaf with process_one_leaf() and tracking shared blocks through
 * enter_shared_node().
 *
 * Reference counts are taken from nrefs when the cached block number for
 * the level matches, otherwise they are looked up with
 * btrfs_lookup_extent_info() and cached. Before stepping into a child the
 * block is fetched from the cache or read from disk (with readahead for
 * its siblings); an unreadable child is recorded through
 * btrfs_add_corrupt_extent_record(). A readable child is validated with
 * check_child_node() and btrfs_check_leaf()/btrfs_check_node() before it
 * replaces the path entry one level down. The final statement moves the
 * slot of the current level to the end of its node.
 */
2108 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2109 struct walk_control *wc, int *level,
2110 struct node_refs *nrefs)
2112 enum btrfs_tree_block_status status;
2115 struct btrfs_fs_info *fs_info = root->fs_info;
2116 struct extent_buffer *next;
2117 struct extent_buffer *cur;
2121 WARN_ON(*level < 0);
2122 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached reference count when the block is the one seen last */
2124 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2125 refs = nrefs->refs[*level];
2128 ret = btrfs_lookup_extent_info(NULL, root,
2129 path->nodes[*level]->start,
2130 *level, 1, &refs, NULL);
2135 nrefs->bytenr[*level] = path->nodes[*level]->start;
2136 nrefs->refs[*level] = refs;
2140 ret = enter_shared_node(root, path->nodes[*level]->start,
2148 while (*level >= 0) {
2149 WARN_ON(*level < 0);
2150 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2151 cur = path->nodes[*level];
2153 if (btrfs_header_level(cur) != *level)
2156 if (path->slots[*level] >= btrfs_header_nritems(cur))
2159 ret = process_one_leaf(root, cur, wc);
2164 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2165 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Same caching scheme as above, applied to the child level */
2167 if (bytenr == nrefs->bytenr[*level - 1]) {
2168 refs = nrefs->refs[*level - 1];
2170 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2171 *level - 1, 1, &refs, NULL);
2175 nrefs->bytenr[*level - 1] = bytenr;
2176 nrefs->refs[*level - 1] = refs;
2181 ret = enter_shared_node(root, bytenr, refs,
2184 path->slots[*level]++;
/* Try the block cache first, fall back to reading the child from disk */
2189 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2190 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2191 free_extent_buffer(next);
2192 reada_walk_down(root, cur, path->slots[*level]);
2193 next = read_tree_block(root->fs_info, bytenr,
2194 fs_info->nodesize, ptr_gen);
2195 if (!extent_buffer_uptodate(next)) {
2196 struct btrfs_key node_key;
2198 btrfs_node_key_to_cpu(path->nodes[*level],
2200 path->slots[*level]);
2201 btrfs_add_corrupt_extent_record(root->fs_info,
2203 path->nodes[*level]->start,
2204 root->fs_info->nodesize,
2211 ret = check_child_node(cur, path->slots[*level], next);
2213 free_extent_buffer(next);
2218 if (btrfs_is_leaf(next))
2219 status = btrfs_check_leaf(root, NULL, next);
2221 status = btrfs_check_node(root, NULL, next);
2222 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2223 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held one level below */
2228 *level = *level - 1;
2229 free_extent_buffer(path->nodes[*level]);
2230 path->nodes[*level] = next;
2231 path->slots[*level] = 0;
2234 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2238 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2239 unsigned int ext_ref);
2242 * Returns >0 Found error, should continue
2243 * Returns <0 Fatal error, must exit the whole check
2244 * Returns 0 No errors found
/*
 * Variant of the downward walk that relies on nrefs instead of a
 * walk_control: descend from path->nodes[*level], validating every block
 * and handing leaves to process_one_leaf_v2().
 *
 * update_nodes_refs() is consulted for the starting block and for every
 * child; a child whose need_check flag is clear is skipped by advancing
 * the slot. Other children are fetched from the cache or read from disk
 * (with sibling readahead), unreadable ones are recorded through
 * btrfs_add_corrupt_extent_record(), and readable ones must pass
 * check_child_node() and btrfs_check_leaf()/btrfs_check_node() before
 * they replace the path entry one level down.
 */
2246 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2247 int *level, struct node_refs *nrefs, int ext_ref)
2249 enum btrfs_tree_block_status status;
2252 struct btrfs_fs_info *fs_info = root->fs_info;
2253 struct extent_buffer *next;
2254 struct extent_buffer *cur;
2257 WARN_ON(*level < 0);
2258 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2260 ret = update_nodes_refs(root, path->nodes[*level]->start,
2265 while (*level >= 0) {
2266 WARN_ON(*level < 0);
2267 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2268 cur = path->nodes[*level];
2270 if (btrfs_header_level(cur) != *level)
2273 if (path->slots[*level] >= btrfs_header_nritems(cur))
2275 /* Don't forget to validate the leaf/node */
2277 ret = btrfs_check_leaf(root, NULL, cur);
2278 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2282 ret = process_one_leaf_v2(root, path, nrefs,
2286 ret = btrfs_check_node(root, NULL, cur);
2287 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2292 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2293 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2295 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* Child is covered by another root: skip it */
2298 if (!nrefs->need_check[*level - 1]) {
2299 path->slots[*level]++;
2303 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2304 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2305 free_extent_buffer(next);
2306 reada_walk_down(root, cur, path->slots[*level]);
2307 next = read_tree_block(fs_info, bytenr,
2308 fs_info->nodesize, ptr_gen);
2309 if (!extent_buffer_uptodate(next)) {
2310 struct btrfs_key node_key;
2312 btrfs_node_key_to_cpu(path->nodes[*level],
2314 path->slots[*level]);
2315 btrfs_add_corrupt_extent_record(fs_info,
2317 path->nodes[*level]->start,
2325 ret = check_child_node(cur, path->slots[*level], next);
2329 if (btrfs_is_leaf(next))
2330 status = btrfs_check_leaf(root, NULL, next);
2332 status = btrfs_check_node(root, NULL, next);
2333 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2334 free_extent_buffer(next);
2339 *level = *level - 1;
2340 free_extent_buffer(path->nodes[*level]);
2341 path->nodes[*level] = next;
2342 path->slots[*level] = 0;
/*
 * Climb the path from *level until a node with an unvisited slot is found.
 *
 * For every populated level below BTRFS_MAX_LEVEL - 1, a node that still
 * has a slot after the current one takes the first branch (its body is
 * not shown here). Otherwise the buffer at *level is released and cleared
 * from the path, and leave_shared_node() is called when that level is the
 * active shared node.
 */
2347 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2348 struct walk_control *wc, int *level)
2351 struct extent_buffer *leaf;
2353 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2354 leaf = path->nodes[i];
2355 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2360 free_extent_buffer(path->nodes[*level]);
2361 path->nodes[*level] = NULL;
/* The level being left can never be above the active shared node */
2362 BUG_ON(*level > wc->active_node);
2363 if (*level == wc->active_node)
2364 leave_shared_node(root, wc, *level);
/*
 * Variant of walk_up_tree() without shared-node bookkeeping: climb the
 * path from *level, and for every level without a remaining slot release
 * the buffer at *level and clear it from the path.
 */
2371 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2375 struct extent_buffer *leaf;
2377 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2378 leaf = path->nodes[i];
2379 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2384 free_extent_buffer(path->nodes[*level]);
2385 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.
 *
 * The visible tests look at: a present inode item without errors, nlink
 * of 1 with no found links, a non-empty backref list, and a first backref
 * that has an inode ref, index 0, the two-byte name ".." and neither a dir
 * index nor a dir item.
 * NOTE(review): the return value for pass/fail is not shown here - confirm.
 */
2392 static int check_root_dir(struct inode_record *rec)
2394 struct inode_backref *backref;
2397 if (!rec->found_inode_item || rec->errors)
2399 if (rec->nlink != 1 || rec->found_link != 0)
2401 if (list_empty(&rec->backrefs))
2403 backref = to_inode_backref(rec->backrefs.next);
2404 if (!backref->found_inode_ref)
2406 if (backref->index != 0 || backref->namelen != 2 ||
2407 memcmp(backref->name, "..", 2))
2409 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of a directory inode item to rec->found_size.
 *
 * The search key uses offset (u64)-1. After the search, the key at the
 * current slot is read and must belong to rec->ino. On success the size
 * is written, the leaf is marked dirty, I_ERR_DIR_ISIZE_WRONG is cleared
 * in the record and a message is printed. The path is released at the end.
 * NOTE(review): how the slot is adjusted between the search and the key
 * read is not shown here - confirm.
 */
2416 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2417 struct btrfs_root *root, struct btrfs_path *path,
2418 struct inode_record *rec)
2420 struct btrfs_inode_item *ei;
2421 struct btrfs_key key;
2424 key.objectid = rec->ino;
2425 key.type = BTRFS_INODE_ITEM_KEY;
2426 key.offset = (u64)-1;
2428 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2432 if (!path->slots[0]) {
2439 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2440 if (key.objectid != rec->ino) {
2445 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2446 struct btrfs_inode_item);
2447 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2448 btrfs_mark_buffer_dirty(path->nodes[0]);
2449 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2450 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2451 root->root_key.objectid);
2453 btrfs_release_path(path);
/*
 * Add the missing orphan item for rec->ino with btrfs_add_orphan_item(),
 * release the path and clear I_ERR_NO_ORPHAN_ITEM in the record.
 * NOTE(review): whether the flag is cleared only on success is not shown
 * here - confirm.
 */
2457 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2458 struct btrfs_root *root,
2459 struct btrfs_path *path,
2460 struct inode_record *rec)
2464 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2465 btrfs_release_path(path);
2467 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino to
 * rec->found_size.
 *
 * The inode item is located with btrfs_search_slot(); on the visible
 * success path nbytes is written, the leaf is marked dirty,
 * I_ERR_FILE_NBYTES_WRONG is cleared in the record and a message is
 * printed. The path is released at the end.
 */
2471 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2472 struct btrfs_root *root,
2473 struct btrfs_path *path,
2474 struct inode_record *rec)
2476 struct btrfs_inode_item *ei;
2477 struct btrfs_key key;
2480 key.objectid = rec->ino;
2481 key.type = BTRFS_INODE_ITEM_KEY;
2484 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2491 /* Since ret == 0, no need to check anything */
2492 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2493 struct btrfs_inode_item);
2494 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2495 btrfs_mark_buffer_dirty(path->nodes[0]);
2496 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2497 printf("reset nbytes for ino %llu root %llu\n",
2498 rec->ino, root->root_key.objectid);
2500 btrfs_release_path(path);
/*
 * Recreate a missing DIR_INDEX item for a backref of rec.
 *
 * A transaction is started, an empty item sized for one btrfs_dir_item
 * plus the name is inserted at (backref->dir, DIR_INDEX, backref->index)
 * and filled in: location key (rec->ino, INODE_ITEM, 0), file type derived
 * from rec->imode, zero data length, and the name from the backref. The
 * transaction is committed afterwards.
 *
 * In memory, the backref is marked as having a dir index and the record
 * of the parent directory gets the name length added to found_size; its
 * I_ERR_DIR_ISIZE_WRONG flag is then brought in line with whether
 * found_size matches isize.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not used.
 */
2504 static int add_missing_dir_index(struct btrfs_root *root,
2505 struct cache_tree *inode_cache,
2506 struct inode_record *rec,
2507 struct inode_backref *backref)
2509 struct btrfs_path path;
2510 struct btrfs_trans_handle *trans;
2511 struct btrfs_dir_item *dir_item;
2512 struct extent_buffer *leaf;
2513 struct btrfs_key key;
2514 struct btrfs_disk_key disk_key;
2515 struct inode_record *dir_rec;
2516 unsigned long name_ptr;
/* Item payload: fixed dir item header followed directly by the name */
2517 u32 data_size = sizeof(*dir_item) + backref->namelen;
2520 trans = btrfs_start_transaction(root, 1);
2522 return PTR_ERR(trans);
2524 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2525 (unsigned long long)rec->ino);
2527 btrfs_init_path(&path);
2528 key.objectid = backref->dir;
2529 key.type = BTRFS_DIR_INDEX_KEY;
2530 key.offset = backref->index;
2531 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2534 leaf = path.nodes[0];
2535 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* disk_key is the on-disk form, hence the explicit little-endian store */
2537 disk_key.objectid = cpu_to_le64(rec->ino);
2538 disk_key.type = BTRFS_INODE_ITEM_KEY;
2539 disk_key.offset = 0;
2541 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2542 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2543 btrfs_set_dir_data_len(leaf, dir_item, 0);
2544 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2545 name_ptr = (unsigned long)(dir_item + 1);
2546 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2547 btrfs_mark_buffer_dirty(leaf);
2548 btrfs_release_path(&path);
2549 btrfs_commit_transaction(trans, root);
/* Mirror the on-disk fix in the in-memory records */
2551 backref->found_dir_index = 1;
2552 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2553 BUG_ON(IS_ERR(dir_rec));
2556 dir_rec->found_size += backref->namelen;
2557 if (dir_rec->found_size == dir_rec->isize &&
2558 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2559 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2560 if (dir_rec->found_size != dir_rec->isize)
2561 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by backref inside its own
 * transaction.
 *
 * The entry is located with btrfs_lookup_dir_index() by directory, name
 * and index. One visible path releases the path and commits without
 * deleting anything; the other removes either the whole item with
 * btrfs_del_item() or just the one name with
 * btrfs_delete_one_dir_name(), then releases the path and commits.
 *
 * NOTE(review): the conditions selecting these paths are not shown here,
 * and the result of btrfs_commit_transaction() is not used - confirm.
 */
2566 static int delete_dir_index(struct btrfs_root *root,
2567 struct inode_backref *backref)
2569 struct btrfs_trans_handle *trans;
2570 struct btrfs_dir_item *di;
2571 struct btrfs_path path;
2574 trans = btrfs_start_transaction(root, 1);
2576 return PTR_ERR(trans);
2578 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2579 (unsigned long long)backref->dir,
2580 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2581 (unsigned long long)root->objectid);
2583 btrfs_init_path(&path);
2584 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2585 backref->name, backref->namelen,
2586 backref->index, -1);
2589 btrfs_release_path(&path);
2590 btrfs_commit_transaction(trans, root);
2597 ret = btrfs_del_item(trans, root, &path);
2599 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2601 btrfs_release_path(&path);
2602 btrfs_commit_transaction(trans, root);
/*
 * Recreate a lost inode item for rec from what the check has collected.
 *
 * A zeroed stack inode item is filled with: the generation of the new
 * transaction, an nlink of either 1 or rec->found_link (the selecting
 * condition is not shown here), nbytes from rec->found_size, and a type
 * guess: a record with a dir item becomes a directory (S_IFDIR | 0755,
 * size found_size), anything else a regular file (S_IFREG | 0755, size
 * extent_end). atime, ctime and mtime are set to the current time, otime
 * to zero. The item is inserted with btrfs_insert_inode() and the
 * transaction is committed.
 *
 * NOTE(review): the "else if (!rec->found_dir_item)" test is always true
 * when reached; a plain else would be equivalent.
 * NOTE(review): the result of btrfs_commit_transaction() is not used.
 */
2606 static int create_inode_item(struct btrfs_root *root,
2607 struct inode_record *rec,
2610 struct btrfs_trans_handle *trans;
2611 struct btrfs_inode_item inode_item;
2612 time_t now = time(NULL);
2615 trans = btrfs_start_transaction(root, 1);
2616 if (IS_ERR(trans)) {
2617 ret = PTR_ERR(trans);
2621 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2622 "be incomplete, please check permissions and content after "
2623 "the fsck completes.\n", (unsigned long long)root->objectid,
2624 (unsigned long long)rec->ino);
2626 memset(&inode_item, 0, sizeof(inode_item));
2627 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2629 btrfs_set_stack_inode_nlink(&inode_item, 1);
2631 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2632 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
/* Guess the file type: any dir item wins over file extents */
2633 if (rec->found_dir_item) {
2634 if (rec->found_file_extent)
2635 fprintf(stderr, "root %llu inode %llu has both a dir "
2636 "item and extents, unsure if it is a dir or a "
2637 "regular file so setting it as a directory\n",
2638 (unsigned long long)root->objectid,
2639 (unsigned long long)rec->ino);
2640 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2641 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2642 } else if (!rec->found_dir_item) {
2643 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2644 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* Original timestamps are lost: use "now", leave otime at zero */
2646 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2647 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2648 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2649 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2650 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2651 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2652 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2653 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2655 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2657 btrfs_commit_transaction(trans, root);
/*
 * Try to repair the inconsistent backrefs of one inode record.
 *
 * Every backref of rec is visited (safe iteration, entries may be removed
 * from the list). Visible repair actions, depending on the delete flag:
 *  - recreate the inode item of the root directory when it is missing;
 *  - delete a dir index that has no matching inode ref, or whose index
 *    disagrees with it (REF_ERR_INDEX_UNMATCH);
 *  - add a missing dir index when dir item and inode ref both exist;
 *  - drop backrefs from the list once dir item, dir index and inode ref
 *    are all present without errors;
 *  - add a dir index/item pair when only the inode ref exists, after
 *    check_dir_conflict() has been consulted;
 *  - recreate the inode item when all three references exist and agree
 *    but the inode item itself is missing.
 *
 * Returns the error code of a failed step, otherwise the repaired value.
 */
2661 static int repair_inode_backrefs(struct btrfs_root *root,
2662 struct inode_record *rec,
2663 struct cache_tree *inode_cache,
2666 struct inode_backref *tmp, *backref;
2667 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2671 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2672 if (!delete && rec->ino == root_dirid) {
2673 if (!rec->found_inode_item) {
2674 ret = create_inode_item(root, rec, 1);
2681 /* Index 0 of the root dir is special, don't mess with it */
2682 if (rec->ino == root_dirid && backref->index == 0)
2686 ((backref->found_dir_index && !backref->found_inode_ref) ||
2687 (backref->found_dir_index && backref->found_inode_ref &&
2688 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2689 ret = delete_dir_index(root, backref);
2693 list_del(&backref->list);
2698 if (!delete && !backref->found_dir_index &&
2699 backref->found_dir_item && backref->found_inode_ref) {
2700 ret = add_missing_dir_index(root, inode_cache, rec,
2705 if (backref->found_dir_item &&
2706 backref->found_dir_index) {
2707 if (!backref->errors &&
2708 backref->found_inode_ref) {
2709 list_del(&backref->list);
2716 if (!delete && (!backref->found_dir_index &&
2717 !backref->found_dir_item &&
2718 backref->found_inode_ref)) {
2719 struct btrfs_trans_handle *trans;
2720 struct btrfs_key location;
2722 ret = check_dir_conflict(root, backref->name,
2728 * let nlink fixing routine to handle it,
2729 * which can do it better.
2734 location.objectid = rec->ino;
2735 location.type = BTRFS_INODE_ITEM_KEY;
2736 location.offset = 0;
2738 trans = btrfs_start_transaction(root, 1);
2739 if (IS_ERR(trans)) {
2740 ret = PTR_ERR(trans);
2743 fprintf(stderr, "adding missing dir index/item pair "
2745 (unsigned long long)rec->ino);
2746 ret = btrfs_insert_dir_item(trans, root, backref->name,
2748 backref->dir, &location,
2749 imode_to_type(rec->imode),
2752 btrfs_commit_transaction(trans, root);
2756 if (!delete && (backref->found_inode_ref &&
2757 backref->found_dir_index &&
2758 backref->found_dir_item &&
2759 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2760 !rec->found_inode_item)) {
2761 ret = create_inode_item(root, rec, 0);
2768 return ret ? ret : repaired;
2772 * To determine the file type for nlink/inode_item repair
2774 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2775 * Return -ENOENT if file type is not found.
/*
 * Store a BTRFS_FT_* file type for rec into *type.
 *
 * Sources, in the order they are tried in the visible code:
 *  1. the inode item, when rec->found_inode_item is set: the type is derived
 *     from rec->imode with imode_to_type();
 *  2. the first backref that has a dir index or a dir item: its filetype
 *     field is copied.
 *
 * NOTE(review): the return statements are on lines missing from this
 * listing; the comment preceding this function states 0 on success and
 * -ENOENT when no type is found.
 */
2777 static int find_file_type(struct inode_record *rec, u8 *type)
2779 struct inode_backref *backref;
2781 /* For inode item recovered case */
2782 if (rec->found_inode_item) {
2783 *type = imode_to_type(rec->imode);
2787 list_for_each_entry(backref, &rec->backrefs, list) {
2788 if (backref->found_dir_index || backref->found_dir_item) {
2789 *type = backref->filetype;
2797 * To determine the file name for nlink repair
2799 * Return 0 if file name is found, set name and namelen.
2800 * Return -ENOENT if file name is not found.
/*
 * Copy a file name for rec into name and its length into *namelen.
 *
 * The name is taken from the first backref that has any of dir index, dir
 * item or inode ref. Exactly backref->namelen bytes are copied with
 * memcpy(); the visible code does not append a terminating NUL, so callers
 * have to rely on *namelen and provide a buffer large enough for the name.
 *
 * NOTE(review): the return statements are on lines missing from this
 * listing; the comment preceding this function states 0 on success and
 * -ENOENT when no name is found.
 */
2802 static int find_file_name(struct inode_record *rec,
2803 char *name, int *namelen)
2805 struct inode_backref *backref;
2807 list_for_each_entry(backref, &rec->backrefs, list) {
2808 if (backref->found_dir_index || backref->found_dir_item ||
2809 backref->found_inode_ref) {
2810 memcpy(name, backref->name, backref->namelen);
2811 *namelen = backref->namelen;
2818 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of the inode described by rec from its backrefs.
 *
 * Visible sequence:
 *  1. rec->found_link is reset to 0.
 *  2. btrfs_unlink() is called for every backref; a backref that lacks any
 *     of dir index, dir item or inode ref is also removed from
 *     rec->backrefs so that it is not added back later.
 *  3. The inode item (rec->ino, BTRFS_INODE_ITEM_KEY) is looked up, its
 *     nlink is set to 0 and the leaf is marked dirty.
 *  4. btrfs_add_link() is called for every backref still on the list; per
 *     the comment in the body the link count is incremented there.
 *
 * NOTE(review): error checks after the individual calls, key.offset and the
 * final return are on lines missing from this listing.
 */
2819 static int reset_nlink(struct btrfs_trans_handle *trans,
2820 struct btrfs_root *root,
2821 struct btrfs_path *path,
2822 struct inode_record *rec)
2824 struct inode_backref *backref;
2825 struct inode_backref *tmp;
2826 struct btrfs_key key;
2827 struct btrfs_inode_item *inode_item;
2830 /* We don't believe this either, reset it and iterate backref */
2831 rec->found_link = 0;
2833 /* Remove all backref including the valid ones */
2834 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2835 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2836 backref->index, backref->name,
2837 backref->namelen, 0);
2841 /* remove invalid backref, so it won't be added back */
2842 if (!(backref->found_dir_index &&
2843 backref->found_dir_item &&
2844 backref->found_inode_ref)) {
2845 list_del(&backref->list);
2852 /* Set nlink to 0 */
2853 key.objectid = rec->ino;
2854 key.type = BTRFS_INODE_ITEM_KEY;
2856 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2863 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2864 struct btrfs_inode_item);
2865 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2866 btrfs_mark_buffer_dirty(path->nodes[0]);
2867 btrfs_release_path(path);
2870 * Add back valid inode_ref/dir_item/dir_index,
2871 * add_link() will handle the nlink inc, so new nlink must be correct
2873 list_for_each_entry(backref, &rec->backrefs, list) {
2874 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2875 backref->name, backref->namelen,
2876 backref->filetype, &backref->index, 1);
2881 btrfs_release_path(path);
/*
 * Store into *highest_ino the objectid of the item that sits just before
 * the position found when searching root for a key with objectid
 * BTRFS_LAST_FREE_OBJECTID and type BTRFS_INODE_ITEM_KEY (the key at
 * path->slots[0] - 1); presumably this is the highest inode number in use.
 *
 * The path is initialized here and released before returning. The value is
 * afterwards compared against BTRFS_LAST_FREE_OBJECTID.
 *
 * NOTE(review): the declaration of highest_ino, the assignment of
 * key.offset, the condition guarding the key read and the result of the
 * range comparison are on lines missing from this listing.
 */
2885 static int get_highest_inode(struct btrfs_trans_handle *trans,
2886 struct btrfs_root *root,
2887 struct btrfs_path *path,
2890 struct btrfs_key key, found_key;
2893 btrfs_init_path(path);
2894 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2896 key.type = BTRFS_INODE_ITEM_KEY;
2897 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2899 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2900 path->slots[0] - 1);
2901 *highest_ino = found_key.objectid;
2904 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2906 btrfs_release_path(path);
/*
 * Repair a wrong link count for the inode described by rec.
 *
 * Visible sequence:
 *  1. Recover a name and a type through find_file_name() and
 *     find_file_type() before the backrefs are modified. Fallbacks are the
 *     decimal inode number as name and BTRFS_FT_REG_FILE as type; both
 *     fallbacks are announced on stdout.
 *  2. Call reset_nlink() to rebuild the links from the backrefs.
 *  3. When rec->found_link is 0 afterwards, create the lost+found directory
 *     below BTRFS_FIRST_FREE_OBJECTID with btrfs_mkdir() (its inode number
 *     comes from get_highest_inode()) and link the inode into it with
 *     btrfs_add_link(). While that returns -EEXIST a suffix built from the
 *     inode number is appended to the name and the link is retried, as long
 *     as the longer name still fits the length check.
 *  4. Clear I_ERR_LINK_COUNT_WRONG in every case; the comment in the body
 *     explains that otherwise the same inode would be retried forever.
 *
 * namebuf is BTRFS_NAME_LEN bytes and zero initialized.
 *
 * NOTE(review): several declarations, error checks, the limit used in the
 * name length check, the suffix format string and the final return are on
 * lines missing from this listing.
 */
2910 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2911 struct btrfs_root *root,
2912 struct btrfs_path *path,
2913 struct inode_record *rec)
2915 char *dir_name = "lost+found";
2916 char namebuf[BTRFS_NAME_LEN] = {0};
2921 int name_recovered = 0;
2922 int type_recovered = 0;
2926 * Get file name and type first before these invalid inode ref
2927 * are deleted by remove_all_invalid_backref()
2929 name_recovered = !find_file_name(rec, namebuf, &namelen);
2930 type_recovered = !find_file_type(rec, &type);
2932 if (!name_recovered) {
2933 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2934 rec->ino, rec->ino);
2935 namelen = count_digits(rec->ino);
2936 sprintf(namebuf, "%llu", rec->ino);
2939 if (!type_recovered) {
2940 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2942 type = BTRFS_FT_REG_FILE;
2946 ret = reset_nlink(trans, root, path, rec);
2949 "Failed to reset nlink for inode %llu: %s\n",
2950 rec->ino, strerror(-ret));
2954 if (rec->found_link == 0) {
2955 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2959 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2960 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2963 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2964 dir_name, strerror(-ret));
2967 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2968 namebuf, namelen, type, NULL, 1);
2970 * Add ".INO" suffix several times to handle case where
2971 * "FILENAME.INO" is already taken by another file.
2973 while (ret == -EEXIST) {
2975 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2977 if (namelen + count_digits(rec->ino) + 1 >
2982 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2984 namelen += count_digits(rec->ino) + 1;
2985 ret = btrfs_add_link(trans, root, rec->ino,
2986 lost_found_ino, namebuf,
2987 namelen, type, NULL, 1);
2991 "Failed to link the inode %llu to %s dir: %s\n",
2992 rec->ino, dir_name, strerror(-ret));
2996 * Just increase the found_link, don't actually add the
2997 * backref. This will make things easier and this inode
2998 * record will be freed after the repair is done.
2999 * So fsck will not report problem about this inode.
3002 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3003 namelen, namebuf, dir_name);
3005 printf("Fixed the nlink of inode %llu\n", rec->ino);
3008 * Clear the flag anyway, or we will loop forever for the same inode
3009 * as it will not be removed from the bad inode list and the dead loop
3012 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3013 btrfs_release_path(path);
3018 * Check if there is any normal(reg or prealloc) file extent for given
3020 * This is used to determine the file type when neither its dir_index/item or
3021 * inode_item exists.
3023 * This will *NOT* report error, if any error happens, just consider it does
3024 * not have any normal file extent.
/*
 * Look through the BTRFS_EXTENT_DATA_KEY items of inode ino in root for a
 * file extent whose type is not BTRFS_FILE_EXTENT_INLINE.
 *
 * The search is read only (no transaction, no COW). When the search lands
 * past the last item of a leaf, btrfs_next_leaf() is used to continue. The
 * scan ends once a key with a different objectid or a different type is
 * seen. The local path is released before returning.
 *
 * The caller in repair_inode_no_item() uses the result as a truth value.
 *
 * NOTE(review): the loop construct, the remaining key fields, the error
 * handling and the return statements are on lines missing from this
 * listing; the comment preceding this function says errors are not reported
 * and are treated like no extent found.
 */
3026 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3028 struct btrfs_path path;
3029 struct btrfs_key key;
3030 struct btrfs_key found_key;
3031 struct btrfs_file_extent_item *fi;
3035 btrfs_init_path(&path);
3037 key.type = BTRFS_EXTENT_DATA_KEY;
3040 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3045 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3046 ret = btrfs_next_leaf(root, &path);
3053 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3055 if (found_key.objectid != ino ||
3056 found_key.type != BTRFS_EXTENT_DATA_KEY)
3058 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3059 struct btrfs_file_extent_item);
3060 type = btrfs_file_extent_type(path.nodes[0], fi);
3061 if (type != BTRFS_FILE_EXTENT_INLINE) {
3067 btrfs_release_path(&path);
/*
 * Map a BTRFS_FT_* directory entry type to the matching S_IF* file type
 * bits by indexing a static lookup table.
 *
 * The table has entries for REG_FILE, DIR, CHRDEV, BLKDEV, FIFO, SOCK and
 * SYMLINK.
 *
 * NOTE(review): no range check on type is visible before the table is
 * indexed; confirm that callers never pass a value above the last
 * initialized entry (for example a filetype read from a damaged dir item).
 */
3071 static u32 btrfs_type_to_imode(u8 type)
3073 static u32 imode_by_btrfs_type[] = {
3074 [BTRFS_FT_REG_FILE] = S_IFREG,
3075 [BTRFS_FT_DIR] = S_IFDIR,
3076 [BTRFS_FT_CHRDEV] = S_IFCHR,
3077 [BTRFS_FT_BLKDEV] = S_IFBLK,
3078 [BTRFS_FT_FIFO] = S_IFIFO,
3079 [BTRFS_FT_SOCK] = S_IFSOCK,
3080 [BTRFS_FT_SYMLINK] = S_IFLNK,
3083 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for rec.
 *
 * The file type is taken from find_file_type() when that succeeds.
 * Otherwise it is chosen in this order:
 *  - BTRFS_FT_REG_FILE when rec->found_file_extent is set and
 *    find_normal_file_extent() reports a non inline extent;
 *  - BTRFS_FT_DIR when rec->found_dir_item is set;
 *  - BTRFS_FT_REG_FILE when rec->orphan_extents is not empty;
 *  - BTRFS_FT_REG_FILE as last resort, announced on stdout.
 *
 * The item is created with btrfs_new_inode() using
 * mode | btrfs_type_to_imode(filetype). Afterwards the record is updated:
 * rec->imode is set to the same value, I_ERR_NO_INODE_ITEM is cleared and
 * I_ERR_LINK_COUNT_WRONG is set so that the nlink repair runs later.
 *
 * NOTE(review): the record field set to 1 after the rebuild is
 * found_dir_item, not found_inode_item; confirm that this is intended.
 * NOTE(review): the declarations (including the initial value of mode), the
 * error check after btrfs_new_inode() and the return are on lines missing
 * from this listing. path is not used on any visible line.
 */
3086 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3087 struct btrfs_root *root,
3088 struct btrfs_path *path,
3089 struct inode_record *rec)
3093 int type_recovered = 0;
3096 printf("Trying to rebuild inode:%llu\n", rec->ino);
3098 type_recovered = !find_file_type(rec, &filetype);
3101 * Try to determine inode type if type not found.
3103 * For found regular file extent, it must be FILE.
3104 * For found dir_item/index, it must be DIR.
3106 * For undetermined one, use FILE as fallback.
3109 * 1. If found backref(inode_index/item is already handled) to it,
3111 * Need new inode-inode ref structure to allow search for that.
3113 if (!type_recovered) {
3114 if (rec->found_file_extent &&
3115 find_normal_file_extent(root, rec->ino)) {
3117 filetype = BTRFS_FT_REG_FILE;
3118 } else if (rec->found_dir_item) {
3120 filetype = BTRFS_FT_DIR;
3121 } else if (!list_empty(&rec->orphan_extents)) {
3123 filetype = BTRFS_FT_REG_FILE;
3125 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3128 filetype = BTRFS_FT_REG_FILE;
3132 ret = btrfs_new_inode(trans, root, rec->ino,
3133 mode | btrfs_type_to_imode(filetype));
3138 * Here inode rebuild is done, we only rebuild the inode item,
3139 * don't repair the nlink(like move to lost+found).
3140 * That is the job of nlink repair.
3142 * We just fill the record and return
3144 rec->found_dir_item = 1;
3145 rec->imode = mode | btrfs_type_to_imode(filetype);
3147 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3148 /* Ensure the inode_nlinks repair function will be called */
3149 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Process every entry of rec->orphan_extents.
 *
 * For each orphan data extent the visible code:
 *  - probes the file range with btrfs_get_extent(), using disk_len as the
 *    extent length because compression and data offset are unknown (see the
 *    comment in the body), and releases the path right after;
 *  - on a conflict prints a message and frees the extent with
 *    btrfs_free_extent() on the extent root;
 *  - otherwise inserts a file extent item with btrfs_insert_file_extent(),
 *    passing disk_len for both length arguments;
 *  - adds disk_len to rec->found_size and clears I_ERR_FILE_NBYTES_WRONG
 *    once found_size equals rec->nbytes;
 *  - removes the range from rec->holes with del_file_extent_hole() and
 *    clears I_ERR_FILE_EXTENT_DISCOUNT once no hole is left;
 *  - unlinks the orphan from the list.
 *
 * After the loop I_ERR_FILE_EXTENT_ORPHAN is cleared.
 *
 * NOTE(review): the branch conditions that separate the conflict path from
 * the insert path, the error checks, the release of the orphan entry and
 * the return are on lines missing from this listing.
 */
3154 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3155 struct btrfs_root *root,
3156 struct btrfs_path *path,
3157 struct inode_record *rec)
3159 struct orphan_data_extent *orphan;
3160 struct orphan_data_extent *tmp;
3163 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3165 * Check for conflicting file extents
3167 * Here we don't know whether the extents is compressed or not,
3168 * so we can only assume it not compressed nor data offset,
3169 * and use its disk_len as extent length.
3171 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3172 orphan->offset, orphan->disk_len, 0);
3173 btrfs_release_path(path);
3178 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3179 orphan->disk_bytenr, orphan->disk_len);
3180 ret = btrfs_free_extent(trans,
3181 root->fs_info->extent_root,
3182 orphan->disk_bytenr, orphan->disk_len,
3183 0, root->objectid, orphan->objectid,
3188 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3189 orphan->offset, orphan->disk_bytenr,
3190 orphan->disk_len, orphan->disk_len);
3194 /* Update file size info */
3195 rec->found_size += orphan->disk_len;
3196 if (rec->found_size == rec->nbytes)
3197 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3199 /* Update the file extent hole info too */
3200 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3204 if (RB_EMPTY_ROOT(&rec->holes))
3205 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3207 list_del(&orphan->list);
3210 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in rec->holes for inode rec->ino.
 *
 * Starting from rb_first(&rec->holes), each hole is passed to
 * btrfs_punch_hole() with its start and length and then removed from the
 * tree with del_file_extent_hole(). I_ERR_FILE_EXTENT_DISCOUNT is cleared
 * as soon as the tree is empty, and the next hole is again fetched with
 * rb_first().
 *
 * A special case punches one hole from offset 0 up to rec->isize rounded up
 * to the sector size, for a file that lost all of its file extents.
 *
 * A message naming the inode and root is printed on stdout.
 *
 * NOTE(review): the loop construct, the condition for the special case, the
 * error checks and the return are on lines missing from this listing. path
 * is not used on any visible line.
 */
3215 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3216 struct btrfs_root *root,
3217 struct btrfs_path *path,
3218 struct inode_record *rec)
3220 struct rb_node *node;
3221 struct file_extent_hole *hole;
3225 node = rb_first(&rec->holes);
3229 hole = rb_entry(node, struct file_extent_hole, node);
3230 ret = btrfs_punch_hole(trans, root, rec->ino,
3231 hole->start, hole->len);
3234 ret = del_file_extent_hole(&rec->holes, hole->start,
3238 if (RB_EMPTY_ROOT(&rec->holes))
3239 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3240 node = rb_first(&rec->holes);
3242 /* special case for a file losing all its file extent */
3244 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3245 round_up(rec->isize,
3246 root->fs_info->sectorsize));
3250 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3251 rec->ino, root->objectid);
/*
 * Run the inode repair helpers that match the error bits set in
 * rec->errors, all inside one transaction.
 *
 * Nothing is attempted unless at least one of these bits is set:
 * I_ERR_DIR_ISIZE_WRONG, I_ERR_NO_ORPHAN_ITEM, I_ERR_LINK_COUNT_WRONG,
 * I_ERR_NO_INODE_ITEM, I_ERR_FILE_EXTENT_ORPHAN, I_ERR_FILE_EXTENT_DISCOUNT,
 * I_ERR_FILE_NBYTES_WRONG.
 *
 * The transaction is started with 7 as second argument; the comment in the
 * body lists how that number is made up for the nlink repair. When starting
 * the transaction fails, PTR_ERR(trans) is returned.
 *
 * NOTE(review): the value returned by the early exit, the declaration and
 * initial value of ret and the final return are on lines missing from this
 * listing.
 */
3256 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3258 struct btrfs_trans_handle *trans;
3259 struct btrfs_path path;
3262 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3263 I_ERR_NO_ORPHAN_ITEM |
3264 I_ERR_LINK_COUNT_WRONG |
3265 I_ERR_NO_INODE_ITEM |
3266 I_ERR_FILE_EXTENT_ORPHAN |
3267 I_ERR_FILE_EXTENT_DISCOUNT|
3268 I_ERR_FILE_NBYTES_WRONG)))
3272 * For nlink repair, it may create a dir and add link, so
3273 * 2 for parent(256)'s dir_index and dir_item
3274 * 2 for lost+found dir's inode_item and inode_ref
3275 * 1 for the new inode_ref of the file
3276 * 2 for lost+found dir's dir_index and dir_item for the file
3278 trans = btrfs_start_transaction(root, 7);
3280 return PTR_ERR(trans);
3282 btrfs_init_path(&path);
/*
 * The steps run in a fixed order and every step after the first one is
 * skipped once an earlier step has left a non-zero ret. The error bits are
 * re-read before each step, so a bit set by an earlier helper is honoured.
 */
3283 if (rec->errors & I_ERR_NO_INODE_ITEM)
3284 ret = repair_inode_no_item(trans, root, &path, rec);
3285 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3286 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3287 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3288 ret = repair_inode_discount_extent(trans, root, &path, rec);
3289 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3290 ret = repair_inode_isize(trans, root, &path, rec);
3291 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3292 ret = repair_inode_orphan_item(trans, root, &path, rec);
3293 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3294 ret = repair_inode_nlinks(trans, root, &path, rec);
3295 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3296 ret = repair_inode_nbytes(trans, root, &path, rec);
/* NOTE(review): the result of the commit is not captured by this call. */
3297 btrfs_commit_transaction(trans, root);
3298 btrfs_release_path(&path);
/*
 * Evaluate, and in repair mode try to fix, every inode record collected in
 * inode_cache for root. All records are removed from the cache and freed on
 * the way.
 *
 * Visible phases:
 *  1. A root whose root item has zero refs is only checked for an empty
 *     inode cache; a warning with the line number is printed otherwise.
 *  2. In repair mode the records are walked in stages and
 *     repair_inode_backrefs() is called for every record that has
 *     backrefs. The comments in the body explain why invalid backrefs are
 *     deleted before correct ones are added and why backrefs are fixed
 *     before other inode errors.
 *  3. The record of the root directory is fetched with get_inode_rec() and
 *     validated with check_root_dir(); problems are printed. One repair
 *     branch recreates the root directory with btrfs_make_root_dir() inside
 *     its own transaction; another branch reports that the root dir was not
 *     found.
 *  4. Every remaining record is taken out of the cache. Records of the root
 *     directory and of BTRFS_ORPHAN_OBJECTID are freed directly. For the
 *     others, I_ERR_NO_ORPHAN_ITEM is re-evaluated with check_orphan_item(),
 *     I_ERR_NO_INODE_ITEM and I_ERR_LINK_COUNT_WRONG are set from the record
 *     fields, try_repair_inode() is called, and anything still wrong is
 *     printed together with its unresolved backrefs before the record is
 *     freed.
 *
 * Returns -1 when error is greater than 0, otherwise 0.
 *
 * NOTE(review): many lines of this function are missing from this listing
 * (declarations, the stage loop header, several conditions and jumps), so
 * the description above only names what the visible calls establish.
 */
3302 static int check_inode_recs(struct btrfs_root *root,
3303 struct cache_tree *inode_cache)
3305 struct cache_extent *cache;
3306 struct ptr_node *node;
3307 struct inode_record *rec;
3308 struct inode_backref *backref;
3313 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3315 if (btrfs_root_refs(&root->root_item) == 0) {
3316 if (!cache_tree_empty(inode_cache))
3317 fprintf(stderr, "warning line %d\n", __LINE__);
3322 * We need to repair backrefs first because we could change some of the
3323 * errors in the inode recs.
3325 * We also need to go through and delete invalid backrefs first and then
3326 * add the correct ones second. We do this because we may get EEXIST
3327 * when adding back the correct index because we hadn't yet deleted the
3330 * For example, if we were missing a dir index then the directories
3331 * isize would be wrong, so if we fixed the isize to what we thought it
3332 * would be and then fixed the backref we'd still have a invalid fs, so
3333 * we need to add back the dir index and then check to see if the isize
3338 if (stage == 3 && !err)
3341 cache = search_cache_extent(inode_cache, 0);
3342 while (repair && cache) {
3343 node = container_of(cache, struct ptr_node, cache);
3345 cache = next_cache_extent(cache);
3347 /* Need to free everything up and rescan */
3349 remove_cache_extent(inode_cache, &node->cache);
3351 free_inode_rec(rec);
3355 if (list_empty(&rec->backrefs))
3358 ret = repair_inode_backrefs(root, rec, inode_cache,
3372 rec = get_inode_rec(inode_cache, root_dirid, 0);
3373 BUG_ON(IS_ERR(rec));
3375 ret = check_root_dir(rec);
3377 fprintf(stderr, "root %llu root dir %llu error\n",
3378 (unsigned long long)root->root_key.objectid,
3379 (unsigned long long)root_dirid);
3380 print_inode_error(root, rec);
3385 struct btrfs_trans_handle *trans;
3387 trans = btrfs_start_transaction(root, 1);
3388 if (IS_ERR(trans)) {
3389 err = PTR_ERR(trans);
3394 "root %llu missing its root dir, recreating\n",
3395 (unsigned long long)root->objectid);
3397 ret = btrfs_make_root_dir(trans, root, root_dirid);
3400 btrfs_commit_transaction(trans, root);
3404 fprintf(stderr, "root %llu root dir %llu not found\n",
3405 (unsigned long long)root->root_key.objectid,
3406 (unsigned long long)root_dirid);
3410 cache = search_cache_extent(inode_cache, 0);
3413 node = container_of(cache, struct ptr_node, cache);
3415 remove_cache_extent(inode_cache, &node->cache);
3417 if (rec->ino == root_dirid ||
3418 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3419 free_inode_rec(rec);
3423 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3424 ret = check_orphan_item(root, rec->ino);
3426 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3427 if (can_free_inode_rec(rec)) {
3428 free_inode_rec(rec);
3433 if (!rec->found_inode_item)
3434 rec->errors |= I_ERR_NO_INODE_ITEM;
3435 if (rec->found_link != rec->nlink)
3436 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3438 ret = try_repair_inode(root, rec);
3439 if (ret == 0 && can_free_inode_rec(rec)) {
3440 free_inode_rec(rec);
3446 if (!(repair && ret == 0))
3448 print_inode_error(root, rec);
3449 list_for_each_entry(backref, &rec->backrefs, list) {
3450 if (!backref->found_dir_item)
3451 backref->errors |= REF_ERR_NO_DIR_ITEM;
3452 if (!backref->found_dir_index)
3453 backref->errors |= REF_ERR_NO_DIR_INDEX;
3454 if (!backref->found_inode_ref)
3455 backref->errors |= REF_ERR_NO_INODE_REF;
3456 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3457 " namelen %u name %s filetype %d errors %x",
3458 (unsigned long long)backref->dir,
3459 (unsigned long long)backref->index,
3460 backref->namelen, backref->name,
3461 backref->filetype, backref->errors);
3462 print_ref_error(backref->errors);
3464 free_inode_rec(rec);
3466 return (error > 0) ? -1 : 0;
/*
 * Return the root_record for objectid from root_cache, creating it when it
 * does not exist yet.
 *
 * An existing record is found with lookup_cache_extent(root_cache,
 * objectid, 1). A new record is zero allocated with calloc(), gets its
 * objectid and an empty backrefs list, and is inserted into the cache as an
 * extent starting at objectid with size 1.
 *
 * Returns ERR_PTR(-ENOMEM) or ERR_PTR(-EEXIST) on the failure paths visible
 * here; callers test the result with IS_ERR().
 *
 * NOTE(review): the second parameter line, the conditions guarding the two
 * error returns and the final return are on lines missing from this
 * listing. Whether the new record is released when the insert fails is not
 * visible either.
 */
3469 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3472 struct cache_extent *cache;
3473 struct root_record *rec = NULL;
3476 cache = lookup_cache_extent(root_cache, objectid, 1);
3478 rec = container_of(cache, struct root_record, cache);
3480 rec = calloc(1, sizeof(*rec));
3482 return ERR_PTR(-ENOMEM);
3483 rec->objectid = objectid;
3484 INIT_LIST_HEAD(&rec->backrefs);
3485 rec->cache.start = objectid;
3486 rec->cache.size = 1;
3488 ret = insert_cache_extent(root_cache, &rec->cache);
3490 return ERR_PTR(-EEXIST);
/*
 * Find the backref of rec that matches ref_root, dir and name, or create a
 * new one.
 *
 * An existing entry has to agree in ref_root, dir and namelen, and its name
 * has to compare equal over namelen bytes; index is not part of the visible
 * comparison. A new entry is zero allocated with room for the name plus a
 * terminating NUL, filled from the arguments and appended to the tail of
 * rec->backrefs.
 *
 * NOTE(review): the return statements, the handling of a failed calloc()
 * and the assignment of the dir field are on lines missing from this
 * listing.
 */
3495 static struct root_backref *get_root_backref(struct root_record *rec,
3496 u64 ref_root, u64 dir, u64 index,
3497 const char *name, int namelen)
3499 struct root_backref *backref;
3501 list_for_each_entry(backref, &rec->backrefs, list) {
3502 if (backref->ref_root != ref_root || backref->dir != dir ||
3503 backref->namelen != namelen)
3505 if (memcmp(name, backref->name, namelen))
3510 backref = calloc(1, sizeof(*backref) + namelen + 1);
3513 backref->ref_root = ref_root;
3515 backref->index = index;
3516 backref->namelen = namelen;
3517 memcpy(backref->name, name, namelen);
3518 backref->name[namelen] = '\0';
3519 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Release callback for one root_record stored in a cache tree.
 *
 * The record is obtained from the cache extent with container_of(). Its
 * backrefs list is emptied by repeatedly taking the first entry and
 * unlinking it.
 *
 * NOTE(review): the calls that release the backref and the record itself
 * are on lines missing from this listing.
 *
 * The macro invocation after the function registers this callback for the
 * root_recs tree; presumably it defines free_root_recs_tree(), which
 * check_fs_roots() calls. Confirm against the macro definition.
 */
3523 static void free_root_record(struct cache_extent *cache)
3525 struct root_record *rec;
3526 struct root_backref *backref;
3528 rec = container_of(cache, struct root_record, cache);
3529 while (!list_empty(&rec->backrefs)) {
3530 backref = to_root_backref(rec->backrefs.next);
3531 list_del(&backref->list);
3538 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of kind item_type referencing root root_id was seen.
 *
 * The root record and the matching backref are looked up or created with
 * get_root_rec() and get_root_backref(); errors is OR-ed into the backref.
 *
 * For every item type except BTRFS_DIR_ITEM_KEY the index is compared with
 * the one already stored when a dir index, back ref or forward ref was seen
 * before; a difference sets REF_ERR_INDEX_UNMATCH. The index is also stored
 * in the backref.
 *
 * Per item type the matching found_* flag is set:
 *  - BTRFS_DIR_ITEM_KEY      found_dir_item
 *  - BTRFS_DIR_INDEX_KEY     found_dir_index
 *  - BTRFS_ROOT_REF_KEY      found_forward_ref, REF_ERR_DUP_ROOT_REF when it
 *                            was already set
 *  - BTRFS_ROOT_BACKREF_KEY  found_back_ref, REF_ERR_DUP_ROOT_BACKREF when
 *                            it was already set
 *
 * NOTE(review): the statements executed when a dir item meets an existing
 * forward ref (and the reverse case), the check of the backref pointer and
 * the return are on lines missing from this listing.
 */
3540 static int add_root_backref(struct cache_tree *root_cache,
3541 u64 root_id, u64 ref_root, u64 dir, u64 index,
3542 const char *name, int namelen,
3543 int item_type, int errors)
3545 struct root_record *rec;
3546 struct root_backref *backref;
3548 rec = get_root_rec(root_cache, root_id);
3549 BUG_ON(IS_ERR(rec));
3550 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3553 backref->errors |= errors;
3555 if (item_type != BTRFS_DIR_ITEM_KEY) {
3556 if (backref->found_dir_index || backref->found_back_ref ||
3557 backref->found_forward_ref) {
3558 if (backref->index != index)
3559 backref->errors |= REF_ERR_INDEX_UNMATCH;
3561 backref->index = index;
3565 if (item_type == BTRFS_DIR_ITEM_KEY) {
3566 if (backref->found_forward_ref)
3568 backref->found_dir_item = 1;
3569 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3570 backref->found_dir_index = 1;
3571 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3572 if (backref->found_forward_ref)
3573 backref->errors |= REF_ERR_DUP_ROOT_REF;
3574 else if (backref->found_dir_item)
3576 backref->found_forward_ref = 1;
3577 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3578 if (backref->found_back_ref)
3579 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3580 backref->found_back_ref = 1;
/* A backref counts as reachable once forward ref and dir item were seen. */
3585 if (backref->found_forward_ref && backref->found_dir_item)
3586 backref->reachable = 1;
/*
 * Move the per-root inode records in src_cache into root backrefs in
 * dst_cache.
 *
 * For a root whose key objectid is BTRFS_TREE_RELOC_OBJECTID the source
 * tree is simply freed. Otherwise every record is removed from src_cache
 * and checked with is_child_root(root, root->objectid, rec->ino). Its
 * backrefs are then forwarded to add_root_backref() as BTRFS_DIR_ITEM_KEY
 * and/or BTRFS_DIR_INDEX_KEY entries, depending on the found_dir_item and
 * found_dir_index flags, with rec->ino as the referenced root and the key
 * objectid of root as the referencing root. A backref with
 * found_inode_ref set triggers BUG_ON(). The record is freed afterwards.
 *
 * NOTE(review): the loop construct, the handling of the is_child_root()
 * result, the last argument of both add_root_backref() calls and the return
 * are on lines missing from this listing.
 */
3590 static int merge_root_recs(struct btrfs_root *root,
3591 struct cache_tree *src_cache,
3592 struct cache_tree *dst_cache)
3594 struct cache_extent *cache;
3595 struct ptr_node *node;
3596 struct inode_record *rec;
3597 struct inode_backref *backref;
3600 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3601 free_inode_recs_tree(src_cache);
3606 cache = search_cache_extent(src_cache, 0);
3609 node = container_of(cache, struct ptr_node, cache);
3611 remove_cache_extent(src_cache, &node->cache);
3614 ret = is_child_root(root, root->objectid, rec->ino);
3620 list_for_each_entry(backref, &rec->backrefs, list) {
3621 BUG_ON(backref->found_inode_ref);
3622 if (backref->found_dir_item)
3623 add_root_backref(dst_cache, rec->ino,
3624 root->root_key.objectid, backref->dir,
3625 backref->index, backref->name,
3626 backref->namelen, BTRFS_DIR_ITEM_KEY,
3628 if (backref->found_dir_index)
3629 add_root_backref(dst_cache, rec->ino,
3630 root->root_key.objectid, backref->dir,
3631 backref->index, backref->name,
3632 backref->namelen, BTRFS_DIR_INDEX_KEY,
3636 free_inode_rec(rec);
/*
 * Verify the references between the fs roots recorded in root_cache and
 * report inconsistencies on stderr.
 *
 * Visible phases:
 *  1. The record of BTRFS_FS_TREE_OBJECTID is fetched (created if needed).
 *  2. A pass over all records examines records whose found_ref is 0: for
 *     each reachable backref the referencing root record is fetched, and a
 *     backref can be marked unreachable depending on the found_ref of that
 *     root. The fixme in the body notes that circular references are not
 *     detected.
 *  3. A second pass reports problems. A record with found_ref 0 in the
 *     range BTRFS_FIRST_FREE_OBJECTID to BTRFS_LAST_FREE_OBJECTID is
 *     checked for an orphan item in the tree root and reported as not
 *     referenced when it has a root item. For every record the missing
 *     item kinds of each backref are turned into REF_ERR_NO_* bits, and
 *     records with problems are printed together with their unresolved
 *     backrefs.
 *
 * Returns 1 when errors is greater than 0, otherwise 0.
 *
 * NOTE(review): the loop constructs, the counters and several jumps are on
 * lines missing from this listing, so the exact conditions that raise the
 * error count must be confirmed against the complete source.
 */
3643 static int check_root_refs(struct btrfs_root *root,
3644 struct cache_tree *root_cache)
3646 struct root_record *rec;
3647 struct root_record *ref_root;
3648 struct root_backref *backref;
3649 struct cache_extent *cache;
3655 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3656 BUG_ON(IS_ERR(rec));
3659 /* fixme: this can not detect circular references */
3662 cache = search_cache_extent(root_cache, 0);
3666 rec = container_of(cache, struct root_record, cache);
3667 cache = next_cache_extent(cache);
3669 if (rec->found_ref == 0)
3672 list_for_each_entry(backref, &rec->backrefs, list) {
3673 if (!backref->reachable)
3676 ref_root = get_root_rec(root_cache,
3678 BUG_ON(IS_ERR(ref_root));
3679 if (ref_root->found_ref > 0)
3682 backref->reachable = 0;
3684 if (rec->found_ref == 0)
3690 cache = search_cache_extent(root_cache, 0);
3694 rec = container_of(cache, struct root_record, cache);
3695 cache = next_cache_extent(cache);
3697 if (rec->found_ref == 0 &&
3698 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3699 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3700 ret = check_orphan_item(root->fs_info->tree_root,
3706 * If we don't have a root item then we likely just have
3707 * a dir item in a snapshot for this root but no actual
3708 * ref key or anything so it's meaningless.
3710 if (!rec->found_root_item)
3713 fprintf(stderr, "fs tree %llu not referenced\n",
3714 (unsigned long long)rec->objectid);
3718 if (rec->found_ref > 0 && !rec->found_root_item)
3720 list_for_each_entry(backref, &rec->backrefs, list) {
3721 if (!backref->found_dir_item)
3722 backref->errors |= REF_ERR_NO_DIR_ITEM;
3723 if (!backref->found_dir_index)
3724 backref->errors |= REF_ERR_NO_DIR_INDEX;
3725 if (!backref->found_back_ref)
3726 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3727 if (!backref->found_forward_ref)
3728 backref->errors |= REF_ERR_NO_ROOT_REF;
3729 if (backref->reachable && backref->errors)
3736 fprintf(stderr, "fs tree %llu refs %u %s\n",
3737 (unsigned long long)rec->objectid, rec->found_ref,
3738 rec->found_root_item ? "" : "not found");
3740 list_for_each_entry(backref, &rec->backrefs, list) {
3741 if (!backref->reachable)
3743 if (!backref->errors && rec->found_root_item)
3745 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3746 " index %llu namelen %u name %s errors %x\n",
3747 (unsigned long long)backref->ref_root,
3748 (unsigned long long)backref->dir,
3749 (unsigned long long)backref->index,
3750 backref->namelen, backref->name,
3752 print_ref_error(backref->errors);
3755 return errors > 0 ? 1 : 0;
/*
 * Decode the root ref item at slot in eb and record it in root_cache.
 *
 * dirid, sequence (used as index) and name length are read from the item.
 * A name longer than BTRFS_NAME_LEN is cut to BTRFS_NAME_LEN bytes and
 * flagged with REF_ERR_NAME_TOO_LONG. The name is copied into a local
 * buffer of BTRFS_NAME_LEN bytes.
 *
 * The item is handed to add_root_backref(). For BTRFS_ROOT_REF_KEY the
 * referenced root is key->offset and the referencing root is
 * key->objectid; in the other call the two are swapped.
 *
 * NOTE(review): the declarations, the assignment of len for names that fit
 * and the return are on lines missing from this listing.
 */
3758 static int process_root_ref(struct extent_buffer *eb, int slot,
3759 struct btrfs_key *key,
3760 struct cache_tree *root_cache)
3766 struct btrfs_root_ref *ref;
3767 char namebuf[BTRFS_NAME_LEN];
3770 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3772 dirid = btrfs_root_ref_dirid(eb, ref);
3773 index = btrfs_root_ref_sequence(eb, ref);
3774 name_len = btrfs_root_ref_name_len(eb, ref);
3776 if (name_len <= BTRFS_NAME_LEN) {
3780 len = BTRFS_NAME_LEN;
3781 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes are read from directly behind the root ref structure. */
3783 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3785 if (key->type == BTRFS_ROOT_REF_KEY) {
3786 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3787 index, namebuf, len, key->type, error);
3789 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3790 index, namebuf, len, key->type, error);
/*
 * Release callback for one btrfs_corrupt_block stored in a cache tree; the
 * entry is obtained from the cache extent with container_of().
 *
 * NOTE(review): the statement that releases the entry is on a line missing
 * from this listing.
 *
 * The macro invocation after the function registers this callback for the
 * corrupt_blocks tree; presumably it defines free_corrupt_blocks_tree(),
 * which check_fs_root() calls. Confirm against the macro definition.
 */
3795 static void free_corrupt_block(struct cache_extent *cache)
3797 struct btrfs_corrupt_block *corrupt;
3799 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3803 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3806 * Repair the btree of the given root.
3808 * The fix is to remove the node key in corrupt_blocks cache_tree.
3809 * and rebalance the tree.
3810 * After the fix, the btree should be writeable.
/*
 * Remove the tree blocks listed in corrupt_blocks from the btree of root
 * and rebalance the tree afterwards, all inside one transaction.
 *
 * An empty corrupt_blocks tree needs no work. Otherwise:
 *  1. For every recorded block the parent slot is located with
 *     btrfs_search_slot() at path.lowest_level = corrupt->level, with
 *     ins_len 0 so that no balancing touches damaged siblings (see the
 *     comment in the body). The block pointer is read, the pointer is
 *     removed with btrfs_del_ptr() and the extent of the block is freed
 *     with btrfs_free_extent().
 *  2. A second pass searches for every recorded key again with ins_len -1
 *     to trigger balancing; the comment in the body notes that the search
 *     result is always greater than 0 because the item is gone.
 *  3. The transaction is committed and the path released.
 *
 * A failure to start the transaction is reported on stderr.
 *
 * NOTE(review): the loop constructs, error checks, labels and the return
 * are on lines missing from this listing.
 */
3812 static int repair_btree(struct btrfs_root *root,
3813 struct cache_tree *corrupt_blocks)
3815 struct btrfs_trans_handle *trans;
3816 struct btrfs_path path;
3817 struct btrfs_corrupt_block *corrupt;
3818 struct cache_extent *cache;
3819 struct btrfs_key key;
3824 if (cache_tree_empty(corrupt_blocks))
3827 trans = btrfs_start_transaction(root, 1);
3828 if (IS_ERR(trans)) {
3829 ret = PTR_ERR(trans);
3830 fprintf(stderr, "Error starting transaction: %s\n",
3834 btrfs_init_path(&path);
3835 cache = first_cache_extent(corrupt_blocks);
3837 corrupt = container_of(cache, struct btrfs_corrupt_block,
3839 level = corrupt->level;
3840 path.lowest_level = level;
3841 key.objectid = corrupt->key.objectid;
3842 key.type = corrupt->key.type;
3843 key.offset = corrupt->key.offset;
3846 * Here we don't want to do any tree balance, since it may
3847 * cause a balance with corrupted brother leaf/node,
3848 * so ins_len set to 0 here.
3849 * Balance will be done after all corrupt node/leaf is deleted.
3851 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3854 offset = btrfs_node_blockptr(path.nodes[level],
3857 /* Remove the ptr */
3858 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3862 * Remove the corresponding extent
3863 * return value is not concerned.
3865 btrfs_release_path(&path);
3866 ret = btrfs_free_extent(trans, root, offset,
3867 root->fs_info->nodesize, 0,
3868 root->root_key.objectid, level - 1, 0);
3869 cache = next_cache_extent(cache);
3872 /* Balance the btree using btrfs_search_slot() */
3873 cache = first_cache_extent(corrupt_blocks);
3875 corrupt = container_of(cache, struct btrfs_corrupt_block,
3877 memcpy(&key, &corrupt->key, sizeof(key));
3878 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3881 /* return will always >0 since it won't find the item */
3883 btrfs_release_path(&path);
3884 cache = next_cache_extent(cache);
3887 btrfs_commit_transaction(trans, root);
3888 btrfs_release_path(&path);
/*
 * Check one fs or subvolume tree: walk it, collect inode and root records,
 * optionally repair corrupted tree blocks and evaluate the inode records.
 *
 * Visible sequence:
 *  1. A local corrupt_blocks cache tree is initialized and published
 *     through root->fs_info->corrupt_blocks for the duration of the check.
 *  2. Unless root is the tree reloc root, its root record is fetched and
 *     found_root_item is set when the root item has refs.
 *  3. Orphan data extents queued on root->orphan_data_extents are moved to
 *     the inode record of their owner, which gets I_ERR_FILE_EXTENT_ORPHAN.
 *  4. The walk control wc is set up for the level of the root node and the
 *     root block is verified with btrfs_check_leaf() or btrfs_check_node().
 *  5. The walk starts at the root node when the root has refs or its
 *     drop_progress objectid is 0; otherwise it resumes from the
 *     drop_progress key at drop_level, after that level has been
 *     validated.
 *  6. walk_down_tree() and walk_up_tree() traverse the tree.
 *  7. Recorded corrupt blocks are listed on stdout and handed to
 *     repair_btree(); the outcome is printed.
 *  8. merge_root_recs() forwards the collected root records, the current
 *     inode record is marked checked, and check_inode_recs() evaluates the
 *     inode cache.
 *  9. The corrupt block tree is freed, the fs_info pointer is reset to NULL
 *     and the orphan data extents of the root are freed.
 *
 * NOTE(review): the declarations, the traversal loop, most error handling,
 * the condition guarding the repair and the return are on lines missing
 * from this listing.
 */
3892 static int check_fs_root(struct btrfs_root *root,
3893 struct cache_tree *root_cache,
3894 struct walk_control *wc)
3900 struct btrfs_path path;
3901 struct shared_node root_node;
3902 struct root_record *rec;
3903 struct btrfs_root_item *root_item = &root->root_item;
3904 struct cache_tree corrupt_blocks;
3905 struct orphan_data_extent *orphan;
3906 struct orphan_data_extent *tmp;
3907 enum btrfs_tree_block_status status;
3908 struct node_refs nrefs;
3911 * Reuse the corrupt_block cache tree to record corrupted tree block
3913 * Unlike the usage in extent tree check, here we do it in a per
3914 * fs/subvol tree base.
3916 cache_tree_init(&corrupt_blocks);
3917 root->fs_info->corrupt_blocks = &corrupt_blocks;
3919 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3920 rec = get_root_rec(root_cache, root->root_key.objectid);
3921 BUG_ON(IS_ERR(rec));
3922 if (btrfs_root_refs(root_item) > 0)
3923 rec->found_root_item = 1;
3926 btrfs_init_path(&path);
3927 memset(&root_node, 0, sizeof(root_node));
3928 cache_tree_init(&root_node.root_cache);
3929 cache_tree_init(&root_node.inode_cache);
3930 memset(&nrefs, 0, sizeof(nrefs));
3932 /* Move the orphan extent record to corresponding inode_record */
3933 list_for_each_entry_safe(orphan, tmp,
3934 &root->orphan_data_extents, list) {
3935 struct inode_record *inode;
3937 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3939 BUG_ON(IS_ERR(inode));
3940 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3941 list_move(&orphan->list, &inode->orphan_extents);
3944 level = btrfs_header_level(root->node);
3945 memset(wc->nodes, 0, sizeof(wc->nodes));
3946 wc->nodes[level] = &root_node;
3947 wc->active_node = level;
3948 wc->root_level = level;
3950 /* We may not have checked the root block, lets do that now */
3951 if (btrfs_is_leaf(root->node))
3952 status = btrfs_check_leaf(root, NULL, root->node);
3954 status = btrfs_check_node(root, NULL, root->node);
3955 if (status != BTRFS_TREE_BLOCK_CLEAN)
3958 if (btrfs_root_refs(root_item) > 0 ||
3959 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3960 path.nodes[level] = root->node;
3961 extent_buffer_get(root->node);
3962 path.slots[level] = 0;
3964 struct btrfs_key key;
3965 struct btrfs_disk_key found_key;
3967 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3968 level = root_item->drop_level;
3969 path.lowest_level = level;
3970 if (level > btrfs_header_level(root->node) ||
3971 level >= BTRFS_MAX_LEVEL) {
3972 error("ignoring invalid drop level: %u", level);
3975 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3978 btrfs_node_key(path.nodes[level], &found_key,
3980 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3981 sizeof(found_key)));
3985 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3991 wret = walk_up_tree(root, &path, wc, &level);
3998 btrfs_release_path(&path);
4000 if (!cache_tree_empty(&corrupt_blocks)) {
4001 struct cache_extent *cache;
4002 struct btrfs_corrupt_block *corrupt;
4004 printf("The following tree block(s) is corrupted in tree %llu:\n",
4005 root->root_key.objectid);
4006 cache = first_cache_extent(&corrupt_blocks);
4008 corrupt = container_of(cache,
4009 struct btrfs_corrupt_block,
4011 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4012 cache->start, corrupt->level,
4013 corrupt->key.objectid, corrupt->key.type,
4014 corrupt->key.offset);
4015 cache = next_cache_extent(cache);
4018 printf("Try to repair the btree for root %llu\n",
4019 root->root_key.objectid);
4020 ret = repair_btree(root, &corrupt_blocks);
4022 fprintf(stderr, "Failed to repair btree: %s\n",
4025 printf("Btree for root %llu is fixed\n",
4026 root->root_key.objectid);
4030 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4034 if (root_node.current) {
4035 root_node.current->checked = 1;
4036 maybe_free_inode_rec(&root_node.inode_cache,
4040 err = check_inode_recs(root, &root_node.inode_cache);
4044 free_corrupt_blocks_tree(&corrupt_blocks);
4045 root->fs_info->corrupt_blocks = NULL;
4046 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Tell whether objectid names a root that check_fs_roots() should treat as
 * an fs root.
 *
 * BTRFS_TREE_RELOC_OBJECTID and BTRFS_DATA_RELOC_TREE_OBJECTID are handled
 * by a dedicated branch; every other objectid is passed to is_fstree().
 *
 * NOTE(review): the value returned by the dedicated branch is on a line
 * missing from this listing; check_fs_roots() handles the tree reloc
 * objectid after this test succeeds, which suggests a non-zero result.
 */
4050 static int fs_root_objectid(u64 objectid)
4052 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4053 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4055 return is_fstree(objectid);
/*
 * Iterate over the tree root and check every fs root found there,
 * collecting root references in root_cache.
 *
 * Visible sequence:
 *  1. When progress reporting is enabled the task position is set to
 *     TASK_FS_ROOTS and the task is started; it is stopped near the end.
 *  2. The cached block groups are reset, and the walk control and the path
 *     are initialized.
 *  3. The tree root is searched for BTRFS_ROOT_ITEM_KEY items and the
 *     leaves are walked with btrfs_next_leaf().
 *  4. For a root item whose objectid passes fs_root_objectid(), the root is
 *     read (without the root cache for the tree reloc root, otherwise with
 *     key.offset set to (u64)-1) and handed to check_fs_root(). A result of
 *     -EAGAIN frees the collected root records and releases the path. A
 *     tree reloc root is freed again after the check.
 *  5. BTRFS_ROOT_REF_KEY and BTRFS_ROOT_BACKREF_KEY items are forwarded to
 *     process_root_ref().
 *  6. The root node of the tree root is remembered in tree_node; when it
 *     differs from the current one the root records are freed and the path
 *     is released.
 *  7. The shared node cache of the walk control is freed and a warning is
 *     printed if it is not empty afterwards.
 *
 * NOTE(review): the loop construct, the jump targets used after -EAGAIN and
 * after a changed root node, the error handling and the return are on lines
 * missing from this listing.
 */
4058 static int check_fs_roots(struct btrfs_root *root,
4059 struct cache_tree *root_cache)
4061 struct btrfs_path path;
4062 struct btrfs_key key;
4063 struct walk_control wc;
4064 struct extent_buffer *leaf, *tree_node;
4065 struct btrfs_root *tmp_root;
4066 struct btrfs_root *tree_root = root->fs_info->tree_root;
4070 if (ctx.progress_enabled) {
4071 ctx.tp = TASK_FS_ROOTS;
4072 task_start(ctx.info);
4076 * Just in case we made any changes to the extent tree that weren't
4077 * reflected into the free space cache yet.
4080 reset_cached_block_groups(root->fs_info);
4081 memset(&wc, 0, sizeof(wc));
4082 cache_tree_init(&wc.shared);
4083 btrfs_init_path(&path);
4088 key.type = BTRFS_ROOT_ITEM_KEY;
4089 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4094 tree_node = tree_root->node;
4096 if (tree_node != tree_root->node) {
4097 free_root_recs_tree(root_cache);
4098 btrfs_release_path(&path);
4101 leaf = path.nodes[0];
4102 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4103 ret = btrfs_next_leaf(tree_root, &path);
4109 leaf = path.nodes[0];
4111 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4112 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4113 fs_root_objectid(key.objectid)) {
4114 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4115 tmp_root = btrfs_read_fs_root_no_cache(
4116 root->fs_info, &key);
4118 key.offset = (u64)-1;
4119 tmp_root = btrfs_read_fs_root(
4120 root->fs_info, &key);
4122 if (IS_ERR(tmp_root)) {
4126 ret = check_fs_root(tmp_root, root_cache, &wc);
4127 if (ret == -EAGAIN) {
4128 free_root_recs_tree(root_cache);
4129 btrfs_release_path(&path);
4134 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4135 btrfs_free_fs_root(tmp_root);
4136 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4137 key.type == BTRFS_ROOT_BACKREF_KEY) {
4138 process_root_ref(leaf, path.slots[0], &key,
4145 btrfs_release_path(&path);
4147 free_extent_cache_tree(&wc.shared);
4148 if (!cache_tree_empty(&wc.shared))
4149 fprintf(stderr, "warning line %d\n", __LINE__);
4151 task_stop(ctx.info);
4157 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4158 * INODE_REF/INODE_EXTREF match.
4160 * @root: the root of the fs/file tree
4161 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4162 * @key: the key of the DIR_ITEM/DIR_INDEX
4163 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4164 * distinguish root_dir from a normal dir/file
4165 * @name: the name in the INODE_REF/INODE_EXTREF
4166 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4167 * @mode: the st_mode of INODE_ITEM
4169 * Return 0 if no error occurred.
4170 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4171 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4173 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4174 * not match for normal dir/file.
/*
 * Search @root for @key (a DIR_ITEM or DIR_INDEX key) and compare the
 * entries stored in that item with the inode reference described by
 * @ref_key, @name, @namelen and @mode.
 */
4176 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4177 struct btrfs_key *key, u64 index, char *name,
4178 u32 namelen, u32 mode)
4180 struct btrfs_path path;
4181 struct extent_buffer *node;
4182 struct btrfs_dir_item *di;
4183 struct btrfs_key location;
4184 char namebuf[BTRFS_NAME_LEN] = {0};
4194 btrfs_init_path(&path);
4195 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4197 ret = DIR_ITEM_MISSING;
4201 /* Process root dir and goto out*/
4204 ret = ROOT_DIR_ERROR;
4206 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4208 ref_key->type == BTRFS_INODE_REF_KEY ?
4210 ref_key->objectid, ref_key->offset,
4211 key->type == BTRFS_DIR_ITEM_KEY ?
4212 "DIR_ITEM" : "DIR_INDEX");
4220 /* Process normal file/dir */
4222 ret = DIR_ITEM_MISSING;
4224 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4226 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4227 ref_key->objectid, ref_key->offset,
4228 key->type == BTRFS_DIR_ITEM_KEY ?
4229 "DIR_ITEM" : "DIR_INDEX",
4230 key->objectid, key->offset, namelen, name,
4231 imode_to_type(mode));
4235 /* Check whether inode_id/filetype/name match */
4236 node = path.nodes[0];
4237 slot = path.slots[0];
4238 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4239 total = btrfs_item_size_nr(node, slot);
/*
 * One item can pack several dir entries.  Each iteration starts from
 * DIR_ITEM_MISMATCH; presumably a fully matching entry clears it and ends
 * the scan -- confirm.
 */
4240 while (cur < total) {
4241 ret = DIR_ITEM_MISMATCH;
4242 name_len = btrfs_dir_name_len(node, di);
4243 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM of the referencing inode. */
4245 btrfs_dir_item_key_to_cpu(node, di, &location);
4246 if (location.objectid != ref_key->objectid ||
4247 location.type != BTRFS_INODE_ITEM_KEY ||
4248 location.offset != 0)
/* The stored file type must agree with the inode's mode. */
4251 filetype = btrfs_dir_type(node, di);
4252 if (imode_to_type(mode) != filetype)
/*
 * The name overruns the item or exceeds BTRFS_NAME_LEN: warn and compare
 * only the part that fits.
 */
4255 if (cur + sizeof(*di) + name_len > total ||
4256 name_len > BTRFS_NAME_LEN) {
4257 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4259 key->type == BTRFS_DIR_ITEM_KEY ?
4260 "DIR_ITEM" : "DIR_INDEX",
4261 key->objectid, key->offset, name_len);
4263 if (cur + sizeof(*di) > total)
4265 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes directly follow the btrfs_dir_item header. */
4271 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4272 if (len != namelen || strncmp(namebuf, name, len))
/* Step over header + name + data to reach the next packed entry. */
4278 len = sizeof(*di) + name_len + data_len;
4279 di = (struct btrfs_dir_item *)((char *)di + len);
4282 if (ret == DIR_ITEM_MISMATCH)
4284 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4286 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4287 ref_key->objectid, ref_key->offset,
4288 key->type == BTRFS_DIR_ITEM_KEY ?
4289 "DIR_ITEM" : "DIR_INDEX",
4290 key->objectid, key->offset, namelen, name,
4291 imode_to_type(mode));
4293 btrfs_release_path(&path);
4298 * Traverse the given INODE_REF and call find_dir_item() to find related
4299 * DIR_ITEM/DIR_INDEX.
4301 * @root: the root of the fs/file tree
4302 * @ref_key: the key of the INODE_REF
4303 * @refs: the count of INODE_REF
4304 * @mode: the st_mode of INODE_ITEM
4306 * Return 0 if no error occurred.
/*
 * Walk the references packed into the INODE_REF item at @slot of @node and
 * look up the DIR_INDEX and DIR_ITEM that belong to each of them through
 * find_dir_item().
 */
4308 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4309 struct extent_buffer *node, int slot, u64 *refs,
4312 struct btrfs_key key;
4313 struct btrfs_inode_ref *ref;
4314 char namebuf[BTRFS_NAME_LEN] = {0};
4322 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4323 total = btrfs_item_size_nr(node, slot);
4326 /* Update inode ref count */
4329 index = btrfs_inode_ref_index(node, ref);
4330 name_len = btrfs_inode_ref_name_len(node, ref);
/*
 * The name overruns the item or exceeds BTRFS_NAME_LEN: warn and clamp the
 * number of bytes read to what fits.
 */
4331 if (cur + sizeof(*ref) + name_len > total ||
4332 name_len > BTRFS_NAME_LEN) {
4333 warning("root %llu INODE_REF[%llu %llu] name too long",
4334 root->objectid, ref_key->objectid, ref_key->offset);
4336 if (total < cur + sizeof(*ref))
4338 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
/* The name bytes directly follow the btrfs_inode_ref header. */
4343 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4345 /* Check root dir ref name */
/* A reference with index 0 is treated as the root dir; its name must be "..". */
4346 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4347 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4348 root->objectid, ref_key->objectid, ref_key->offset,
4350 err |= ROOT_DIR_ERROR;
4353 /* Find related DIR_INDEX */
/* The directory id for the lookup is taken from the INODE_REF key offset. */
4354 key.objectid = ref_key->offset;
4355 key.type = BTRFS_DIR_INDEX_KEY;
4357 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4360 /* Find related dir_item */
/* A DIR_ITEM is keyed by the hash of the name. */
4361 key.objectid = ref_key->offset;
4362 key.type = BTRFS_DIR_ITEM_KEY;
4363 key.offset = btrfs_name_hash(namebuf, len);
4364 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over header + name to reach the next packed reference. */
4367 len = sizeof(*ref) + name_len;
4368 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4378 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4379 * DIR_ITEM/DIR_INDEX.
4381 * @root: the root of the fs/file tree
4382 * @ref_key: the key of the INODE_EXTREF
4383 * @refs: the count of INODE_EXTREF
4384 * @mode: the st_mode of INODE_ITEM
4386 * Return 0 if no error occurred.
/*
 * Walk the references packed into the INODE_EXTREF item at @slot of @node
 * and look up the DIR_INDEX and DIR_ITEM that belong to each of them through
 * find_dir_item().  The parent directory id is stored in each extref entry.
 */
4388 static int check_inode_extref(struct btrfs_root *root,
4389 struct btrfs_key *ref_key,
4390 struct extent_buffer *node, int slot, u64 *refs,
4393 struct btrfs_key key;
4394 struct btrfs_inode_extref *extref;
4395 char namebuf[BTRFS_NAME_LEN] = {0};
4405 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4406 total = btrfs_item_size_nr(node, slot);
4409 /* update inode ref count */
4411 name_len = btrfs_inode_extref_name_len(node, extref);
4412 index = btrfs_inode_extref_index(node, extref);
4413 parent = btrfs_inode_extref_parent(node, extref);
/*
 * Names longer than BTRFS_NAME_LEN are read only up to BTRFS_NAME_LEN bytes.
 * NOTE(review): check_inode_ref() also bounds the name by the item size; no
 * such bound is visible here -- confirm whether that is intended.
 */
4414 if (name_len <= BTRFS_NAME_LEN) {
4417 len = BTRFS_NAME_LEN;
4418 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4419 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes directly follow the btrfs_inode_extref header. */
4421 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4423 /* Check root dir ref name */
/* A reference with index 0 is treated as the root dir; its name must be "..". */
4424 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4425 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4426 root->objectid, ref_key->objectid, ref_key->offset,
4428 err |= ROOT_DIR_ERROR;
4431 /* find related dir_index */
4432 key.objectid = parent;
4433 key.type = BTRFS_DIR_INDEX_KEY;
4435 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4438 /* find related dir_item */
/* A DIR_ITEM is keyed by the hash of the name. */
4439 key.objectid = parent;
4440 key.type = BTRFS_DIR_ITEM_KEY;
4441 key.offset = btrfs_name_hash(namebuf, len);
4442 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over header + name to reach the next packed reference. */
4445 len = sizeof(*extref) + name_len;
4446 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4456 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4457 * DIR_ITEM/DIR_INDEX match.
4459 * @root: the root of the fs/file tree
4460 * @key: the key of the INODE_REF/INODE_EXTREF
4461 * @name: the name in the INODE_REF/INODE_EXTREF
4462 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4463 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4465 * @ext_ref: the EXTENDED_IREF feature
4467 * Return 0 if no error occurred.
4468 * Return >0 for error bitmap
/*
 * Search @root for the INODE_REF item at @key and scan its entries for one
 * whose name equals @name/@namelen and, unless @index is (u64)-1, whose index
 * equals @index.  Afterwards the search is repeated on the INODE_EXTREF item
 * derived from the same inode, parent directory and name.
 *
 * Side effect: @key is rewritten in place into the INODE_EXTREF key before
 * the second search.
 */
4470 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4471 char *name, int namelen, u64 index,
4472 unsigned int ext_ref)
4474 struct btrfs_path path;
4475 struct btrfs_inode_ref *ref;
4476 struct btrfs_inode_extref *extref;
4477 struct extent_buffer *node;
4478 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4489 btrfs_init_path(&path);
4490 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4492 ret = INODE_REF_MISSING;
4496 node = path.nodes[0];
4497 slot = path.slots[0];
4499 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4500 total = btrfs_item_size_nr(node, slot);
4502 /* Iterate all entry of INODE_REF */
4503 while (cur < total) {
4504 ret = INODE_REF_MISSING;
4506 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4507 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 means the caller does not want the index compared. */
4508 if (index != (u64)-1 && index != ref_index)
/*
 * The name overruns the item or exceeds BTRFS_NAME_LEN: warn and clamp the
 * number of bytes read to what fits.
 */
4511 if (cur + sizeof(*ref) + ref_namelen > total ||
4512 ref_namelen > BTRFS_NAME_LEN) {
4513 warning("root %llu INODE %s[%llu %llu] name too long",
4515 key->type == BTRFS_INODE_REF_KEY ?
4517 key->objectid, key->offset);
4519 if (cur + sizeof(*ref) > total)
4521 len = min_t(u32, total - cur - sizeof(*ref),
4527 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4530 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step over header + name to reach the next packed reference. */
4536 len = sizeof(*ref) + ref_namelen;
4537 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4542 /* Skip if not support EXTENDED_IREF feature */
4546 btrfs_release_path(&path);
4547 btrfs_init_path(&path);
/*
 * Build the INODE_EXTREF key: the parent directory id comes from the old key
 * offset, and the new offset is the extref hash of (parent, name).
 */
4549 dir_id = key->offset;
4550 key->type = BTRFS_INODE_EXTREF_KEY;
4551 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4553 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4555 ret = INODE_REF_MISSING;
4559 node = path.nodes[0];
4560 slot = path.slots[0];
4562 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4564 total = btrfs_item_size_nr(node, slot);
4566 /* Iterate all entry of INODE_EXTREF */
4567 while (cur < total) {
4568 ret = INODE_REF_MISSING;
4570 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4571 ref_index = btrfs_inode_extref_index(node, extref);
4572 parent = btrfs_inode_extref_parent(node, extref);
4573 if (index != (u64)-1 && index != ref_index)
/* Entries sharing the hash may belong to another directory. */
4576 if (parent != dir_id)
4579 if (ref_namelen <= BTRFS_NAME_LEN) {
4582 len = BTRFS_NAME_LEN;
/*
 * NOTE(review): key->type was set to BTRFS_INODE_EXTREF_KEY above, so the
 * comparison with BTRFS_INODE_REF_KEY below cannot be true at this point.
 */
4583 warning("root %llu INODE %s[%llu %llu] name too long",
4585 key->type == BTRFS_INODE_REF_KEY ?
4587 key->objectid, key->offset);
4589 read_extent_buffer(node, ref_namebuf,
4590 (unsigned long)(extref + 1), len);
4592 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step over header + name to reach the next packed reference. */
4599 len = sizeof(*extref) + ref_namelen;
4600 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4605 btrfs_release_path(&path);
4610 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4611 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4613 * @root: the root of the fs/file tree
4614 * @key: the key of the DIR_ITEM/DIR_INDEX
4615 * @size: the st_size of the INODE_ITEM
4616 * @ext_ref: the EXTENDED_IREF feature
4618 * Return 0 if no error occurred.
/*
 * Check every entry packed into the DIR_ITEM/DIR_INDEX item at @slot of
 * @node: its data_len and name length, the name hash (DIR_ITEM only), the
 * INODE_ITEM it points to and the matching INODE_REF/INODE_EXTREF.
 * The name length of each entry is added to *@size.
 */
4620 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4621 struct extent_buffer *node, int slot, u64 *size,
4622 unsigned int ext_ref)
4624 struct btrfs_dir_item *di;
4625 struct btrfs_inode_item *ii;
4626 struct btrfs_path path;
4627 struct btrfs_key location;
4628 char namebuf[BTRFS_NAME_LEN] = {0};
4641 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4642 * ignore index check.
4644 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4646 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4647 total = btrfs_item_size_nr(node, slot);
4649 while (cur < total) {
4650 data_len = btrfs_dir_data_len(node, di);
4652 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4653 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4654 "DIR_ITEM" : "DIR_INDEX",
4655 key->objectid, key->offset, data_len);
4657 name_len = btrfs_dir_name_len(node, di);
/*
 * The name overruns the item or exceeds BTRFS_NAME_LEN: warn and clamp the
 * number of bytes read to what fits.
 */
4658 if (cur + sizeof(*di) + name_len > total ||
4659 name_len > BTRFS_NAME_LEN) {
4660 warning("root %llu %s[%llu %llu] name too long",
4662 key->type == BTRFS_DIR_ITEM_KEY ?
4663 "DIR_ITEM" : "DIR_INDEX",
4664 key->objectid, key->offset);
4666 if (cur + sizeof(*di) > total)
4668 len = min_t(u32, total - cur - sizeof(*di),
/* Accumulate the stored name length into the caller's running size. */
4673 (*size) += name_len;
/* The name bytes directly follow the btrfs_dir_item header. */
4675 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4676 filetype = btrfs_dir_type(node, di);
/* For a DIR_ITEM the key offset must equal the hash of the name. */
4678 if (key->type == BTRFS_DIR_ITEM_KEY &&
4679 key->offset != btrfs_name_hash(namebuf, len)) {
4681 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4682 root->objectid, key->objectid, key->offset,
4683 namebuf, len, filetype, key->offset,
4684 btrfs_name_hash(namebuf, len));
4687 btrfs_init_path(&path);
4688 btrfs_dir_item_key_to_cpu(node, di, &location);
4690 /* Ignore related ROOT_ITEM check */
4691 if (location.type == BTRFS_ROOT_ITEM_KEY)
4694 /* Check relative INODE_ITEM(existence/filetype) */
4695 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4697 err |= INODE_ITEM_MISSING;
4698 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4699 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4700 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4701 key->offset, location.objectid, name_len,
4706 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4707 struct btrfs_inode_item);
4708 mode = btrfs_inode_mode(path.nodes[0], ii);
/* The file type stored in the dir entry must agree with the inode's mode. */
4710 if (imode_to_type(mode) != filetype) {
4711 err |= INODE_ITEM_MISMATCH;
4712 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4713 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4714 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4715 key->offset, name_len, namebuf, filetype);
4718 /* Check relative INODE_REF/INODE_EXTREF */
/* Reuse location as the INODE_REF key: (inode, INODE_REF, this directory). */
4719 location.type = BTRFS_INODE_REF_KEY;
4720 location.offset = key->objectid;
4721 ret = find_inode_ref(root, &location, namebuf, len,
4724 if (ret & INODE_REF_MISSING)
4725 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4726 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4727 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4728 key->offset, name_len, namebuf, filetype);
4731 btrfs_release_path(&path);
/* Step over header + name + data to reach the next packed entry. */
4732 len = sizeof(*di) + name_len + data_len;
4733 di = (struct btrfs_dir_item *)((char *)di + len);
/* Bytes left over after one entry of a DIR_INDEX item are an error. */
4736 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4737 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4738 root->objectid, key->objectid, key->offset);
4747 * Check file extent datasum/hole, update the size of the file extents,
4748 * check and update the last offset of the file extent.
4750 * @root: the root of fs/file tree.
4751 * @fkey: the key of the file extent.
4752 * @nodatasum: INODE_NODATASUM feature.
4753 * @size: the sum of all EXTENT_DATA items size for this inode.
4754 * @end: the offset of the last extent.
4756 * Return 0 if no error occurred.
/*
 * Check the EXTENT_DATA item at @slot of @node: inline extent length,
 * extent type, data checksum coverage and continuity with the previous
 * extent.  *@end and *@size are advanced by the extent's byte count.
 */
4758 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4759 struct extent_buffer *node, int slot,
4760 unsigned int nodatasum, u64 *size, u64 *end)
4762 struct btrfs_file_extent_item *fi;
4765 u64 extent_num_bytes;
4767 u64 csum_found; /* In byte size, sectorsize aligned */
4768 u64 search_start; /* Logical range start we search for csum */
4769 u64 search_len; /* Logical range len we search for csum */
4770 unsigned int extent_type;
4771 unsigned int is_hole;
4776 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4778 /* Check inline extent */
4779 extent_type = btrfs_file_extent_type(node, fi);
4780 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4781 struct btrfs_item *e = btrfs_item_nr(slot);
4782 u32 item_inline_len;
4784 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4785 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4786 compressed = btrfs_file_extent_compression(node, fi);
4787 if (extent_num_bytes == 0) {
4789 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4790 root->objectid, fkey->objectid, fkey->offset);
4791 err |= FILE_EXTENT_ERROR;
/* The two lengths are only required to agree for uncompressed inline data. */
4793 if (!compressed && extent_num_bytes != item_inline_len) {
4795 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4796 root->objectid, fkey->objectid, fkey->offset,
4797 extent_num_bytes, item_inline_len);
4798 err |= FILE_EXTENT_ERROR;
4800 *end += extent_num_bytes;
4801 *size += extent_num_bytes;
4805 /* Check extent type */
4806 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4807 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4808 err |= FILE_EXTENT_ERROR;
4809 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4810 root->objectid, fkey->objectid, fkey->offset);
4814 /* Check REG_EXTENT/PREALLOC_EXTENT */
4815 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4816 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4817 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4818 extent_offset = btrfs_file_extent_offset(node, fi);
4819 compressed = btrfs_file_extent_compression(node, fi);
/* An extent with neither a disk address nor a disk length is a hole. */
4820 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4823 * Check EXTENT_DATA csum
4825 * For plain (uncompressed) extent, we should only check the range
4826 * we're referring to, as it's possible that part of prealloc extent
4827 * has been written, and has csum:
4829 * |<--- Original large preallocated extent A ---->|
4830 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4833 * For compressed extent, we should check the whole range.
4836 search_start = disk_bytenr + extent_offset;
4837 search_len = extent_num_bytes;
4839 search_start = disk_bytenr;
4840 search_len = disk_num_bytes;
4842 ret = count_csum_range(root, search_start, search_len, &csum_found);
/*
 * Three csum inconsistencies are reported: csums on a NODATASUM inode,
 * missing csums on a regular non-hole extent, and csums on a prealloc
 * extent.
 */
4843 if (csum_found > 0 && nodatasum) {
4844 err |= ODD_CSUM_ITEM;
4845 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4846 root->objectid, fkey->objectid, fkey->offset);
4847 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4848 !is_hole && (ret < 0 || csum_found < search_len)) {
4849 err |= CSUM_ITEM_MISSING;
4850 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4851 root->objectid, fkey->objectid, fkey->offset,
4852 csum_found, search_len);
4853 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4854 err |= ODD_CSUM_ITEM;
4855 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4856 root->objectid, fkey->objectid, fkey->offset, csum_found);
4859 /* Check EXTENT_DATA hole */
/*
 * Unless the global no_holes flag is set, this extent must start exactly
 * where the previous one ended.
 */
4860 if (!no_holes && *end != fkey->offset) {
4861 err |= FILE_EXTENT_ERROR;
4862 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4863 root->objectid, fkey->objectid, fkey->offset);
4866 *end += extent_num_bytes;
4868 *size += extent_num_bytes;
4874 * Check INODE_ITEM and related ITEMs (the same inode number)
4875 * 1. check link count
4876 * 2. check inode ref/extref
4877 * 3. check dir item/index
4879 * @ext_ref: the EXTENDED_IREF feature
4881 * Return 0 if no error occurred.
4882 * Return >0 for error or hit the traversal is done(by error bitmap)
/*
 * Check the INODE_ITEM that @path points at together with the items that
 * follow it and share its objectid.  Each following item is dispatched by
 * key type to the matching checker; afterwards the inode's nlink, size and
 * nbytes are compared with the values gathered during the walk.
 * @path is advanced with btrfs_next_item() as items are consumed.
 */
4884 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4885 unsigned int ext_ref)
4887 struct extent_buffer *node;
4888 struct btrfs_inode_item *ii;
4889 struct btrfs_key key;
4898 u64 extent_size = 0;
4900 unsigned int nodatasum;
4905 node = path->nodes[0];
4906 slot = path->slots[0];
4908 btrfs_item_key_to_cpu(node, &key, slot);
4909 inode_id = key.objectid;
/* Items under the orphan objectid are stepped over rather than checked. */
4911 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4912 ret = btrfs_next_item(root, path);
/* Snapshot the inode fields that the later comparisons need. */
4918 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4919 isize = btrfs_inode_size(node, ii);
4920 nbytes = btrfs_inode_nbytes(node, ii);
4921 mode = btrfs_inode_mode(node, ii);
4922 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4923 nlink = btrfs_inode_nlink(node, ii);
4924 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4927 ret = btrfs_next_item(root, path);
4929 /* out will fill 'err' using current statistics */
4931 } else if (ret > 0) {
4936 node = path->nodes[0];
4937 slot = path->slots[0];
4938 btrfs_item_key_to_cpu(node, &key, slot);
/* A different objectid means the items of this inode are exhausted. */
4939 if (key.objectid != inode_id)
4943 case BTRFS_INODE_REF_KEY:
4944 ret = check_inode_ref(root, &key, node, slot, &refs,
4948 case BTRFS_INODE_EXTREF_KEY:
/* An EXTREF without the EXTENDED_IREF feature is warned about but still checked. */
4949 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4950 warning("root %llu EXTREF[%llu %llu] isn't supported",
4951 root->objectid, key.objectid,
4953 ret = check_inode_extref(root, &key, node, slot, &refs,
4957 case BTRFS_DIR_ITEM_KEY:
4958 case BTRFS_DIR_INDEX_KEY:
4960 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4961 root->objectid, inode_id,
4962 imode_to_type(mode), key.objectid,
4965 ret = check_dir_item(root, &key, node, slot, &size,
4969 case BTRFS_EXTENT_DATA_KEY:
4971 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4972 root->objectid, inode_id, key.objectid,
4975 ret = check_file_extent(root, &key, node, slot,
4976 nodatasum, &extent_size,
4980 case BTRFS_XATTR_ITEM_KEY:
4983 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4984 key.objectid, key.type, key.offset);
4989 /* verify INODE_ITEM nlink/isize/nbytes */
4992 err |= LINK_COUNT_ERROR;
4993 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4994 root->objectid, inode_id, nlink);
4998 * Just a warning, as dir inode nbytes is just an
4999 * instructive value.
5001 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5002 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5003 root->objectid, inode_id,
5004 root->fs_info->nodesize);
/* size holds the name lengths accumulated by check_dir_item(). */
5007 if (isize != size) {
5009 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5010 root->objectid, inode_id, isize, size);
/* refs is the counter handed to check_inode_ref()/check_inode_extref(). */
5013 if (nlink != refs) {
5014 err |= LINK_COUNT_ERROR;
5015 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5016 root->objectid, inode_id, nlink, refs);
5017 } else if (!nlink) {
5021 if (!nbytes && !no_holes && extent_end < isize) {
5022 err |= NBYTES_ERROR;
5023 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5024 root->objectid, inode_id, isize);
/* extent_size is the byte total accumulated by check_file_extent(). */
5027 if (nbytes != extent_size) {
5028 err |= NBYTES_ERROR;
5029 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5030 root->objectid, inode_id, nbytes, extent_size);
/*
 * Check the first inode of @root, i.e. the INODE_ITEM whose objectid is
 * BTRFS_FIRST_FREE_OBJECTID.  A missing item is reported with
 * INODE_ITEM_MISSING; otherwise the result of check_inode_item() is merged
 * into the error bitmap.
 *
 * @root:    the fs/file tree to look at
 * @ext_ref: the EXTENDED_IREF feature, passed on to check_inode_item()
 */
5037 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5039 struct btrfs_path path;
5040 struct btrfs_key key;
5044 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5045 key.type = BTRFS_INODE_ITEM_KEY;
5048 /* For root being dropped, we don't need to check first inode */
5049 if (btrfs_root_refs(&root->root_item) == 0 &&
5050 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5054 btrfs_init_path(&path);
5056 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5061 err |= INODE_ITEM_MISSING;
5062 error("first inode item of root %llu is missing",
5066 err |= check_inode_item(root, &path, ext_ref);
5071 btrfs_release_path(&path);
5076 * Iterate all item on the tree and call check_inode_item() to check.
5078 * @root: the root of the tree to be checked.
5079 * @ext_ref: the EXTENDED_IREF feature
5081 * Return 0 if no error found.
5082 * Return <0 for error.
/*
 * Check one fs/file tree: first the first inode item through
 * check_fs_first_inode(), then the whole tree by alternating
 * walk_down_tree_v2() and walk_up_tree_v2().
 */
5084 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5086 struct btrfs_path path;
5087 struct node_refs nrefs;
5088 struct btrfs_root_item *root_item = &root->root_item;
5094 * We need to manually check the first inode item(256)
5095 * As the following traversal function will only start from
5096 * the first inode item in the leaf, if inode item(256) is missing
5097 * we will just skip it forever.
5099 ret = check_fs_first_inode(root, ext_ref);
5103 memset(&nrefs, 0, sizeof(nrefs));
5104 level = btrfs_header_level(root->node);
5105 btrfs_init_path(&path);
/*
 * A root that is still referenced, or whose drop_progress objectid is 0, is
 * walked from its root node; the path takes its own reference on that
 * buffer.  Otherwise the walk starts at the drop_progress key on
 * drop_level.
 */
5107 if (btrfs_root_refs(root_item) > 0 ||
5108 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5109 path.nodes[level] = root->node;
5110 path.slots[level] = 0;
5111 extent_buffer_get(root->node);
5113 struct btrfs_key key;
5115 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5116 level = root_item->drop_level;
5117 path.lowest_level = level;
5118 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5125 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5128 /* if ret is negative, walk shall stop */
5134 ret = walk_up_tree_v2(root, &path, &level);
5136 /* Normal exit, reset ret to err */
5143 btrfs_release_path(&path);
5148 * Find the relative ref for root_ref and root_backref.
5150 * @root: the root of the root tree.
5151 * @ref_key: the key of the root ref.
5153 * Return 0 if no error occurred.
/*
 * For the ROOT_REF or ROOT_BACKREF item at @slot of @node, look up the
 * counterpart item of the opposite type in @root and verify that dirid,
 * sequence, name length and name agree.  A missing counterpart sets
 * ROOT_REF_MISSING, a disagreeing one sets ROOT_REF_MISMATCH.
 */
5155 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5156 struct extent_buffer *node, int slot)
5158 struct btrfs_path path;
5159 struct btrfs_key key;
5160 struct btrfs_root_ref *ref;
5161 struct btrfs_root_ref *backref;
5162 char ref_name[BTRFS_NAME_LEN] = {0};
5163 char backref_name[BTRFS_NAME_LEN] = {0};
5169 u32 backref_namelen;
5174 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5175 ref_dirid = btrfs_root_ref_dirid(node, ref);
5176 ref_seq = btrfs_root_ref_sequence(node, ref);
5177 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Names longer than BTRFS_NAME_LEN are read only up to BTRFS_NAME_LEN bytes. */
5179 if (ref_namelen <= BTRFS_NAME_LEN) {
5182 len = BTRFS_NAME_LEN;
5183 warning("%s[%llu %llu] ref_name too long",
5184 ref_key->type == BTRFS_ROOT_REF_KEY ?
5185 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes directly follow the btrfs_root_ref header. */
5188 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5190 /* Find relative root_ref */
/*
 * The counterpart key swaps objectid and offset.  Subtracting the current
 * type from the sum of both types yields the other type (REF <-> BACKREF).
 */
5191 key.objectid = ref_key->offset;
5192 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5193 key.offset = ref_key->objectid;
5195 btrfs_init_path(&path);
5196 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5198 err |= ROOT_REF_MISSING;
5199 error("%s[%llu %llu] couldn't find relative ref",
5200 ref_key->type == BTRFS_ROOT_REF_KEY ?
5201 "ROOT_REF" : "ROOT_BACKREF",
5202 ref_key->objectid, ref_key->offset);
5206 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5207 struct btrfs_root_ref);
5208 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5209 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5210 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5212 if (backref_namelen <= BTRFS_NAME_LEN) {
5213 len = backref_namelen;
5215 len = BTRFS_NAME_LEN;
5216 warning("%s[%llu %llu] ref_name too long",
5217 key.type == BTRFS_ROOT_REF_KEY ?
5218 "ROOT_REF" : "ROOT_BACKREF",
5219 key.objectid, key.offset);
5221 read_extent_buffer(path.nodes[0], backref_name,
5222 (unsigned long)(backref + 1), len);
/* Every stored field of the two items has to agree. */
5224 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5225 ref_namelen != backref_namelen ||
5226 strncmp(ref_name, backref_name, len)) {
5227 err |= ROOT_REF_MISMATCH;
5228 error("%s[%llu %llu] mismatch relative ref",
5229 ref_key->type == BTRFS_ROOT_REF_KEY ?
5230 "ROOT_REF" : "ROOT_BACKREF",
5231 ref_key->objectid, ref_key->offset);
5234 btrfs_release_path(&path);
5239 * Check all fs/file tree in low_memory mode.
5241 * 1. for fs tree root item, call check_fs_root_v2()
5242 * 2. for fs tree root ref/backref, call check_root_ref()
5244 * Return 0 if no error occurred.
/*
 * Iterate the tree root starting at the BTRFS_FS_TREE_OBJECTID ROOT_ITEM.
 * fs tree ROOT_ITEMs are read and checked with check_fs_root_v2(); ROOT_REF
 * and ROOT_BACKREF items are checked with check_root_ref().
 *
 * @fs_info: the filesystem whose tree root is scanned
 */
5246 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5248 struct btrfs_root *tree_root = fs_info->tree_root;
5249 struct btrfs_root *cur_root = NULL;
5250 struct btrfs_path path;
5251 struct btrfs_key key;
5252 struct extent_buffer *node;
5253 unsigned int ext_ref;
/* The EXTENDED_IREF incompat flag is handed down to the per-root checker. */
5258 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5260 btrfs_init_path(&path);
5261 key.objectid = BTRFS_FS_TREE_OBJECTID;
5263 key.type = BTRFS_ROOT_ITEM_KEY;
5265 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5269 } else if (ret > 0) {
5275 node = path.nodes[0];
5276 slot = path.slots[0];
5277 btrfs_item_key_to_cpu(node, &key, slot);
/* Objectids above BTRFS_LAST_FREE_OBJECTID presumably end the scan -- confirm. */
5278 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5280 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5281 fs_root_objectid(key.objectid)) {
/*
 * Tree-reloc roots are read without the fs-root cache and are freed
 * explicitly once checked; other roots go through btrfs_read_fs_root()
 * with key.offset set to (u64)-1.
 */
5282 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5283 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5286 key.offset = (u64)-1;
5287 cur_root = btrfs_read_fs_root(fs_info, &key);
5290 if (IS_ERR(cur_root)) {
5291 error("Fail to read fs/subvol tree: %lld",
5297 ret = check_fs_root_v2(cur_root, ext_ref);
5300 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5301 btrfs_free_fs_root(cur_root);
5302 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5303 key.type == BTRFS_ROOT_BACKREF_KEY) {
5304 ret = check_root_ref(tree_root, &key, node, slot);
5308 ret = btrfs_next_item(tree_root, &path);
5318 btrfs_release_path(&path);
/*
 * Verify the backrefs attached to @rec.  Each backref must have been found
 * in the extent tree; a tree backref must also have been referenced; a data
 * backref must have found_ref equal to num_refs, a disk_bytenr equal to
 * rec->start and a byte count equal to rec->nr.  Finally the accumulated
 * reference count is compared with rec->refs.
 *
 * NOTE(review): @print_errs presumably selects whether the messages below
 * are printed; maybe_free_extent_rec() treats a zero return as "no problem
 * found" -- confirm both.
 */
5322 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5324 struct list_head *cur = rec->backrefs.next;
5325 struct extent_backref *back;
5326 struct tree_backref *tback;
5327 struct data_backref *dback;
5331 while(cur != &rec->backrefs) {
5332 back = to_extent_backref(cur);
5334 if (!back->found_extent_tree) {
5338 if (back->is_data) {
5339 dback = to_data_backref(back);
5340 fprintf(stderr, "Backref %llu %s %llu"
5341 " owner %llu offset %llu num_refs %lu"
5342 " not found in extent tree\n",
5343 (unsigned long long)rec->start,
5344 back->full_backref ?
5346 back->full_backref ?
5347 (unsigned long long)dback->parent:
5348 (unsigned long long)dback->root,
5349 (unsigned long long)dback->owner,
5350 (unsigned long long)dback->offset,
5351 (unsigned long)dback->num_refs);
5353 tback = to_tree_backref(back);
5354 fprintf(stderr, "Backref %llu parent %llu"
5355 " root %llu not found in extent tree\n",
5356 (unsigned long long)rec->start,
5357 (unsigned long long)tback->parent,
5358 (unsigned long long)tback->root);
/* A tree backref that no tree block referenced back. */
5361 if (!back->is_data && !back->found_ref) {
5365 tback = to_tree_backref(back);
5366 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5367 (unsigned long long)rec->start,
5368 back->full_backref ? "parent" : "root",
5369 back->full_backref ?
5370 (unsigned long long)tback->parent :
5371 (unsigned long long)tback->root, back);
5373 if (back->is_data) {
5374 dback = to_data_backref(back);
/* References actually found must equal the count the backref records. */
5375 if (dback->found_ref != dback->num_refs) {
5379 fprintf(stderr, "Incorrect local backref count"
5380 " on %llu %s %llu owner %llu"
5381 " offset %llu found %u wanted %u back %p\n",
5382 (unsigned long long)rec->start,
5383 back->full_backref ?
5385 back->full_backref ?
5386 (unsigned long long)dback->parent:
5387 (unsigned long long)dback->root,
5388 (unsigned long long)dback->owner,
5389 (unsigned long long)dback->offset,
5390 dback->found_ref, dback->num_refs, back);
5392 if (dback->disk_bytenr != rec->start) {
5396 fprintf(stderr, "Backref disk bytenr does not"
5397 " match extent record, bytenr=%llu, "
5398 "ref bytenr=%llu\n",
5399 (unsigned long long)rec->start,
5400 (unsigned long long)dback->disk_bytenr);
5403 if (dback->bytes != rec->nr) {
5407 fprintf(stderr, "Backref bytes do not match "
5408 "extent backref, bytenr=%llu, ref "
5409 "bytes=%llu, backref bytes=%llu\n",
5410 (unsigned long long)rec->start,
5411 (unsigned long long)rec->nr,
5412 (unsigned long long)dback->bytes);
/*
 * Accumulate the global count: a data backref contributes its found_ref
 * (the contribution of a tree backref is presumably one -- confirm).
 */
5415 if (!back->is_data) {
5418 dback = to_data_backref(back);
5419 found += dback->found_ref;
5422 if (found != rec->refs) {
5426 fprintf(stderr, "Incorrect global backref count "
5427 "on %llu found %llu wanted %llu\n",
5428 (unsigned long long)rec->start,
5429 (unsigned long long)found,
5430 (unsigned long long)rec->refs);
/*
 * Walk rec->backrefs from the head until the list is empty, converting each
 * list node to its enclosing extent_backref.
 *
 * NOTE(review): by its name this releases every backref of @rec; the
 * statements that unlink/free each entry and the return value are not
 * visible here - confirm.
 */
5436 static int free_all_extent_backrefs(struct extent_record *rec)
5438 struct extent_backref *back;
5439 struct list_head *cur;
5440 while (!list_empty(&rec->backrefs)) {
5441 cur = rec->backrefs.next;
5442 back = to_extent_backref(cur);
/*
 * Tear down the extent records stored in @extent_cache: take the first cache
 * entry, map it to its enclosing extent_record, remove it from the tree and
 * hand the record to free_all_extent_backrefs().
 *
 * NOTE(review): the loop construct and the release of the record itself are
 * not visible here - confirm.
 */
5449 static void free_extent_record_cache(struct cache_tree *extent_cache)
5451 struct cache_extent *cache;
5452 struct extent_record *rec;
5455 cache = first_cache_extent(extent_cache);
5458 rec = container_of(cache, struct extent_record, cache);
5459 remove_cache_extent(extent_cache, cache);
5460 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing about it is left to verify.
 *
 * The record is removed only when all of the following hold: content and
 * owner ref were checked, extent_item_refs equals refs and refs is non-zero,
 * there are no duplicates, all_backpointers_checked(rec, 0) returns 0, and
 * none of bad_full_backref / crossing_stripes / wrong_chunk_type is set.
 * On removal the backrefs are released and the record is unlinked from
 * rec->list.
 *
 * NOTE(review): the return value and the release of @rec itself are not
 * visible here - confirm.
 */
5465 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5466 struct extent_record *rec)
5468 if (rec->content_checked && rec->owner_ref_checked &&
5469 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5470 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5471 !rec->bad_full_backref && !rec->crossing_stripes &&
5472 !rec->wrong_chunk_type) {
5473 remove_cache_extent(extent_cache, &rec->cache);
5474 free_all_extent_backrefs(rec);
5475 list_del_init(&rec->list);
/*
 * Check whether tree block @buf is really referenced by the tree named as
 * owner in its header.
 *
 * Two strategies are visible:
 *  1. Scan rec->backrefs for a tree backref whose root equals
 *     btrfs_header_owner(buf); found_ref and full_backref are tested on each
 *     entry first.
 *  2. Otherwise read the owner's root, search it for the first key of @buf
 *     with the path stopped one level above @buf, and compare the block
 *     pointer in that parent slot with buf->start.
 *
 * Returns 0 when the final lookup found the reference, 1 otherwise.
 *
 * NOTE(review): the bodies of several conditionals (early returns/continues)
 * are not visible here - confirm their outcomes.
 */
5481 static int check_owner_ref(struct btrfs_root *root,
5482 struct extent_record *rec,
5483 struct extent_buffer *buf)
5485 struct extent_backref *node;
5486 struct tree_backref *back;
5487 struct btrfs_root *ref_root;
5488 struct btrfs_key key;
5489 struct btrfs_path path;
5490 struct extent_buffer *parent;
5495 list_for_each_entry(node, &rec->backrefs, list) {
5498 if (!node->found_ref)
5500 if (node->full_backref)
5502 back = to_tree_backref(node);
5503 if (btrfs_header_owner(buf) == back->root)
5506 BUG_ON(rec->is_root);
5508 /* try to find the block by search corresponding fs tree */
5509 key.objectid = btrfs_header_owner(buf);
5510 key.type = BTRFS_ROOT_ITEM_KEY;
5511 key.offset = (u64)-1;
5513 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5514 if (IS_ERR(ref_root))
5517 level = btrfs_header_level(buf);
5519 btrfs_item_key_to_cpu(buf, &key, 0);
5521 btrfs_node_key_to_cpu(buf, &key, 0);
/*
 * Stop the search one level above @buf so that path.nodes[level + 1] is
 * the node that would be the parent of @buf.
 */
5523 btrfs_init_path(&path);
5524 path.lowest_level = level + 1;
5525 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5529 parent = path.nodes[level + 1];
5530 if (parent && buf->start == btrfs_node_blockptr(parent,
5531 path.slots[level + 1]))
5534 btrfs_release_path(&path);
5535 return found ? 0 : 1;
/*
 * Walk the backrefs of @rec as tree backrefs and compare each one's root
 * with BTRFS_EXTENT_TREE_OBJECTID; full_backref is tested before the
 * comparison.
 *
 * NOTE(review): by its name this reports whether @rec belongs to the extent
 * tree; the return statements and any filtering of data backrefs are not
 * visible here - confirm.
 */
5538 static int is_extent_tree_record(struct extent_record *rec)
5540 struct list_head *cur = rec->backrefs.next;
5541 struct extent_backref *node;
5542 struct tree_backref *back;
5545 while(cur != &rec->backrefs) {
5546 node = to_extent_backref(cur);
5550 back = to_tree_backref(node);
5551 if (node->full_backref)
5553 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register a corrupt extent for a block identified by start/len.
 *
 * Looks up the range in @extent_cache, maps the hit to its extent_record and
 * tests it with is_extent_tree_record(). The record's parent_key is converted
 * to CPU byte order and passed, together with start and len, to
 * btrfs_add_corrupt_extent_record(), whose result is returned.
 *
 * NOTE(review): the outcome when the lookup misses or when
 * is_extent_tree_record() returns 0 is not visible here - confirm.
 */
5560 static int record_bad_block_io(struct btrfs_fs_info *info,
5561 struct cache_tree *extent_cache,
5564 struct extent_record *rec;
5565 struct cache_extent *cache;
5566 struct btrfs_key key;
5568 cache = lookup_cache_extent(extent_cache, start, len);
5572 rec = container_of(cache, struct extent_record, cache);
5573 if (!is_extent_tree_record(rec))
5576 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5577 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 inside tree block @buf.
 *
 * Node (non-zero header level): the two btrfs_key_ptr structures are read
 * out and written back in swapped positions; btrfs_fixup_low_keys() is
 * called with the key at index 0 for the level above @buf.
 *
 * Leaf: both item payloads are copied into temporary heap buffers and
 * written back crossed, the item offsets and sizes are exchanged, and the two
 * keys are rewritten through btrfs_set_item_key_unsafe() with path->slots[0]
 * pointed at each slot in turn.
 *
 * NOTE(review): the condition guarding the low-key fixup, the allocation
 * failure handling, the release of the temporary buffers and the return
 * value are not visible here - confirm.
 */
5580 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5581 struct extent_buffer *buf, int slot)
5583 if (btrfs_header_level(buf)) {
5584 struct btrfs_key_ptr ptr1, ptr2;
5586 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5587 sizeof(struct btrfs_key_ptr));
5588 read_extent_buffer(buf, &ptr2,
5589 btrfs_node_key_ptr_offset(slot + 1),
5590 sizeof(struct btrfs_key_ptr));
5591 write_extent_buffer(buf, &ptr1,
5592 btrfs_node_key_ptr_offset(slot + 1),
5593 sizeof(struct btrfs_key_ptr));
5594 write_extent_buffer(buf, &ptr2,
5595 btrfs_node_key_ptr_offset(slot),
5596 sizeof(struct btrfs_key_ptr));
5598 struct btrfs_disk_key key;
5599 btrfs_node_key(buf, &key, 0);
5600 btrfs_fixup_low_keys(root, path, &key,
5601 btrfs_header_level(buf) + 1);
5604 struct btrfs_item *item1, *item2;
5605 struct btrfs_key k1, k2;
5606 char *item1_data, *item2_data;
5607 u32 item1_offset, item2_offset, item1_size, item2_size;
5609 item1 = btrfs_item_nr(slot);
5610 item2 = btrfs_item_nr(slot + 1);
5611 btrfs_item_key_to_cpu(buf, &k1, slot);
5612 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5613 item1_offset = btrfs_item_offset(buf, item1);
5614 item2_offset = btrfs_item_offset(buf, item2);
5615 item1_size = btrfs_item_size(buf, item1);
5616 item2_size = btrfs_item_size(buf, item2);
5618 item1_data = malloc(item1_size);
5621 item2_data = malloc(item2_size);
5627 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5628 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but is written with
 * item2_size (and item2_data with item1_size). If the two sizes differ
 * this truncates one payload and reads past the end of the other buffer -
 * confirm whether equal sizes are guaranteed by the callers.
 */
5630 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5631 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5635 btrfs_set_item_offset(buf, item1, item2_offset);
5636 btrfs_set_item_offset(buf, item2, item1_offset);
5637 btrfs_set_item_size(buf, item1, item2_size);
5638 btrfs_set_item_size(buf, item2, item1_size);
/* Rewrite both keys; the helper acts on the slot stored in path->slots[0]. */
5640 path->slots[0] = slot;
5641 btrfs_set_item_key_unsafe(root, path, &k2);
5642 path->slots[0] = slot + 1;
5643 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->nodes[path->lowest_level].
 *
 * Every adjacent pair of keys (i, i + 1) is loaded, either as node keys or as
 * item keys, and compared with btrfs_comp_cpu_keys(); swap_values() is called
 * for a pair and the buffer is marked dirty.
 *
 * NOTE(review): the branch selecting node vs. item keys, what happens when
 * the comparison is < 0 (presumably the pair is already ordered and is
 * skipped), error handling after swap_values() and the return value are not
 * visible here - confirm. Also confirm the block can never have zero items,
 * since the loop bound is nritems - 1.
 */
5648 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5650 struct extent_buffer *buf;
5651 struct btrfs_key k1, k2;
5653 int level = path->lowest_level;
5656 buf = path->nodes[level];
5657 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5659 btrfs_node_key_to_cpu(buf, &k1, i);
5660 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5662 btrfs_item_key_to_cpu(buf, &k1, i);
5663 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5665 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5667 ret = swap_values(root, path, buf, i);
5670 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item at @slot from leaf @buf when its key type is one that the
 * checker can tolerate losing (DIR_INDEX, EXTENT_ITEM, METADATA_ITEM,
 * TREE_BLOCK_REF, EXTENT_DATA_REF).
 *
 * The removal is done by moving the item headers that follow @slot down by
 * one position and decrementing the header's item count; only item headers
 * are moved by the visible code, item payload bytes are left where they are.
 * btrfs_fixup_low_keys() is called with the key of item 0, and the buffer is
 * marked dirty.
 *
 * NOTE(review): the result for unsupported key types, the condition guarding
 * the low-key fixup (presumably slot == 0) and the return value are not
 * visible here - confirm.
 */
5676 static int delete_bogus_item(struct btrfs_root *root,
5677 struct btrfs_path *path,
5678 struct extent_buffer *buf, int slot)
5680 struct btrfs_key key;
5681 int nritems = btrfs_header_nritems(buf);
5683 btrfs_item_key_to_cpu(buf, &key, slot);
5685 /* These are all the keys we can deal with missing. */
5686 if (key.type != BTRFS_DIR_INDEX_KEY &&
5687 key.type != BTRFS_EXTENT_ITEM_KEY &&
5688 key.type != BTRFS_METADATA_ITEM_KEY &&
5689 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5690 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5693 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5694 (unsigned long long)key.objectid, key.type,
5695 (unsigned long long)key.offset, slot, buf->start);
5696 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5697 btrfs_item_nr_offset(slot + 1),
5698 sizeof(struct btrfs_item) *
5699 (nritems - slot - 1));
5700 btrfs_set_header_nritems(buf, nritems - 1);
5702 struct btrfs_disk_key disk_key;
5704 btrfs_item_key(buf, &disk_key, 0);
5705 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5707 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * For each item the end of its data is compared with where it should end:
 * BTRFS_LEAF_DATA_SIZE(root) for item 0, and the data offset of the previous
 * item for every later item. If the item ends beyond that limit,
 * delete_bogus_item() is attempted; if it ends short of the limit, the gap is
 * taken as the shift, the item data is moved up by that many bytes and the
 * item offset is updated.
 *
 * NOTE(review): the handling of delete_bogus_item() results, the guard around
 * the shifting code, the new offset value passed to btrfs_set_item_offset()
 * and the code following the trailing comment are not visible here - confirm.
 */
5711 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5713 struct extent_buffer *buf;
5717 /* We should only get this for leaves */
5718 BUG_ON(path->lowest_level);
5719 buf = path->nodes[0];
5721 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5722 unsigned int shift = 0, offset;
5724 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5725 BTRFS_LEAF_DATA_SIZE(root)) {
5726 if (btrfs_item_end_nr(buf, i) >
5727 BTRFS_LEAF_DATA_SIZE(root)) {
5728 ret = delete_bogus_item(root, path, buf, i);
5731 fprintf(stderr, "item is off the end of the "
5732 "leaf, can't fix\n");
5736 shift = BTRFS_LEAF_DATA_SIZE(root) -
5737 btrfs_item_end_nr(buf, i);
5738 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5739 btrfs_item_offset_nr(buf, i - 1)) {
5740 if (btrfs_item_end_nr(buf, i) >
5741 btrfs_item_offset_nr(buf, i - 1)) {
5742 ret = delete_bogus_item(root, path, buf, i);
5745 fprintf(stderr, "items overlap, can't fix\n");
5749 shift = btrfs_item_offset_nr(buf, i - 1) -
5750 btrfs_item_end_nr(buf, i);
5755 printf("Shifting item nr %d by %u bytes in block %llu\n",
5756 i, shift, (unsigned long long)buf->start);
5757 offset = btrfs_item_offset_nr(buf, i);
5758 memmove_extent_buffer(buf,
5759 btrfs_leaf_data(buf) + offset + shift,
5760 btrfs_leaf_data(buf) + offset,
5761 btrfs_item_size_nr(buf, i));
5762 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5764 btrfs_mark_buffer_dirty(buf);
5768 * We may have moved things, in which case we want to exit so we don't
5769 * write those changes out. Once we have proper abort functionality in
5770 * progs this can be changed to something nicer.
5777 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5778 * then just return -EIO.
/*
 * Repair driver for a tree block that failed validation with @status.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are handled. All roots referencing buf->start are collected with
 * btrfs_find_all_roots(); for each root id the root is read, a transaction
 * is started, and the tree is searched down to the level of @buf using the
 * first key of @buf (block checking disabled on the path, last search
 * argument 1 - presumably requesting COW). fix_key_order() or
 * fix_item_offset() is then run on the resulting path and the transaction is
 * committed.
 *
 * NOTE(review): the error branches, the cleanup of the roots list and the
 * return value are not visible here - confirm.
 */
5780 static int try_to_fix_bad_block(struct btrfs_root *root,
5781 struct extent_buffer *buf,
5782 enum btrfs_tree_block_status status)
5784 struct btrfs_trans_handle *trans;
5785 struct ulist *roots;
5786 struct ulist_node *node;
5787 struct btrfs_root *search_root;
5788 struct btrfs_path path;
5789 struct ulist_iterator iter;
5790 struct btrfs_key root_key, key;
5793 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5794 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5797 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5801 btrfs_init_path(&path);
5802 ULIST_ITER_INIT(&iter);
5803 while ((node = ulist_next(roots, &iter))) {
5804 root_key.objectid = node->val;
5805 root_key.type = BTRFS_ROOT_ITEM_KEY;
5806 root_key.offset = (u64)-1;
5808 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5815 trans = btrfs_start_transaction(search_root, 0);
5816 if (IS_ERR(trans)) {
5817 ret = PTR_ERR(trans);
/*
 * Search down to the level of @buf only, and skip block validation on the
 * way since the target block is known to be bad.
 */
5821 path.lowest_level = btrfs_header_level(buf);
5822 path.skip_check_block = 1;
5823 if (path.lowest_level)
5824 btrfs_node_key_to_cpu(buf, &key, 0);
5826 btrfs_item_key_to_cpu(buf, &key, 0);
5827 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5830 btrfs_commit_transaction(trans, search_root);
5833 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5834 ret = fix_key_order(search_root, &path);
5835 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5836 ret = fix_item_offset(search_root, &path);
5838 btrfs_commit_transaction(trans, search_root);
5841 btrfs_release_path(&path);
5842 btrfs_commit_transaction(trans, search_root);
5845 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record in @extent_cache.
 *
 * The record for [buf->start, buf->len) is looked up and its generation,
 * info_objectid (objectid of the block's first key, when the block has items)
 * and info_level are filled in. The block is then checked with
 * btrfs_check_leaf() or btrfs_check_node() against the record's parent_key.
 * A non-clean status leads to try_to_fix_bad_block() and a "bad block"
 * message; a clean block is marked content_checked, and owner_ref_checked is
 * set directly when @flags contains BTRFS_BLOCK_FLAG_FULL_BACKREF or after
 * check_owner_ref() otherwise. Finally maybe_free_extent_rec() is called.
 *
 * NOTE(review): the lookup-miss path, the condition under which repair is
 * attempted, the use of the check_owner_ref() result and the return values
 * are not visible here - confirm.
 */
5849 static int check_block(struct btrfs_root *root,
5850 struct cache_tree *extent_cache,
5851 struct extent_buffer *buf, u64 flags)
5853 struct extent_record *rec;
5854 struct cache_extent *cache;
5855 struct btrfs_key key;
5856 enum btrfs_tree_block_status status;
5860 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5863 rec = container_of(cache, struct extent_record, cache);
5864 rec->generation = btrfs_header_generation(buf);
5866 level = btrfs_header_level(buf);
5867 if (btrfs_header_nritems(buf) > 0) {
5870 btrfs_item_key_to_cpu(buf, &key, 0);
5872 btrfs_node_key_to_cpu(buf, &key, 0);
5874 rec->info_objectid = key.objectid;
5876 rec->info_level = level;
5878 if (btrfs_is_leaf(buf))
5879 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5881 status = btrfs_check_node(root, &rec->parent_key, buf);
5883 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5885 status = try_to_fix_bad_block(root, buf, status);
5886 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5888 fprintf(stderr, "bad block %llu\n",
5889 (unsigned long long)buf->start);
5892 * Signal to callers we need to start the scan over
5893 * again since we'll have cowed blocks.
5898 rec->content_checked = 1;
5899 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5900 rec->owner_ref_checked = 1;
5902 ret = check_owner_ref(root, rec, buf);
5904 rec->owner_ref_checked = 1;
5908 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the backrefs of @rec for a tree backref matching @parent or @root.
 *
 * Two comparisons are visible: back->parent against @parent, preceded by a
 * test of !full_backref, and back->root against @root, preceded by a test of
 * full_backref.
 *
 * NOTE(review): the condition choosing between the two comparisons
 * (presumably whether @parent is non-zero), the skipping of data backrefs and
 * the return statements are not visible here - confirm.
 */
5912 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5913 u64 parent, u64 root)
5915 struct list_head *cur = rec->backrefs.next;
5916 struct extent_backref *node;
5917 struct tree_backref *back;
5919 while(cur != &rec->backrefs) {
5920 node = to_extent_backref(cur);
5924 back = to_tree_backref(node);
5926 if (!node->full_backref)
5928 if (parent == back->parent)
5931 if (node->full_backref)
5933 if (back->root == root)
/*
 * Allocate a tree backref and append it to the backref list of @rec.
 *
 * Only the embedded extent_backref node is zeroed, not the whole structure.
 * One branch stores @parent and marks the node as a full backref, the other
 * clears full_backref.
 *
 * NOTE(review): the allocation failure check, the branch condition, the
 * assignment of ref->root and the return value are not visible here -
 * confirm.
 */
5940 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5941 u64 parent, u64 root)
5943 struct tree_backref *ref = malloc(sizeof(*ref));
5947 memset(&ref->node, 0, sizeof(ref->node));
5949 ref->parent = parent;
5950 ref->node.full_backref = 1;
5953 ref->node.full_backref = 0;
5955 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search the backrefs of @rec for a data backref matching the given
 * identity.
 *
 * One comparison matches back->parent against @parent; the other matches
 * root, owner and offset together. In the latter case, when found_ref is set
 * and the candidate already has found_ref, its bytes and disk_bytenr are
 * additionally compared with @bytes and @disk_bytenr.
 *
 * NOTE(review): the condition choosing between the two comparisons, the
 * action taken on a bytes/disk_bytenr mismatch (presumably the candidate is
 * rejected) and the return statements are not visible here - confirm.
 */
5960 static struct data_backref *find_data_backref(struct extent_record *rec,
5961 u64 parent, u64 root,
5962 u64 owner, u64 offset,
5964 u64 disk_bytenr, u64 bytes)
5966 struct list_head *cur = rec->backrefs.next;
5967 struct extent_backref *node;
5968 struct data_backref *back;
5970 while(cur != &rec->backrefs) {
5971 node = to_extent_backref(cur);
5975 back = to_data_backref(node);
5977 if (!node->full_backref)
5979 if (parent == back->parent)
5982 if (node->full_backref)
5984 if (back->root == root && back->owner == owner &&
5985 back->offset == offset) {
5986 if (found_ref && node->found_ref &&
5987 (back->bytes != bytes ||
5988 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref and append it to the backref list of @rec.
 *
 * Only the embedded extent_backref node is zeroed; it is flagged is_data.
 * One branch stores @parent and marks the node as a full backref, the other
 * stores @offset and clears full_backref. bytes is set from max_size, and
 * rec->max_size is raised when max_size is larger.
 *
 * NOTE(review): the allocation failure check, the branch condition, the
 * remaining field assignments and the return value are not visible here -
 * confirm.
 */
5997 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5998 u64 parent, u64 root,
5999 u64 owner, u64 offset,
6002 struct data_backref *ref = malloc(sizeof(*ref));
6006 memset(&ref->node, 0, sizeof(ref->node));
6007 ref->node.is_data = 1;
6010 ref->parent = parent;
6013 ref->node.full_backref = 1;
6017 ref->offset = offset;
6018 ref->node.full_backref = 0;
6020 ref->bytes = max_size;
6023 list_add_tail(&ref->node.list, &rec->backrefs);
6024 if (max_size > rec->max_size)
6025 rec->max_size = max_size;
6029 /* Check if the type of extent matches with its chunk */
/*
 * Set rec->wrong_chunk_type when the block group containing rec->start has
 * flags that do not fit the kind of extent @rec describes.
 *
 *  - Data extent (rec->metadata unset): the block group must carry
 *    BTRFS_BLOCK_GROUP_DATA.
 *  - Metadata extent: the block group must carry SYSTEM or METADATA. If @rec
 *    has backrefs, the first one is inspected: a data backref is always
 *    wrong; otherwise a tree backref rooted in the chunk tree requires a
 *    SYSTEM block group and any other root requires METADATA.
 *
 * The block group is looked up through the file-level global_info.
 *
 * NOTE(review): the handling of a failed block group lookup and the early
 * returns after each verdict are not visible here - confirm.
 */
6030 static void check_extent_type(struct extent_record *rec)
6032 struct btrfs_block_group_cache *bg_cache;
6034 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6038 /* data extent, check chunk directly*/
6039 if (!rec->metadata) {
6040 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6041 rec->wrong_chunk_type = 1;
6045 /* metadata extent, check the obvious case first */
6046 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6047 BTRFS_BLOCK_GROUP_METADATA))) {
6048 rec->wrong_chunk_type = 1;
6053 * Check SYSTEM extent, as it's also marked as metadata, we can only
6054 * make sure it's a SYSTEM extent by its backref
6056 if (!list_empty(&rec->backrefs)) {
6057 struct extent_backref *node;
6058 struct tree_backref *tback;
6061 node = to_extent_backref(rec->backrefs.next);
6062 if (node->is_data) {
6063 /* tree block shouldn't have data backref */
6064 rec->wrong_chunk_type = 1;
6067 tback = container_of(node, struct tree_backref, node);
6069 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6070 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6072 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6073 if (!(bg_cache->flags & bg_type))
6074 rec->wrong_chunk_type = 1;
6079 * Allocate a new extent record, fill default values from @tmpl and insert into
6080 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6081 * the cache, otherwise it fails.
/*
 * Build a fresh extent_record from @tmpl and insert it into @extent_cache
 * without first checking whether the range is already present.
 *
 * tmpl->max_size must be non-zero (BUG_ON). Most fields are copied from
 * @tmpl; num_duplicates, bad_full_backref, crossing_stripes and
 * wrong_chunk_type start at 0 and flag_block_full_backref at FLAG_UNSET.
 * rec->nr is the larger of tmpl->nr and tmpl->max_size, while the cache
 * entry size is tmpl->nr. After insertion the file-level bytes_used counter
 * grows by rec->nr, the stripe-crossing check is evaluated using the
 * filesystem nodesize, and check_extent_type() is run.
 *
 * NOTE(review): the allocation and insertion failure handling, the condition
 * guarding the stripe-crossing check (presumably metadata only) and the
 * return value are not visible here - confirm.
 */
6083 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6084 struct extent_record *tmpl)
6086 struct extent_record *rec;
6089 BUG_ON(tmpl->max_size == 0);
6090 rec = malloc(sizeof(*rec));
6093 rec->start = tmpl->start;
6094 rec->max_size = tmpl->max_size;
6095 rec->nr = max(tmpl->nr, tmpl->max_size);
6096 rec->found_rec = tmpl->found_rec;
6097 rec->content_checked = tmpl->content_checked;
6098 rec->owner_ref_checked = tmpl->owner_ref_checked;
6099 rec->num_duplicates = 0;
6100 rec->metadata = tmpl->metadata;
6101 rec->flag_block_full_backref = FLAG_UNSET;
6102 rec->bad_full_backref = 0;
6103 rec->crossing_stripes = 0;
6104 rec->wrong_chunk_type = 0;
6105 rec->is_root = tmpl->is_root;
6106 rec->refs = tmpl->refs;
6107 rec->extent_item_refs = tmpl->extent_item_refs;
6108 rec->parent_generation = tmpl->parent_generation;
6109 INIT_LIST_HEAD(&rec->backrefs);
6110 INIT_LIST_HEAD(&rec->dups);
6111 INIT_LIST_HEAD(&rec->list);
6112 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6113 rec->cache.start = tmpl->start;
6114 rec->cache.size = tmpl->nr;
6115 ret = insert_cache_extent(extent_cache, &rec->cache);
6120 bytes_used += rec->nr;
6123 rec->crossing_stripes = check_crossing_stripes(global_info,
6124 rec->start, global_info->nodesize);
6125 check_extent_type(rec);
6130 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6132 * - refs - if found, increase refs
6133 * - is_root - if found, set
6134 * - content_checked - if found, set
6135 * - owner_ref_checked - if found, set
6137 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record covering [tmpl->start, tmpl->nr), or
 * create a new record through add_extent_rec_nolookup() when none exists.
 *
 * For an existing record:
 *  - nr is reset to the larger of tmpl->nr and tmpl->max_size;
 *  - when @tmpl comes from an extent item (found_rec) and either its start
 *    differs from the record's or the record already had found_rec, a
 *    duplicate entry is allocated and queued on rec->dups, and the record is
 *    put on the global duplicate_extents list if not already queued;
 *  - extent_item_refs is taken from @tmpl (a message is printed if the record
 *    already had a value) unless this was a duplicate;
 *  - content_checked / owner_ref_checked are only ever set, parent_key is
 *    always overwritten, parent_generation is taken when non-zero and
 *    max_size only grows;
 *  - the stripe-crossing and chunk-type checks are refreshed and
 *    maybe_free_extent_rec() is called.
 *
 * NOTE(review): several guarding conditions, the failure handling for the
 * duplicate allocation, the refs/is_root updates mentioned in the comment
 * above this function and the return values are not visible here - confirm.
 */
6139 static int add_extent_rec(struct cache_tree *extent_cache,
6140 struct extent_record *tmpl)
6142 struct extent_record *rec;
6143 struct cache_extent *cache;
6147 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6149 rec = container_of(cache, struct extent_record, cache);
6153 rec->nr = max(tmpl->nr, tmpl->max_size);
6156 * We need to make sure to reset nr to whatever the extent
6157 * record says was the real size, this way we can compare it to
6160 if (tmpl->found_rec) {
6161 if (tmpl->start != rec->start || rec->found_rec) {
6162 struct extent_record *tmp;
6165 if (list_empty(&rec->list))
6166 list_add_tail(&rec->list,
6167 &duplicate_extents);
6170 * We have to do this song and dance in case we
6171 * find an extent record that falls inside of
6172 * our current extent record but does not have
6173 * the same objectid.
6175 tmp = malloc(sizeof(*tmp));
6178 tmp->start = tmpl->start;
6179 tmp->max_size = tmpl->max_size;
6182 tmp->metadata = tmpl->metadata;
6183 tmp->extent_item_refs = tmpl->extent_item_refs;
6184 INIT_LIST_HEAD(&tmp->list);
6185 list_add_tail(&tmp->list, &rec->dups);
6186 rec->num_duplicates++;
6193 if (tmpl->extent_item_refs && !dup) {
6194 if (rec->extent_item_refs) {
6195 fprintf(stderr, "block %llu rec "
6196 "extent_item_refs %llu, passed %llu\n",
6197 (unsigned long long)tmpl->start,
6198 (unsigned long long)
6199 rec->extent_item_refs,
6200 (unsigned long long)tmpl->extent_item_refs);
6202 rec->extent_item_refs = tmpl->extent_item_refs;
6206 if (tmpl->content_checked)
6207 rec->content_checked = 1;
6208 if (tmpl->owner_ref_checked)
6209 rec->owner_ref_checked = 1;
6210 memcpy(&rec->parent_key, &tmpl->parent_key,
6211 sizeof(tmpl->parent_key));
6212 if (tmpl->parent_generation)
6213 rec->parent_generation = tmpl->parent_generation;
6214 if (rec->max_size < tmpl->max_size)
6215 rec->max_size = tmpl->max_size;
6218 * A metadata extent can't cross stripe_len boundary, otherwise
6219 * kernel scrub won't be able to handle it.
6220 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6224 rec->crossing_stripes = check_crossing_stripes(
6225 global_info, rec->start,
6226 global_info->nodesize);
6227 check_extent_type(rec);
6228 maybe_free_extent_rec(extent_cache, rec);
6232 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (identified by @parent / @root) for the extent at
 * @bytenr.
 *
 * If no extent record covers @bytenr, a template record is inserted first and
 * looked up again. The matching backref is found with find_tree_backref() or
 * created with alloc_tree_backref(). Depending on the caller's source, either
 * found_ref or found_extent_tree is set on the backref; a message is printed
 * when the flag was already set. Afterwards the chunk type check is refreshed
 * and maybe_free_extent_rec() is called.
 *
 * NOTE(review): the template fields other than start, the handling of a
 * record whose start differs from @bytenr, the branch on @found_ref and the
 * return values are not visible here - confirm.
 */
6237 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6238 u64 parent, u64 root, int found_ref)
6240 struct extent_record *rec;
6241 struct tree_backref *back;
6242 struct cache_extent *cache;
6245 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6247 struct extent_record tmpl;
6249 memset(&tmpl, 0, sizeof(tmpl));
6250 tmpl.start = bytenr;
6255 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6259 /* really a bug in cache_extent implement now */
6260 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6265 rec = container_of(cache, struct extent_record, cache);
6266 if (rec->start != bytenr) {
6268 * Several cause, from unaligned bytenr to over lapping extents
6273 back = find_tree_backref(rec, parent, root);
6275 back = alloc_tree_backref(rec, parent, root);
6281 if (back->node.found_ref) {
6282 fprintf(stderr, "Extent back ref already exists "
6283 "for %llu parent %llu root %llu \n",
6284 (unsigned long long)bytenr,
6285 (unsigned long long)parent,
6286 (unsigned long long)root);
6288 back->node.found_ref = 1;
6290 if (back->node.found_extent_tree) {
6291 fprintf(stderr, "Extent back ref already exists "
6292 "for %llu parent %llu root %llu \n",
6293 (unsigned long long)bytenr,
6294 (unsigned long long)parent,
6295 (unsigned long long)root);
6297 back->node.found_extent_tree = 1;
6299 check_extent_type(rec);
6300 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * If no extent record covers @bytenr, a template record (start and max_size
 * are the visible fields) is inserted and looked up again. rec->max_size only
 * grows. The backref is located with find_data_backref() or created with
 * alloc_data_backref().
 *
 * Two update paths are visible:
 *  - reference found while walking file extents: num_refs must be 1, an
 *    already-found backref must have the same byte count, the found counters
 *    are bumped, bytes / disk_bytenr are stored and the record is marked
 *    content_checked and owner_ref_checked;
 *  - reference found in the extent tree: a message is printed if it was
 *    already seen there, then num_refs is stored and found_extent_tree set.
 *
 * maybe_free_extent_rec() is called at the end.
 *
 * NOTE(review): the branch on @found_ref, the error paths and the return
 * values are not visible here - confirm.
 */
6304 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6305 u64 parent, u64 root, u64 owner, u64 offset,
6306 u32 num_refs, int found_ref, u64 max_size)
6308 struct extent_record *rec;
6309 struct data_backref *back;
6310 struct cache_extent *cache;
6313 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6315 struct extent_record tmpl;
6317 memset(&tmpl, 0, sizeof(tmpl));
6318 tmpl.start = bytenr;
6320 tmpl.max_size = max_size;
6322 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6326 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6331 rec = container_of(cache, struct extent_record, cache);
6332 if (rec->max_size < max_size)
6333 rec->max_size = max_size;
6336 * If found_ref is set then max_size is the real size and must match the
6337 * existing refs. So if we have already found a ref then we need to
6338 * make sure that this ref matches the existing one, otherwise we need
6339 * to add a new backref so we can notice that the backrefs don't match
6340 * and we need to figure out who is telling the truth. This is to
6341 * account for that awful fsync bug I introduced where we'd end up with
6342 * a btrfs_file_extent_item that would have its length include multiple
6343 * prealloc extents or point inside of a prealloc extent.
6345 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6348 back = alloc_data_backref(rec, parent, root, owner, offset,
6354 BUG_ON(num_refs != 1);
6355 if (back->node.found_ref)
6356 BUG_ON(back->bytes != max_size);
6357 back->node.found_ref = 1;
6358 back->found_ref += 1;
6359 back->bytes = max_size;
6360 back->disk_bytenr = bytenr;
6362 rec->content_checked = 1;
6363 rec->owner_ref_checked = 1;
6365 if (back->node.found_extent_tree) {
6366 fprintf(stderr, "Extent back ref already exists "
6367 "for %llu parent %llu root %llu "
6368 "owner %llu offset %llu num_refs %lu\n",
6369 (unsigned long long)bytenr,
6370 (unsigned long long)parent,
6371 (unsigned long long)root,
6372 (unsigned long long)owner,
6373 (unsigned long long)offset,
6374 (unsigned long)num_refs);
6376 back->num_refs = num_refs;
6377 back->node.found_extent_tree = 1;
6379 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range [bytenr, bytenr + size) for processing: it is added to
 * @seen first and then to @pending. The result of the second insertion is
 * not stored.
 *
 * NOTE(review): the check on the first insertion's result (presumably an
 * early return when the range was already seen) and the return value are not
 * visible here - confirm.
 */
6383 static int add_pending(struct cache_tree *pending,
6384 struct cache_tree *seen, u64 bytenr, u32 size)
6387 ret = add_cache_extent(seen, bytenr, size);
6390 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process and store up to @bits_nr
 * (start, size) pairs in @bits.
 *
 * Visible order of preference:
 *  1. the first entry of @reada, returned alone in bits[0];
 *  2. entries of @nodes, searched from slightly before @last (32768 bytes
 *     back when @last is larger than that) and falling back to the start of
 *     the tree;
 *  3. entries of @pending from the start of the tree.
 * When at least 9 slots remain unused after filling, further @pending entries
 * lying within 32768 bytes of the end of the previously chosen range are
 * appended.
 *
 * NOTE(review): the conditions separating these cases, the counter updates
 * and the return value (presumably the number of entries stored) are not
 * visible here - confirm.
 */
6394 static int pick_next_pending(struct cache_tree *pending,
6395 struct cache_tree *reada,
6396 struct cache_tree *nodes,
6397 u64 last, struct block_info *bits, int bits_nr,
6400 unsigned long node_start = last;
6401 struct cache_extent *cache;
6404 cache = search_cache_extent(reada, 0);
6406 bits[0].start = cache->start;
6407 bits[0].size = cache->size;
6412 if (node_start > 32768)
6413 node_start -= 32768;
6415 cache = search_cache_extent(nodes, node_start);
6417 cache = search_cache_extent(nodes, 0);
6420 cache = search_cache_extent(pending, 0);
6425 bits[ret].start = cache->start;
6426 bits[ret].size = cache->size;
6427 cache = next_cache_extent(cache);
6429 } while (cache && ret < bits_nr);
6435 bits[ret].start = cache->start;
6436 bits[ret].size = cache->size;
6437 cache = next_cache_extent(cache);
6439 } while (cache && ret < bits_nr);
6441 if (bits_nr - ret > 8) {
6442 u64 lookup = bits[0].start + bits[0].size;
6443 struct cache_extent *next;
6444 next = search_cache_extent(pending, lookup);
6446 if (next->start - lookup > 32768)
6448 bits[ret].start = next->start;
6449 bits[ret].size = next->size;
6450 lookup = next->start + next->size;
6454 next = next_cache_extent(next);
/*
 * Per-entry destructor for the chunk cache: maps @cache to its enclosing
 * chunk_record and unlinks the record's list and dextents list heads.
 *
 * NOTE(review): the release of the record memory is not visible here -
 * confirm.
 */
6462 static void free_chunk_record(struct cache_extent *cache)
6464 struct chunk_record *rec;
6466 rec = container_of(cache, struct chunk_record, cache);
6467 list_del_init(&rec->list);
6468 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache by passing free_chunk_record as the
 * per-entry callback to cache_tree_free_extents().
 */
6472 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6474 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node destructor for the device cache: maps the rb-tree @node to its
 * enclosing device_record.
 *
 * NOTE(review): the release of the record memory is not visible here.
 * FREE_RB_BASED_TREE presumably generates the tree-wide free function for the
 * device cache using this callback - confirm against the macro definition.
 */
6477 static void free_device_record(struct rb_node *node)
6479 struct device_record *rec;
6481 rec = container_of(node, struct device_record, node);
6485 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the
 * tree->block_groups list.
 *
 * NOTE(review): the handling of an insertion failure (presumably an early
 * return before the list append) and the return value are not visible here -
 * confirm.
 */
6487 int insert_block_group_record(struct block_group_tree *tree,
6488 struct block_group_record *bg_rec)
6492 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6496 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry destructor for the block group tree: maps @cache to its enclosing
 * block_group_record and unlinks it from its list.
 *
 * NOTE(review): the release of the record memory is not visible here -
 * confirm.
 */
6500 static void free_block_group_record(struct cache_extent *cache)
6502 struct block_group_record *rec;
6504 rec = container_of(cache, struct block_group_record, cache);
6505 list_del_init(&rec->list);
/*
 * Release every entry of the block group tree by passing
 * free_block_group_record as the per-entry callback to
 * cache_tree_free_extents().
 */
6509 void free_block_group_tree(struct block_group_tree *tree)
6511 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent tree and queue it on both orphan
 * lists (no_chunk_orphans via chunk_list, no_device_orphans via device_list).
 * insert_cache_extent2() is used instead of the plain insert because extents
 * on different devices may share start and size.
 *
 * NOTE(review): the handling of an insertion failure and the return value are
 * not visible here - confirm.
 */
6514 int insert_device_extent_record(struct device_extent_tree *tree,
6515 struct device_extent_record *de_rec)
6520 * Device extent is a bit different from the other extents, because
6521 * the extents which belong to the different devices may have the
6522 * same start and size, so we need use the special extent cache
6523 * search/insert functions.
6525 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6529 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6530 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry destructor for the device extent tree: maps @cache to its
 * enclosing device_extent_record and unlinks chunk_list and device_list when
 * they are still linked.
 *
 * NOTE(review): the release of the record memory is not visible here -
 * confirm.
 */
6534 static void free_device_extent_record(struct cache_extent *cache)
6536 struct device_extent_record *rec;
6538 rec = container_of(cache, struct device_extent_record, cache);
6539 if (!list_empty(&rec->chunk_list))
6540 list_del_init(&rec->chunk_list);
6541 if (!list_empty(&rec->device_list))
6542 list_del_init(&rec->device_list);
/*
 * Release every entry of the device extent tree by passing
 * free_device_extent_record as the per-entry callback to
 * cache_tree_free_extents().
 */
6546 void free_device_extent_tree(struct device_extent_tree *tree)
6548 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6551 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0-format extent ref item at @slot of @leaf (only built when
 * BTRFS_COMPAT_EXTENT_TREE_V0 is defined).
 *
 * The ref's objectid decides the kind of backref: values below
 * BTRFS_FIRST_FREE_OBJECTID are recorded as tree backrefs, others as data
 * backrefs carrying the v0 ref count. In both cases key.objectid is the
 * extent bytenr and key.offset is passed as the parent.
 *
 * NOTE(review): the remaining add_tree_backref() arguments and the return
 * value are not visible here - confirm.
 */
6552 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6553 struct extent_buffer *leaf, int slot)
6555 struct btrfs_extent_ref_v0 *ref0;
6556 struct btrfs_key key;
6559 btrfs_item_key_to_cpu(leaf, &key, slot);
6560 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6561 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6562 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6565 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6566 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is zero-allocated with room for the chunk's stripes
 * (btrfs_chunk_record_size(num_stripes)). The cache range is
 * [key->offset, chunk length); the key fields, the leaf generation and the
 * chunk attributes (owner, stripe_len, type, io width/align, sector size,
 * stripe counts) are copied, followed by devid, offset and device UUID for
 * each stripe.
 *
 * NOTE(review): the action after the allocation failure message and the
 * return value are not visible here - confirm.
 */
6572 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6573 struct btrfs_key *key,
6576 struct btrfs_chunk *ptr;
6577 struct chunk_record *rec;
6580 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6581 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6583 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6585 fprintf(stderr, "memory allocation failed\n");
6589 INIT_LIST_HEAD(&rec->list);
6590 INIT_LIST_HEAD(&rec->dextents);
6593 rec->cache.start = key->offset;
6594 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6596 rec->generation = btrfs_header_generation(leaf);
6598 rec->objectid = key->objectid;
6599 rec->type = key->type;
6600 rec->offset = key->offset;
6602 rec->length = rec->cache.size;
6603 rec->owner = btrfs_chunk_owner(leaf, ptr);
6604 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6605 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6606 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6607 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6608 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6609 rec->num_stripes = num_stripes;
6610 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6612 for (i = 0; i < rec->num_stripes; ++i) {
6613 rec->stripes[i].devid =
6614 btrfs_stripe_devid_nr(leaf, ptr, i);
6615 rec->stripes[i].offset =
6616 btrfs_stripe_offset_nr(leaf, ptr, i);
6617 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6618 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid chunk
 * is reported and ignored. Otherwise a chunk_record is built with
 * btrfs_new_chunk_record() and inserted into the cache; a message is printed
 * when an entry for the same range already exists.
 *
 * NOTE(review): the remaining validation arguments, the cleanup on a failed
 * insertion and the return values are not visible here - confirm.
 */
6625 static int process_chunk_item(struct cache_tree *chunk_cache,
6626 struct btrfs_key *key, struct extent_buffer *eb,
6629 struct chunk_record *rec;
6630 struct btrfs_chunk *chunk;
6633 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6635 * Do extra check for this chunk item,
6637 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6638 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6639 * and owner<->key_type check.
6641 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6644 error("chunk(%llu, %llu) is not valid, ignore it",
6645 key->offset, btrfs_chunk_length(eb, chunk));
6648 rec = btrfs_new_chunk_record(eb, key, slot);
6649 ret = insert_cache_extent(chunk_cache, &rec->cache);
6651 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6652 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it into
 * the rb-tree @dev_cache, ordered by device_record_compare.
 *
 * The record is allocated with malloc() (not zeroed). devid is first set from
 * key->offset and later overwritten with the value stored in the item;
 * generation comes from the leaf header, and total/used bytes from the item.
 * A message is printed when a record for the device already exists.
 *
 * NOTE(review): the action after the allocation failure message, the cleanup
 * on a failed insertion and the return values are not visible here - confirm.
 */
6659 static int process_device_item(struct rb_root *dev_cache,
6660 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6662 struct btrfs_dev_item *ptr;
6663 struct device_record *rec;
6666 ptr = btrfs_item_ptr(eb,
6667 slot, struct btrfs_dev_item);
6669 rec = malloc(sizeof(*rec));
6671 fprintf(stderr, "memory allocation failed\n");
6675 rec->devid = key->offset;
6676 rec->generation = btrfs_header_generation(eb);
6678 rec->objectid = key->objectid;
6679 rec->type = key->type;
6680 rec->offset = key->offset;
6682 rec->devid = btrfs_device_id(eb, ptr);
6683 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6684 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6686 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6688 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the block group item at the
 * given slot of @leaf.
 *
 * The record is zero-allocated. The cache range is
 * [key->objectid, key->offset); the key fields and the leaf generation are
 * copied, and the flags are read from the on-disk item.
 *
 * NOTE(review): the action after the allocation failure message and the
 * return value are not visible here - confirm.
 */
6695 struct block_group_record *
6696 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6699 struct btrfs_block_group_item *ptr;
6700 struct block_group_record *rec;
6702 rec = calloc(1, sizeof(*rec));
6704 fprintf(stderr, "memory allocation failed\n");
6708 rec->cache.start = key->objectid;
6709 rec->cache.size = key->offset;
6711 rec->generation = btrfs_header_generation(leaf);
6713 rec->objectid = key->objectid;
6714 rec->type = key->type;
6715 rec->offset = key->offset;
6717 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6718 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6720 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record for the item at @slot of @eb and insert it into
 * @block_group_cache; a message is printed when a record for the same block
 * group already exists.
 *
 * NOTE(review): the cleanup on a failed insertion and the return value are
 * not visible here - confirm.
 */
6725 static int process_block_group_item(struct block_group_tree *block_group_cache,
6726 struct btrfs_key *key,
6727 struct extent_buffer *eb, int slot)
6729 struct block_group_record *rec;
6732 rec = btrfs_new_block_group_record(eb, key, slot);
6733 ret = insert_block_group_record(block_group_cache, rec);
6735 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6736 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the dev extent item at @slot
 * of @leaf.
 *
 * The record is zero-allocated. The cache entry is keyed by key->objectid
 * with start key->offset and a size equal to the extent length read from the
 * item. The key fields, the leaf generation and the owning chunk's objectid
 * are copied, and both list heads are initialised.
 *
 * NOTE(review): the action after the allocation failure message, the field
 * receiving the chunk offset and the return value are not visible here -
 * confirm.
 */
6743 struct device_extent_record *
6744 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6745 struct btrfs_key *key, int slot)
6747 struct device_extent_record *rec;
6748 struct btrfs_dev_extent *ptr;
6750 rec = calloc(1, sizeof(*rec));
6752 fprintf(stderr, "memory allocation failed\n");
6756 rec->cache.objectid = key->objectid;
6757 rec->cache.start = key->offset;
6759 rec->generation = btrfs_header_generation(leaf);
6761 rec->objectid = key->objectid;
6762 rec->type = key->type;
6763 rec->offset = key->offset;
6765 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6766 rec->chunk_objecteid =
6767 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6769 btrfs_dev_extent_chunk_offset(leaf, ptr);
6770 rec->length = btrfs_dev_extent_length(leaf, ptr);
6771 rec->cache.size = rec->length;
6773 INIT_LIST_HEAD(&rec->chunk_list);
6774 INIT_LIST_HEAD(&rec->device_list);
/*
 * Record one device extent item found while scanning a leaf.
 *
 * A device_extent_record is built from (@eb, @key, slot) with
 * btrfs_new_device_extent_record() and inserted into @dev_extent_cache via
 * insert_device_extent_record().  The "Device extent[...] existed." message
 * names the record by objectid, offset and length.
 *
 * NOTE(review): the return type line, the test on `ret` guarding the message
 * and the return statements are not visible here; presumably the message is
 * emitted when the insert reports a duplicate -- confirm.
 */
6780 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6781 struct btrfs_key *key, struct extent_buffer *eb,
6784 struct device_extent_record *rec;
6787 rec = btrfs_new_device_extent_record(eb, key, slot);
6788 ret = insert_device_extent_record(dev_extent_cache, rec);
6791 "Device extent[%llu, %llu, %llu] existed.\n",
6792 rec->objectid, rec->offset, rec->length);
/*
 * Feed one EXTENT_ITEM / METADATA_ITEM from the extent tree into
 * @extent_cache, together with all of its inline backrefs.
 *
 * Steps visible in this function:
 *  - The item key is read from @eb at @slot.  For METADATA_ITEM keys the
 *    extent length is the filesystem nodesize; the other visible assignment
 *    takes key.offset as the length.
 *  - A bytenr that is not sectorsize aligned is reported with error().
 *  - An item shorter than struct btrfs_extent_item is decoded as a v0 extent
 *    item when BTRFS_COMPAT_EXTENT_TREE_V0 is defined, and the resulting
 *    template is passed straight to add_extent_rec().
 *  - Otherwise the ref count is read and the TREE_BLOCK flag is tested.
 *    A metadata extent whose length differs from nodesize, or a data extent
 *    whose length is not sectorsize aligned, is reported with error().
 *  - An extent_record template (start, nr, extent_item_refs, metadata,
 *    max_size) is added with add_extent_rec().
 *  - Inline refs are walked from just past the extent item (also skipping a
 *    btrfs_tree_block_info for tree blocks stored under EXTENT_ITEM_KEY) up
 *    to the end of the item, dispatching on the inline ref type to
 *    add_tree_backref() or add_data_backref().  The message
 *    "corrupt extent record" is printed on the remaining visible path.
 *
 * NOTE(review): several declarations, branch bodies, loop headers and all
 * return statements other than the v0 one are not visible in this extract.
 */
6799 static int process_extent_item(struct btrfs_root *root,
6800 struct cache_tree *extent_cache,
6801 struct extent_buffer *eb, int slot)
6803 struct btrfs_extent_item *ei;
6804 struct btrfs_extent_inline_ref *iref;
6805 struct btrfs_extent_data_ref *dref;
6806 struct btrfs_shared_data_ref *sref;
6807 struct btrfs_key key;
6808 struct extent_record tmpl;
6813 u32 item_size = btrfs_item_size_nr(eb, slot);
6819 btrfs_item_key_to_cpu(eb, &key, slot);
/* Work out the extent length from the key type. */
6821 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6823 num_bytes = root->fs_info->nodesize;
6825 num_bytes = key.offset;
/* The extent start must sit on a sector boundary. */
6828 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6829 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6830 key.objectid, root->fs_info->sectorsize);
/* Too small for a current-format extent item: try the v0 layout. */
6833 if (item_size < sizeof(*ei)) {
6834 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6835 struct btrfs_extent_item_v0 *ei0;
6836 BUG_ON(item_size != sizeof(*ei0));
6837 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6838 refs = btrfs_extent_refs_v0(eb, ei0);
6842 memset(&tmpl, 0, sizeof(tmpl));
6843 tmpl.start = key.objectid;
6844 tmpl.nr = num_bytes;
6845 tmpl.extent_item_refs = refs;
6846 tmpl.metadata = metadata;
6848 tmpl.max_size = num_bytes;
6850 return add_extent_rec(extent_cache, &tmpl);
/* Current-format extent item. */
6853 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6854 refs = btrfs_extent_refs(eb, ei);
6855 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* Length sanity checks: nodesize for metadata, sector aligned for data. */
6859 if (metadata && num_bytes != root->fs_info->nodesize) {
6860 error("ignore invalid metadata extent, length %llu does not equal to %u",
6861 num_bytes, root->fs_info->nodesize);
6864 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6865 error("ignore invalid data extent, length %llu is not aligned to %u",
6866 num_bytes, root->fs_info->sectorsize);
/* Register the extent itself before looking at its backrefs. */
6870 memset(&tmpl, 0, sizeof(tmpl));
6871 tmpl.start = key.objectid;
6872 tmpl.nr = num_bytes;
6873 tmpl.extent_item_refs = refs;
6874 tmpl.metadata = metadata;
6876 tmpl.max_size = num_bytes;
6877 add_extent_rec(extent_cache, &tmpl);
/* Inline refs start right after the extent item (and tree block info). */
6879 ptr = (unsigned long)(ei + 1);
6880 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6881 key.type == BTRFS_EXTENT_ITEM_KEY)
6882 ptr += sizeof(struct btrfs_tree_block_info);
6884 end = (unsigned long)ei + item_size;
6886 iref = (struct btrfs_extent_inline_ref *)ptr;
6887 type = btrfs_extent_inline_ref_type(eb, iref);
6888 offset = btrfs_extent_inline_ref_offset(eb, iref);
6890 case BTRFS_TREE_BLOCK_REF_KEY:
6891 ret = add_tree_backref(extent_cache, key.objectid,
6895 "add_tree_backref failed (extent items tree block): %s",
6898 case BTRFS_SHARED_BLOCK_REF_KEY:
6899 ret = add_tree_backref(extent_cache, key.objectid,
6903 "add_tree_backref failed (extent items shared block): %s",
6906 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload overlays the inline ref offset field. */
6907 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6908 add_data_backref(extent_cache, key.objectid, 0,
6909 btrfs_extent_data_ref_root(eb, dref),
6910 btrfs_extent_data_ref_objectid(eb,
6912 btrfs_extent_data_ref_offset(eb, dref),
6913 btrfs_extent_data_ref_count(eb, dref),
6916 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref payload follows the inline ref header. */
6917 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6918 add_data_backref(extent_cache, key.objectid, offset,
6920 btrfs_shared_data_ref_count(eb, sref),
6924 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6925 key.objectid, key.type, num_bytes);
/* Advance by the size of the inline ref just handled. */
6928 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * described by exactly one free space entry, after carving out any super
 * block stripes that fall inside it.
 *
 * For every super block mirror index, btrfs_sb_offset() yields the super
 * bytenr and btrfs_rmap_block() maps it to logical addresses (with a stripe
 * length) inside the block group.  Each returned stripe is compared with the
 * range being checked:
 *  - stripes entirely before or after the range are tested for first;
 *  - a stripe starting exactly at @offset, or starting before it, moves the
 *    start of the range past the stripe;
 *  - a stripe reaching the end of the range shortens the range;
 *  - a stripe strictly inside the range splits it: the left part is checked
 *    by a recursive call and the loop carries on with the right part.
 *
 * What is left is looked up with btrfs_find_free_space().  A missing entry,
 * a different start offset or a different length is reported on stderr.
 * A matching entry is removed with unlink_free_space(), so entries still
 * present afterwards were not accounted for by any checked range.
 *
 * NOTE(review): the loop over `logical[nr]`, the branch bodies that return
 * or continue, and the return values are not visible in this extract.
 */
6935 static int check_cache_range(struct btrfs_root *root,
6936 struct btrfs_block_group_cache *cache,
6937 u64 offset, u64 bytes)
6939 struct btrfs_free_space *entry;
/* Exclude every super block copy that maps into this block group. */
6945 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6946 bytenr = btrfs_sb_offset(i);
6947 ret = btrfs_rmap_block(root->fs_info,
6948 cache->key.objectid, bytenr, 0,
6949 &logical, &nr, &stripe_len);
6954 if (logical[nr] + stripe_len <= offset)
6956 if (offset + bytes <= logical[nr])
6958 if (logical[nr] == offset) {
6959 if (stripe_len >= bytes) {
6963 bytes -= stripe_len;
6964 offset += stripe_len;
6965 } else if (logical[nr] < offset) {
6966 if (logical[nr] + stripe_len >=
6971 bytes = (offset + bytes) -
6972 (logical[nr] + stripe_len);
6973 offset = logical[nr] + stripe_len;
6976 * Could be tricky, the super may land in the
6977 * middle of the area we're checking. First
6978 * check the easiest case, it's at the end.
6980 if (logical[nr] + stripe_len >=
6982 bytes = logical[nr] - offset;
6986 /* Check the left side */
6987 ret = check_cache_range(root, cache,
6989 logical[nr] - offset);
6995 /* Now we continue with the right side */
6996 bytes = (offset + bytes) -
6997 (logical[nr] + stripe_len);
6998 offset = logical[nr] + stripe_len;
7005 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7007 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7008 offset, offset+bytes);
7012 if (entry->offset != offset) {
7013 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7018 if (entry->bytes != bytes) {
7019 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7020 bytes, entry->bytes, offset);
7024 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space of block group @cache against the extent
 * tree: every gap between allocated extents must match a free space entry.
 *
 * The walk uses the extent root (the @root argument is replaced by
 * fs_info->extent_root) and starts at
 * max(cache->key.objectid, BTRFS_SUPER_INFO_OFFSET), tracked in `last`.
 * For each EXTENT_ITEM / METADATA_ITEM key below the end of the block group:
 *  - if the extent starts at `last`, `last` simply moves past it, using
 *    key.offset for extent items and the nodesize on the other visible path;
 *  - otherwise the gap [last, key.objectid) is verified with
 *    check_cache_range() before `last` moves past the extent.
 * After the walk, a trailing gap up to the end of the block group is verified
 * the same way.  Finally a message is printed when the free_space_offset
 * rb-tree of the block group is not empty (the first half of that condition
 * is not visible here).
 *
 * NOTE(review): loop headers, error branches and return statements are not
 * visible in this extract.
 */
7029 static int verify_space_cache(struct btrfs_root *root,
7030 struct btrfs_block_group_cache *cache)
7032 struct btrfs_path path;
7033 struct extent_buffer *leaf;
7034 struct btrfs_key key;
7038 root = root->fs_info->extent_root;
/* Never look below the primary super block offset. */
7040 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7042 btrfs_init_path(&path);
7043 key.objectid = last;
7045 key.type = BTRFS_EXTENT_ITEM_KEY;
7046 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Ran off the end of this leaf: move on to the next one. */
7051 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7052 ret = btrfs_next_leaf(root, &path);
7060 leaf = path.nodes[0];
7061 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Past the end of this block group. */
7062 if (key.objectid >= cache->key.offset + cache->key.objectid)
7064 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7065 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent directly follows the previous one: no gap to verify. */
7070 if (last == key.objectid) {
7071 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7072 last = key.objectid + key.offset;
7074 last = key.objectid + root->fs_info->nodesize;
/* Gap between `last` and this extent must be free space. */
7079 ret = check_cache_range(root, cache, last,
7080 key.objectid - last);
7083 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7084 last = key.objectid + key.offset;
7086 last = key.objectid + root->fs_info->nodesize;
/* Trailing gap up to the end of the block group. */
7090 if (last < cache->key.objectid + cache->key.offset)
7091 ret = check_cache_range(root, cache, last,
7092 cache->key.objectid +
7093 cache->key.offset - last);
7096 btrfs_release_path(&path);
7099 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7100 fprintf(stderr, "There are still entries left in the space "
/*
 * Load and verify the free space information of every block group.
 *
 * If the super block records a cache generation other than -1 that differs
 * from the super generation, a notice is printed that the space cache will be
 * invalidated.  When progress reporting is enabled the task position is set
 * to TASK_FREE_SPACE and the task is started; task_stop() is called near the
 * end.
 *
 * Block groups are looked up one after another with
 * btrfs_lookup_first_block_group(), beginning just past the primary super
 * block (BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE) and continuing at
 * the end of each block group found.  For each one a free_space_ctl is
 * initialised with the sector size when it is missing, and
 * btrfs_remove_free_space_cache() appears on the other visible path.  With
 * the FREE_SPACE_TREE compat_ro feature the super stripes are excluded, the
 * free space tree is loaded and the exclusions are dropped again; the other
 * visible load call is load_free_space_cache().  The result is then checked
 * with verify_space_cache().
 *
 * Returns -EINVAL when the local `error` is non-zero, 0 otherwise.
 *
 * NOTE(review): loop structure, else branches and the updates of `error` are
 * not visible in this extract.
 */
7108 static int check_space_cache(struct btrfs_root *root)
7110 struct btrfs_block_group_cache *cache;
7111 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/* -1ULL is tested separately from a plain generation mismatch. */
7115 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7116 btrfs_super_generation(root->fs_info->super_copy) !=
7117 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7118 printf("cache and super generation don't match, space cache "
7119 "will be invalidated\n");
7123 if (ctx.progress_enabled) {
7124 ctx.tp = TASK_FREE_SPACE;
7125 task_start(ctx.info);
7129 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Next lookup resumes right after this block group. */
7133 start = cache->key.objectid + cache->key.offset;
7134 if (!cache->free_space_ctl) {
7135 if (btrfs_init_free_space_ctl(cache,
7136 root->fs_info->sectorsize)) {
7141 btrfs_remove_free_space_cache(cache);
7144 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7145 ret = exclude_super_stripes(root, cache);
7147 fprintf(stderr, "could not exclude super stripes: %s\n",
7152 ret = load_free_space_tree(root->fs_info, cache);
7153 free_excluded_extents(root, cache);
7155 fprintf(stderr, "could not load free space tree: %s\n",
7162 ret = load_free_space_cache(root->fs_info, cache);
7167 ret = verify_space_cache(root, cache);
7169 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7170 cache->key.objectid);
7175 task_stop(ctx.info);
7177 return error ? -EINVAL : 0;
/*
 * Read the data covered by one csum item and compare the checksum of every
 * sector with the value stored in the csum leaf.
 *
 * @bytenr / @num_bytes describe the data range, @eb is the csum leaf and
 * @leaf_offset the offset inside it where the checksums of this range begin.
 *
 * A buffer of @num_bytes is allocated with malloc().  Data is read with
 * read_extent_data() from the current mirror, as much as possible per call.
 * For each sectorsize block read, the checksum is computed with
 * btrfs_csum_data() / btrfs_csum_final() and the expected value is fetched
 * from @eb at leaf_offset + (byte offset / sectorsize) * csum_size.  On a
 * mismatch the mirror, bytenr, computed and expected csum are printed and
 * btrfs_num_copies() is consulted to see whether another mirror remains.
 *
 * NOTE(review): the handling of a @num_bytes that is not a multiple of the
 * sector size, the allocation-failure path, the mirror retry logic and the
 * return values are not visible in this extract.
 */
7180 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7181 u64 num_bytes, unsigned long leaf_offset,
7182 struct extent_buffer *eb) {
7184 struct btrfs_fs_info *fs_info = root->fs_info;
7186 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7188 unsigned long csum_offset;
7192 u64 data_checked = 0;
7198 if (num_bytes % fs_info->sectorsize)
7201 data = malloc(num_bytes);
7205 while (offset < num_bytes) {
7208 read_len = num_bytes - offset;
7209 /* read as much space once a time */
7210 ret = read_extent_data(fs_info, data + offset,
7211 bytenr + offset, &read_len, mirror);
7215 /* verify every 4k data's checksum */
7216 while (data_checked < read_len) {
7218 tmp = offset + data_checked;
7220 csum = btrfs_csum_data((char *)data + tmp,
7221 csum, fs_info->sectorsize);
7222 btrfs_csum_final(csum, (u8 *)&csum);
7224 csum_offset = leaf_offset +
7225 tmp / fs_info->sectorsize * csum_size;
7226 read_extent_buffer(eb, (char *)&csum_expected,
7227 csum_offset, csum_size);
7228 /* try another mirror */
7229 if (csum != csum_expected) {
7230 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7231 mirror, bytenr + tmp,
7232 csum, csum_expected);
7233 num_copies = btrfs_num_copies(root->fs_info,
7235 if (mirror < num_copies - 1) {
7240 data_checked += fs_info->sectorsize;
/*
 * Check that the byte range starting at @bytenr (length num_bytes) is covered
 * by EXTENT_ITEM records in the extent tree.
 *
 * The search key is (bytenr, EXTENT_ITEM_KEY, (u64)-1) in the extent root.
 * The position is then moved backwards (previous slot or previous leaf), and
 * further back while the key type is greater than EXTENT_ITEM_KEY.  From
 * there the items are walked forward; items of another type are tested for,
 * as are extents ending before the range or starting after it.  An
 * overlapping extent shrinks the range still to be covered:
 *  - same start: the front of the range is consumed;
 *  - extent starts before the range: the range start moves to the extent end;
 *  - extent starts inside the range: if it also ends inside, the part to the
 *    right of the extent is checked by a recursive call, and the range is cut
 *    down to the part left of the extent.
 * If bytes remain uncovered and no error occurred, the message
 * "There are no extents for csum range" is printed.
 *
 * NOTE(review): several branch bodies, loop headers and return statements
 * are not visible in this extract.
 */
7249 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7252 struct btrfs_path path;
7253 struct extent_buffer *leaf;
7254 struct btrfs_key key;
7257 btrfs_init_path(&path);
/* Offset (u64)-1 is the largest possible offset for this bytenr. */
7258 key.objectid = bytenr;
7259 key.type = BTRFS_EXTENT_ITEM_KEY;
7260 key.offset = (u64)-1;
7263 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7266 fprintf(stderr, "Error looking up extent record %d\n", ret);
7267 btrfs_release_path(&path);
/* Step back one item, crossing into the previous leaf when needed. */
7270 if (path.slots[0] > 0) {
7273 ret = btrfs_prev_leaf(root, &path);
7276 } else if (ret > 0) {
7283 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7286 * Block group items come before extent items if they have the same
7287 * bytenr, so walk back one more just in case. Dear future traveller,
7288 * first congrats on mastering time travel. Now if it's not too much
7289 * trouble could you go back to 2006 and tell Chris to make the
7290 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7291 * EXTENT_ITEM_KEY please?
7293 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7294 if (path.slots[0] > 0) {
7297 ret = btrfs_prev_leaf(root, &path);
7300 } else if (ret > 0) {
7305 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7309 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7310 ret = btrfs_next_leaf(root, &path);
7312 fprintf(stderr, "Error going to next leaf "
7314 btrfs_release_path(&path);
7320 leaf = path.nodes[0];
7321 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7322 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7326 if (key.objectid + key.offset < bytenr) {
7330 if (key.objectid > bytenr + num_bytes)
7333 if (key.objectid == bytenr) {
7334 if (key.offset >= num_bytes) {
7338 num_bytes -= key.offset;
7339 bytenr += key.offset;
7340 } else if (key.objectid < bytenr) {
7341 if (key.objectid + key.offset >= bytenr + num_bytes) {
7345 num_bytes = (bytenr + num_bytes) -
7346 (key.objectid + key.offset);
7347 bytenr = key.objectid + key.offset;
7349 if (key.objectid + key.offset < bytenr + num_bytes) {
7350 u64 new_start = key.objectid + key.offset;
7351 u64 new_bytes = bytenr + num_bytes - new_start;
7354 * Weird case, the extent is in the middle of
7355 * our range, we'll have to search one side
7356 * and then the other. Not sure if this happens
7357 * in real life, but no harm in coding it up
7358 * anyway just in case.
7360 btrfs_release_path(&path);
7361 ret = check_extent_exists(root, new_start,
7364 fprintf(stderr, "Right section didn't "
7368 num_bytes = key.objectid - bytenr;
7371 num_bytes = key.objectid - bytenr;
7378 if (num_bytes && !ret) {
7379 fprintf(stderr, "There are no extents for csum range "
7380 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7384 btrfs_release_path(&path);
/*
 * Walk the csum tree and make sure every run of checksummed bytes is backed
 * by extent records; optionally verify the data checksums as well.
 *
 * The walk uses fs_info->csum_root (the @root argument is replaced) and gives
 * up with "No valid csum tree found" when its root node is not uptodate.
 * Items are visited starting from the key
 * (BTRFS_EXTENT_CSUM_OBJECTID, BTRFS_EXTENT_CSUM_KEY, ...).  For each
 * EXTENT_CSUM item the covered data length is
 * (item size / csum size) * sectorsize.  Unless the global check_data_csum is
 * zero, the data itself is verified with check_extent_csums().
 *
 * Contiguous csum items are merged into one run tracked by `offset` and
 * `num_bytes`.  When an item does not start at offset + num_bytes, the run
 * collected so far is checked with check_extent_exists() and a new run starts
 * at the current key offset.
 *
 * NOTE(review): loop headers, the skip_csum_check label, the resets of
 * num_bytes and the return statements are not visible in this extract.
 */
7388 static int check_csums(struct btrfs_root *root)
7390 struct btrfs_path path;
7391 struct extent_buffer *leaf;
7392 struct btrfs_key key;
7393 u64 offset = 0, num_bytes = 0;
7394 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7398 unsigned long leaf_offset;
7400 root = root->fs_info->csum_root;
7401 if (!extent_buffer_uptodate(root->node)) {
7402 fprintf(stderr, "No valid csum tree found\n");
7406 btrfs_init_path(&path);
7407 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7408 key.type = BTRFS_EXTENT_CSUM_KEY;
7410 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7412 fprintf(stderr, "Error searching csum tree %d\n", ret);
7413 btrfs_release_path(&path);
7417 if (ret > 0 && path.slots[0])
/* Ran off the end of this leaf: move on to the next one. */
7422 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7423 ret = btrfs_next_leaf(root, &path);
7425 fprintf(stderr, "Error going to next leaf "
7432 leaf = path.nodes[0];
7434 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7435 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* One csum of csum_size bytes stands for one sector of data. */
7440 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7441 csum_size) * root->fs_info->sectorsize;
7442 if (!check_data_csum)
7443 goto skip_csum_check;
7444 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7445 ret = check_extent_csums(root, key.offset, data_len,
7451 offset = key.offset;
/* Discontinuity: validate the run gathered so far, then restart. */
7452 } else if (key.offset != offset + num_bytes) {
7453 ret = check_extent_exists(root, offset, num_bytes);
7455 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7456 "there is no extent record\n",
7457 offset, offset+num_bytes);
7460 offset = key.offset;
7463 num_bytes += data_len;
7467 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field in (objectid, type, offset)
 * order, testing at each level whether the field of @key is smaller and, on
 * equality, descending to the next field.
 *
 * NOTE(review): the return statements are not visible in this extract;
 * presumably the result is non-zero exactly when @key sorts strictly before
 * @drop_key -- confirm.
 */
7471 static int is_dropped_key(struct btrfs_key *key,
7472 struct btrfs_key *drop_key) {
7473 if (key->objectid < drop_key->objectid)
7475 else if (key->objectid == drop_key->objectid) {
7476 if (key->type < drop_key->type)
7478 else if (key->type == drop_key->type) {
7479 if (key->offset < drop_key->offset)
7487 * Here are the rules for FULL_BACKREF.
7489 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7490 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7492 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7493 * if it happened after the relocation occurred since we'll have dropped the
7494 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7495 * have no real way to know for sure.
7497 * We process the blocks one root at a time, and we start from the lowest root
7498 * objectid and go to the highest. So we can just lookup the owner backref for
7499 * the record and if we don't find it then we know it doesn't exist and we have
7502 * FIXME: if we ever start reclaiming root objectids then we need to fix this
7503 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7504 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out, from the cached backrefs alone, whether tree block @buf should
 * carry BTRFS_BLOCK_FLAG_FULL_BACKREF, and store the answer in *flags.
 *
 * The extent record of @buf is looked up in @extent_cache by buf->start.
 * The visible tests, in order, are:
 *  - ri->objectid below BTRFS_FIRST_FREE_OBJECTID;
 *  - @buf being the root block of @ri (buf->start == ri->bytenr);
 *  - the RELOC header flag being set on @buf;
 *  - the header owner of @buf being equal to ri->objectid;
 *  - a tree backref for (parent 0, header owner) being present in the record.
 * On the visible full-backref path BTRFS_BLOCK_FLAG_FULL_BACKREF is or-ed
 * into *flags.  On both outcomes rec->bad_full_backref is set when the record
 * already holds a decided flag_block_full_backref (not FLAG_UNSET) that
 * disagrees with the new outcome.
 *
 * NOTE(review): the goto targets / labels tying each test to an outcome and
 * the return statements are not visible in this extract.
 */
7506 static int calc_extent_flag(struct cache_tree *extent_cache,
7507 struct extent_buffer *buf,
7508 struct root_item_record *ri,
7511 struct extent_record *rec;
7512 struct cache_extent *cache;
7513 struct tree_backref *tback;
7516 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7517 /* we have added this extent before */
7521 rec = container_of(cache, struct extent_record, cache);
7524 * Except file/reloc tree, we can not have
7527 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7532 if (buf->start == ri->bytenr)
7535 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7538 owner = btrfs_header_owner(buf);
7539 if (owner == ri->objectid)
7542 tback = find_tree_backref(rec, 0, owner);
/* A previously decided value other than 0 contradicts this outcome. */
7547 if (rec->flag_block_full_backref != FLAG_UNSET &&
7548 rec->flag_block_full_backref != 0)
7549 rec->bad_full_backref = 1;
7552 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* A previously decided value other than 1 contradicts this outcome. */
7553 if (rec->flag_block_full_backref != FLAG_UNSET &&
7554 rec->flag_block_full_backref != 1)
7555 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr of the form
 * "Invalid key type(<type>) found in root(<root>)", where the key type is
 * rendered by print_key_type() and the root id by print_objectid().
 */
7559 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7561 fprintf(stderr, "Invalid key type(");
7562 print_key_type(stderr, 0, key_type);
7563 fprintf(stderr, ") found in root(");
7564 print_objectid(stderr, rootid, 0);
7565 fprintf(stderr, ")\n");
7569 * Check if the key is valid with its extent buffer.
7571 * This is an early check in case an invalid key exists in an extent buffer
7572 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that a key of type @key_type may appear in a tree whose id is
 * @rootid.
 *
 * Rules visible in this function:
 *  - DEV_ITEM and CHUNK_ITEM keys are tested against the chunk tree id;
 *  - EXTENT_ITEM, METADATA_ITEM and BLOCK_GROUP_ITEM keys against the extent
 *    tree id;
 *  - ROOT_ITEM keys against the root tree id;
 *  - DEV_EXTENT keys against the dev tree id.
 * A mismatch is reported through report_mismatch_key_root().
 *
 * NOTE(review): the case label BTRFS_CSUM_TREE_OBJECTID is a tree objectid,
 * not a key type, although the switch dispatches on key types; it looks like
 * a csum key type constant was intended -- confirm.  The comment above that
 * case mentions the csum tree, while the visible part of its condition only
 * tests BTRFS_TREE_LOG_OBJECTID (the rest of the condition is not shown).
 *
 * NOTE(review): the switch header, goto / break statements, the error label
 * and the return values are not visible in this extract.
 */
7575 static int check_type_with_root(u64 rootid, u8 key_type)
7578 /* Only valid in chunk tree */
7579 case BTRFS_DEV_ITEM_KEY:
7580 case BTRFS_CHUNK_ITEM_KEY:
7581 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7584 /* valid in csum and log tree */
7585 case BTRFS_CSUM_TREE_OBJECTID:
7586 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7590 case BTRFS_EXTENT_ITEM_KEY:
7591 case BTRFS_METADATA_ITEM_KEY:
7592 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7593 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7596 case BTRFS_ROOT_ITEM_KEY:
7597 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7600 case BTRFS_DEV_EXTENT_KEY:
7601 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7607 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next pending tree block: read it, classify its backref mode,
 * sanity check it, and feed everything it contains into the various caches.
 *
 * Outline of the visible logic:
 *  1. pick_next_pending() fills @bits with candidate blocks taken from
 *     @pending / @reada / @nodes.  In the loop that follows, each candidate
 *     is added to @reada and readahead is issued for it.
 *  2. bits[0] is the block handled by this call.  Its start is stored in
 *     *last and its range is removed from @pending, @reada and @nodes.
 *  3. The extent record for the block, when present in @extent_cache,
 *     supplies the parent generation passed to read_tree_block().  A block
 *     that is not uptodate is recorded with record_bad_block_io().
 *  4. Unless the global init_extent_tree is set, the extent flags are looked
 *     up with btrfs_lookup_extent_info(); calc_extent_flag() is the other
 *     visible source.  When calc_extent_flag() cannot decide, a message is
 *     printed and FULL_BACKREF is assumed.
 *  5. The flag is cross-checked against @ri: FULL_BACKREF is cleared for a
 *     block owned by the root being walked that has no RELOC header flag
 *     and is newer than ri->last_snapshot, and FULL_BACKREF is set on the
 *     reloc-tree / RELOC-flag path.  Both corrections mark
 *     rec->bad_full_backref.  The decision is stored in
 *     rec->flag_block_full_backref.
 *  6. check_block() validates the block.
 *  7. Leaf: the free space of the leaf is added to btree_space_waste and
 *     every item is dispatched on its key type (after check_type_with_root())
 *     to the matching process_*() helper or to add_tree_backref() /
 *     add_data_backref().  ORPHAN_ITEM keys can be queued on the global
 *     delete_items list as bad_item entries.  For EXTENT_DATA items that are
 *     neither inline nor holes (disk bytenr 0) the allocated / referenced
 *     byte counters are updated and a data backref is added.
 *  8. Node: for each child pointer, keys below ri->drop_key at
 *     ri->drop_level are tested with is_dropped_key(); an extent record and
 *     a tree backref are added for the child, which is then queued on
 *     @nodes or @pending.  Unused key pointer slots are added to
 *     btree_space_waste.
 *  9. Global byte totals are updated by header owner, found_old_backref is
 *     latched for mixed-backref reloc-tree blocks lacking the RELOC flag,
 *     and the buffer is released.
 *
 * NOTE(review): many declarations, conditions, else branches, continue /
 * goto statements and all return statements are not visible in this extract.
 */
7611 static int run_next_block(struct btrfs_root *root,
7612 struct block_info *bits,
7615 struct cache_tree *pending,
7616 struct cache_tree *seen,
7617 struct cache_tree *reada,
7618 struct cache_tree *nodes,
7619 struct cache_tree *extent_cache,
7620 struct cache_tree *chunk_cache,
7621 struct rb_root *dev_cache,
7622 struct block_group_tree *block_group_cache,
7623 struct device_extent_tree *dev_extent_cache,
7624 struct root_item_record *ri)
7626 struct btrfs_fs_info *fs_info = root->fs_info;
7627 struct extent_buffer *buf;
7628 struct extent_record *rec = NULL;
7639 struct btrfs_key key;
7640 struct cache_extent *cache;
7643 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7644 bits_nr, &reada_bits);
7649 for(i = 0; i < nritems; i++) {
7650 ret = add_cache_extent(reada, bits[i].start,
7655 /* fixme, get the parent transid */
7656 readahead_tree_block(fs_info, bits[i].start,
/* The first candidate is the block processed by this call. */
7660 *last = bits[0].start;
7661 bytenr = bits[0].start;
7662 size = bits[0].size;
/* Drop the block from every queue it may still sit in. */
7664 cache = lookup_cache_extent(pending, bytenr, size);
7666 remove_cache_extent(pending, cache);
7669 cache = lookup_cache_extent(reada, bytenr, size);
7671 remove_cache_extent(reada, cache);
7674 cache = lookup_cache_extent(nodes, bytenr, size);
7676 remove_cache_extent(nodes, cache);
7679 cache = lookup_cache_extent(extent_cache, bytenr, size);
7681 rec = container_of(cache, struct extent_record, cache);
7682 gen = rec->parent_generation;
7685 /* fixme, get the real parent transid */
7686 buf = read_tree_block(root->fs_info, bytenr, size, gen);
7687 if (!extent_buffer_uptodate(buf)) {
7688 record_bad_block_io(root->fs_info,
7689 extent_cache, bytenr, size);
7693 nritems = btrfs_header_nritems(buf);
7696 if (!init_extent_tree) {
7697 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7698 btrfs_header_level(buf), 1, NULL,
7701 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7703 fprintf(stderr, "Couldn't calc extent flags\n");
7704 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7709 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7711 fprintf(stderr, "Couldn't calc extent flags\n");
7712 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7716 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7718 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7719 ri->objectid == btrfs_header_owner(buf)) {
7721 * Ok we got to this block from it's original owner and
7722 * we have FULL_BACKREF set. Relocation can leave
7723 * converted blocks over so this is altogether possible,
7724 * however it's not possible if the generation > the
7725 * last snapshot, so check for this case.
7727 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7728 btrfs_header_generation(buf) > ri->last_snapshot) {
7729 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7730 rec->bad_full_backref = 1;
7735 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7736 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7737 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7738 rec->bad_full_backref = 1;
7742 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7743 rec->flag_block_full_backref = 1;
7747 rec->flag_block_full_backref = 0;
7749 owner = btrfs_header_owner(buf);
7752 ret = check_block(root, extent_cache, buf, flags);
7756 if (btrfs_is_leaf(buf)) {
7757 btree_space_waste += btrfs_leaf_free_space(root, buf);
7758 for (i = 0; i < nritems; i++) {
7759 struct btrfs_file_extent_item *fi;
7760 btrfs_item_key_to_cpu(buf, &key, i);
7762 * Check key type against the leaf owner.
7763 * Could filter quite a lot of early error if
7766 if (check_type_with_root(btrfs_header_owner(buf),
7768 fprintf(stderr, "ignoring invalid key\n");
7771 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7772 process_extent_item(root, extent_cache, buf,
7776 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7777 process_extent_item(root, extent_cache, buf,
7781 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7783 btrfs_item_size_nr(buf, i);
7786 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7787 process_chunk_item(chunk_cache, &key, buf, i);
7790 if (key.type == BTRFS_DEV_ITEM_KEY) {
7791 process_device_item(dev_cache, &key, buf, i);
7794 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7795 process_block_group_item(block_group_cache,
7799 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7800 process_device_extent_item(dev_extent_cache,
7805 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7806 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7807 process_extent_ref_v0(extent_cache, buf, i);
7814 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7815 ret = add_tree_backref(extent_cache,
7816 key.objectid, 0, key.offset, 0);
7819 "add_tree_backref failed (leaf tree block): %s",
7823 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7824 ret = add_tree_backref(extent_cache,
7825 key.objectid, key.offset, 0, 0);
7828 "add_tree_backref failed (leaf shared block): %s",
7832 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7833 struct btrfs_extent_data_ref *ref;
7834 ref = btrfs_item_ptr(buf, i,
7835 struct btrfs_extent_data_ref);
7836 add_data_backref(extent_cache,
7838 btrfs_extent_data_ref_root(buf, ref),
7839 btrfs_extent_data_ref_objectid(buf,
7841 btrfs_extent_data_ref_offset(buf, ref),
7842 btrfs_extent_data_ref_count(buf, ref),
7843 0, root->fs_info->sectorsize);
7846 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7847 struct btrfs_shared_data_ref *ref;
7848 ref = btrfs_item_ptr(buf, i,
7849 struct btrfs_shared_data_ref);
7850 add_data_backref(extent_cache,
7851 key.objectid, key.offset, 0, 0, 0,
7852 btrfs_shared_data_ref_count(buf, ref),
7853 0, root->fs_info->sectorsize);
7856 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7857 struct bad_item *bad;
7859 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7863 bad = malloc(sizeof(struct bad_item));
7866 INIT_LIST_HEAD(&bad->list);
7867 memcpy(&bad->key, &key,
7868 sizeof(struct btrfs_key));
7869 bad->root_id = owner;
7870 list_add_tail(&bad->list, &delete_items);
7873 if (key.type != BTRFS_EXTENT_DATA_KEY)
7875 fi = btrfs_item_ptr(buf, i,
7876 struct btrfs_file_extent_item);
7877 if (btrfs_file_extent_type(buf, fi) ==
7878 BTRFS_FILE_EXTENT_INLINE)
7880 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7883 data_bytes_allocated +=
7884 btrfs_file_extent_disk_num_bytes(buf, fi);
7885 if (data_bytes_allocated < root->fs_info->sectorsize) {
7888 data_bytes_referenced +=
7889 btrfs_file_extent_num_bytes(buf, fi);
7890 add_data_backref(extent_cache,
7891 btrfs_file_extent_disk_bytenr(buf, fi),
7892 parent, owner, key.objectid, key.offset -
7893 btrfs_file_extent_offset(buf, fi), 1, 1,
7894 btrfs_file_extent_disk_num_bytes(buf, fi));
7898 struct btrfs_key first_key;
7900 first_key.objectid = 0;
7903 btrfs_item_key_to_cpu(buf, &first_key, 0);
7904 level = btrfs_header_level(buf);
7905 for (i = 0; i < nritems; i++) {
7906 struct extent_record tmpl;
7908 ptr = btrfs_node_blockptr(buf, i);
7909 size = root->fs_info->nodesize;
7910 btrfs_node_key_to_cpu(buf, &key, i);
7912 if ((level == ri->drop_level)
7913 && is_dropped_key(&key, &ri->drop_key)) {
7918 memset(&tmpl, 0, sizeof(tmpl));
7919 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7920 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7925 tmpl.max_size = size;
7926 ret = add_extent_rec(extent_cache, &tmpl);
7930 ret = add_tree_backref(extent_cache, ptr, parent,
7934 "add_tree_backref failed (non-leaf block): %s",
7940 add_pending(nodes, seen, ptr, size);
7942 add_pending(pending, seen, ptr, size);
7945 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7946 nritems) * sizeof(struct btrfs_key_ptr);
7948 total_btree_bytes += buf->len;
7949 if (fs_root_objectid(btrfs_header_owner(buf)))
7950 total_fs_tree_bytes += buf->len;
7951 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7952 total_extent_tree_bytes += buf->len;
7953 if (!found_old_backref &&
7954 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7955 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7956 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7957 found_old_backref = 1;
7959 free_extent_buffer(buf);
/*
 * Queue the root block @buf of a tree for scanning and register its extent
 * record and tree backref.
 *
 * A root with header level above 0 is queued on @nodes; the other visible
 * add_pending() call queues it on @pending.  An extent_record template
 * starting at buf->start with max_size buf->len is added to @extent_cache.
 * The backref is then added in one of two forms: with buf->start as the
 * parent when the tree is the reloc tree or the block predates the mixed
 * backref revision, and with parent 0 and the tree objectid as root on the
 * other visible path.
 *
 * NOTE(review): the remaining template fields, the trailing arguments of the
 * add_tree_backref() calls and the return statement are not visible in this
 * extract.
 */
7963 static int add_root_to_pending(struct extent_buffer *buf,
7964 struct cache_tree *extent_cache,
7965 struct cache_tree *pending,
7966 struct cache_tree *seen,
7967 struct cache_tree *nodes,
7970 struct extent_record tmpl;
7973 if (btrfs_header_level(buf) > 0)
7974 add_pending(nodes, seen, buf->start, buf->len);
7976 add_pending(pending, seen, buf->start, buf->len);
7978 memset(&tmpl, 0, sizeof(tmpl));
7979 tmpl.start = buf->start;
7984 tmpl.max_size = buf->len;
7985 add_extent_rec(extent_cache, &tmpl);
7987 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7988 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7989 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7992 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7997 /* as we fix the tree, we might be deleting blocks that
7998 * we're tracking for repair. This hook makes sure we
7999 * remove any backrefs for blocks as we are fixing them.
/*
 * Keep the in-memory extent cache (root->fs_info->fsck_extent_cache) in step
 * with an extent reference that is being dropped.
 *
 * The extent is treated as data when @owner is at least
 * BTRFS_FIRST_FREE_OBJECTID.  Its record is looked up by
 * (@bytenr, @num_bytes).
 *
 * Data extent: the backref is located with find_data_backref().  When it was
 * seen as a real reference, found_ref and rec->refs are lowered by
 * refs_to_drop.  When it was seen in the extent tree, num_refs and (if
 * non-zero) rec->extent_item_refs are lowered by refs_to_drop.  Counters that
 * reach zero clear the matching found_ref / found_extent_tree bits.
 *
 * Tree block: the backref is located with find_tree_backref().  found_ref is
 * cleared, rec->extent_item_refs is decremented when non-zero and
 * found_extent_tree is cleared.
 *
 * In both cases maybe_free_extent_rec() is called at the end.
 *
 * NOTE(review): both list_del() guards test
 * `!found_extent_tree && found_ref`.  In the tree branch found_ref has just
 * been cleared whenever it was set, so that guard looks like it can never be
 * true; `!found_ref` may have been intended in both branches -- confirm.
 *
 * NOTE(review): the last parameter line, the not-found checks, the statements
 * following list_del() and the return statements are not visible in this
 * extract.
 */
8001 static int free_extent_hook(struct btrfs_trans_handle *trans,
8002 struct btrfs_root *root,
8003 u64 bytenr, u64 num_bytes, u64 parent,
8004 u64 root_objectid, u64 owner, u64 offset,
8007 struct extent_record *rec;
8008 struct cache_extent *cache;
8010 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8012 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8013 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8017 rec = container_of(cache, struct extent_record, cache);
8019 struct data_backref *back;
8020 back = find_data_backref(rec, parent, root_objectid, owner,
8021 offset, 1, bytenr, num_bytes);
8024 if (back->node.found_ref) {
8025 back->found_ref -= refs_to_drop;
8027 rec->refs -= refs_to_drop;
8029 if (back->node.found_extent_tree) {
8030 back->num_refs -= refs_to_drop;
8031 if (rec->extent_item_refs)
8032 rec->extent_item_refs -= refs_to_drop;
/* Counters that dropped to zero clear the corresponding presence bit. */
8034 if (back->found_ref == 0)
8035 back->node.found_ref = 0;
8036 if (back->num_refs == 0)
8037 back->node.found_extent_tree = 0;
8039 if (!back->node.found_extent_tree && back->node.found_ref) {
8040 list_del(&back->node.list);
8044 struct tree_backref *back;
8045 back = find_tree_backref(rec, parent, root_objectid);
8048 if (back->node.found_ref) {
8051 back->node.found_ref = 0;
8053 if (back->node.found_extent_tree) {
8054 if (rec->extent_item_refs)
8055 rec->extent_item_refs--;
8056 back->node.found_extent_tree = 0;
8058 if (!back->node.found_extent_tree && back->node.found_ref) {
8059 list_del(&back->node.list);
8063 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete every extent-tree item that belongs to the extent at bytenr: the
 * extent / metadata item itself and all keyed backref items.
 *
 * The search key starts at (bytenr, <type not visible>, (u64)-1) in the
 * extent root and the tree is walked from there.  For the item found:
 *  - an objectid other than bytenr is tested for first;
 *  - an item whose type is none of EXTENT_ITEM, METADATA_ITEM,
 *    TREE_BLOCK_REF, EXTENT_DATA_REF, EXTENT_REF_V0, SHARED_BLOCK_REF or
 *    SHARED_DATA_REF is left alone: the path is released and the search key
 *    is stepped down to just below that item (offset - 1 for type 0, and
 *    type - 1 with offset (u64)-1 on the other visible path);
 *  - any other item is announced on stderr and removed with
 *    btrfs_del_item().
 * After removing an EXTENT_ITEM or METADATA_ITEM, btrfs_update_block_group()
 * is called for the freed bytes: key.offset for extent items, the nodesize
 * for metadata items.
 *
 * NOTE(review): the loop header, the slot adjustments, the error checks and
 * the return statements are not visible in this extract.
 */
8068 static int delete_extent_records(struct btrfs_trans_handle *trans,
8069 struct btrfs_root *root,
8070 struct btrfs_path *path,
8073 struct btrfs_key key;
8074 struct btrfs_key found_key;
8075 struct extent_buffer *leaf;
8080 key.objectid = bytenr;
8082 key.offset = (u64)-1;
8085 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8092 if (path->slots[0] == 0)
8098 leaf = path->nodes[0];
8099 slot = path->slots[0];
8101 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8102 if (found_key.objectid != bytenr)
/* Not an extent or backref item: keep it and search below it. */
8105 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8106 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8107 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8108 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8109 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8110 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8111 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8112 btrfs_release_path(path);
8113 if (found_key.type == 0) {
8114 if (found_key.offset == 0)
8116 key.offset = found_key.offset - 1;
8117 key.type = found_key.type;
8119 key.type = found_key.type - 1;
8120 key.offset = (u64)-1;
8124 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8125 found_key.objectid, found_key.type, found_key.offset);
8127 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8130 btrfs_release_path(path);
/* Removing the extent item itself also updates block group accounting. */
8132 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8133 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8134 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8135 found_key.offset : root->fs_info->nodesize;
8137 ret = btrfs_update_block_group(trans, root, bytenr,
8144 btrfs_release_path(path);
8149 * for a single backref, this will allocate a new extent
8150 * and add the backref to it.
/*
 * Write one backref of extent record @rec into the extent tree, creating the
 * extent item first on the visible insertion path.
 *
 * Extent item creation: rec->max_size is raised with max_t(), an item keyed
 * (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size) is inserted with
 * btrfs_insert_empty_item(), sized for a btrfs_extent_item plus, on one
 * visible path, a btrfs_tree_block_info.  The new item starts with 0 refs and
 * the generation of @rec.  A data backref sets BTRFS_EXTENT_FLAG_DATA; the
 * other visible path zeroes the tree block info, stores rec->info_level and a
 * disk key built from rec->info_objectid (type 0, offset 0), and sets
 * BTRFS_EXTENT_FLAG_TREE_BLOCK or-ed with @flags.  The leaf is marked dirty
 * and btrfs_update_block_group() is called for the extent range.
 *
 * Backref creation: for a data backref, btrfs_inc_extent_ref() is called once
 * per found reference (dback->found_ref times), using dback->parent as the
 * parent for full backrefs, and a summary line is printed.  For a tree
 * backref, btrfs_inc_extent_ref() is called once with tback->parent (full
 * backref) and tback->root, and a summary line is printed.
 *
 * NOTE(review): the sequence `©_key` on four lines below looks like a
 * mis-encoded `&copy_key` (address of the local declared just above them);
 * the code cannot compile as shown -- restore from the original file.
 * NOTE(review): `struct btrfs_disk_key copy_key;;` carries a doubled
 * semicolon.
 * NOTE(review): the conditions on @allocated, several call arguments, error
 * checks and the return statements are not visible in this extract.
 */
8152 static int record_extent(struct btrfs_trans_handle *trans,
8153 struct btrfs_fs_info *info,
8154 struct btrfs_path *path,
8155 struct extent_record *rec,
8156 struct extent_backref *back,
8157 int allocated, u64 flags)
8160 struct btrfs_root *extent_root = info->extent_root;
8161 struct extent_buffer *leaf;
8162 struct btrfs_key ins_key;
8163 struct btrfs_extent_item *ei;
8164 struct data_backref *dback;
8165 struct btrfs_tree_block_info *bi;
8168 rec->max_size = max_t(u64, rec->max_size,
8172 u32 item_size = sizeof(*ei);
8175 item_size += sizeof(*bi);
8177 ins_key.objectid = rec->start;
8178 ins_key.offset = rec->max_size;
8179 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8181 ret = btrfs_insert_empty_item(trans, extent_root, path,
8182 &ins_key, item_size);
8186 leaf = path->nodes[0];
8187 ei = btrfs_item_ptr(leaf, path->slots[0],
8188 struct btrfs_extent_item);
/* The ref count starts at zero; btrfs_inc_extent_ref() is called below. */
8190 btrfs_set_extent_refs(leaf, ei, 0);
8191 btrfs_set_extent_generation(leaf, ei, rec->generation);
8193 if (back->is_data) {
8194 btrfs_set_extent_flags(leaf, ei,
8195 BTRFS_EXTENT_FLAG_DATA);
8197 struct btrfs_disk_key copy_key;;
/* The tree block info sits directly behind the extent item. */
8199 bi = (struct btrfs_tree_block_info *)(ei + 1);
8200 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8203 btrfs_set_disk_key_objectid(©_key,
8204 rec->info_objectid);
8205 btrfs_set_disk_key_type(©_key, 0);
8206 btrfs_set_disk_key_offset(©_key, 0);
8208 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8209 btrfs_set_tree_block_key(leaf, bi, ©_key);
8211 btrfs_set_extent_flags(leaf, ei,
8212 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8215 btrfs_mark_buffer_dirty(leaf);
8216 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8217 rec->max_size, 1, 0);
8220 btrfs_release_path(path);
8223 if (back->is_data) {
8227 dback = to_data_backref(back);
8228 if (back->full_backref)
8229 parent = dback->parent;
8233 for (i = 0; i < dback->found_ref; i++) {
8234 /* if parent != 0, we're doing a full backref
8235 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8236 * just makes the backref allocator create a data
8239 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8240 rec->start, rec->max_size,
8244 BTRFS_FIRST_FREE_OBJECTID :
8250 fprintf(stderr, "adding new data backref"
8251 " on %llu %s %llu owner %llu"
8252 " offset %llu found %d\n",
8253 (unsigned long long)rec->start,
8254 back->full_backref ?
8256 back->full_backref ?
8257 (unsigned long long)parent :
8258 (unsigned long long)dback->root,
8259 (unsigned long long)dback->owner,
8260 (unsigned long long)dback->offset,
8264 struct tree_backref *tback;
8266 tback = to_tree_backref(back);
8267 if (back->full_backref)
8268 parent = tback->parent;
8272 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8273 rec->start, rec->max_size,
8274 parent, tback->root, 0, 0);
8275 fprintf(stderr, "adding new tree backref on "
8276 "start %llu len %llu parent %llu root %llu\n",
8277 rec->start, rec->max_size, parent, tback->root);
8280 btrfs_release_path(path);
8284 static struct extent_entry *find_entry(struct list_head *entries,
8285 u64 bytenr, u64 bytes)
8287 struct extent_entry *entry = NULL;
8289 list_for_each_entry(entry, entries, list) {
8290 if (entry->bytenr == bytenr && entry->bytes == bytes)
8297 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8299 struct extent_entry *entry, *best = NULL, *prev = NULL;
8301 list_for_each_entry(entry, entries, list) {
8303 * If there are as many broken entries as entries then we know
8304 * not to trust this particular entry.
8306 if (entry->broken == entry->count)
8310 * Special case, when there are only two entries and 'best' is
8320 * If our current entry == best then we can't be sure our best
8321 * is really the best, so we need to keep searching.
8323 if (best && best->count == entry->count) {
8329 /* Prev == entry, not good enough, have to keep searching */
8330 if (!prev->broken && prev->count == entry->count)
8334 best = (prev->count > entry->count) ? prev : entry;
8335 else if (best->count < entry->count)
/*
 * Rewrite one on-disk file extent item so that it agrees with @entry.
 *
 * Visible steps: read the fs root named by dback->root, locate the
 * EXTENT_DATA item of inode dback->owner whose disk bytenr and disk byte
 * count equal the values stored in @dback, then start a transaction, search
 * again with COW and update disk_bytenr, offset, disk_num_bytes and (for
 * uncompressed extents only) ram_bytes from @entry.
 *
 * @info:  filesystem being repaired
 * @path:  scratch path, released before returning
 * @dback: data backref whose file extent item disagrees with @entry
 * @entry: the (bytenr, bytes) pair the item is made to match
 *
 * The final statement returns ret when it is non-zero, otherwise the result
 * of btrfs_commit_transaction().
 */
8343 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8344 struct data_backref *dback, struct extent_entry *entry)
8346 struct btrfs_trans_handle *trans;
8347 struct btrfs_root *root;
8348 struct btrfs_file_extent_item *fi;
8349 struct extent_buffer *leaf;
8350 struct btrfs_key key;
/* Look up the subvolume root that owns the reference. */
8354 key.objectid = dback->root;
8355 key.type = BTRFS_ROOT_ITEM_KEY;
8356 key.offset = (u64)-1;
8357 root = btrfs_read_fs_root(info, &key);
8359 fprintf(stderr, "Couldn't find root for our ref\n");
8364 * The backref points to the original offset of the extent if it was
8365 * split, so we need to search down to the offset we have and then walk
8366 * forward until we find the backref we're looking for.
8368 key.objectid = dback->owner;
8369 key.type = BTRFS_EXTENT_DATA_KEY;
8370 key.offset = dback->offset;
/* First pass runs without a transaction handle (NULL). */
8371 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8373 fprintf(stderr, "Error looking up ref %d\n", ret);
8378 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8379 ret = btrfs_next_leaf(root, path);
8381 fprintf(stderr, "Couldn't find our ref, next\n");
8385 leaf = path->nodes[0];
8386 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8387 if (key.objectid != dback->owner ||
8388 key.type != BTRFS_EXTENT_DATA_KEY) {
8389 fprintf(stderr, "Couldn't find our ref, search\n");
8392 fi = btrfs_item_ptr(leaf, path->slots[0],
8393 struct btrfs_file_extent_item);
8394 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8395 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* The item we want is the one still carrying the backref's values. */
8397 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8402 btrfs_release_path(path);
8404 trans = btrfs_start_transaction(root, 1);
8406 return PTR_ERR(trans);
8409 * Ok we have the key of the file extent we want to fix, now we can cow
8410 * down to the thing and fix it.
8412 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8414 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8415 key.objectid, key.type, key.offset, ret);
8419 fprintf(stderr, "Well that's odd, we just found this key "
8420 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8425 leaf = path->nodes[0];
8426 fi = btrfs_item_ptr(leaf, path->slots[0],
8427 struct btrfs_file_extent_item);
/* A compressed extent with a different start is reported, not rewritten. */
8429 if (btrfs_file_extent_compression(leaf, fi) &&
8430 dback->disk_bytenr != entry->bytenr) {
8431 fprintf(stderr, "Ref doesn't match the record start and is "
8432 "compressed, please take a btrfs-image of this file "
8433 "system and send it to a btrfs developer so they can "
8434 "complete this functionality for bytenr %Lu\n",
8435 dback->disk_bytenr);
/*
 * Three cases follow: a broken backref simply takes the entry's bytenr;
 * a ref starting after or before the entry also has its in-extent offset
 * rewritten alongside the new disk bytenr.
 */
8440 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8441 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8442 } else if (dback->disk_bytenr > entry->bytenr) {
8443 u64 off_diff, offset;
8445 off_diff = dback->disk_bytenr - entry->bytenr;
8446 offset = btrfs_file_extent_offset(leaf, fi);
8447 if (dback->disk_bytenr + offset +
8448 btrfs_file_extent_num_bytes(leaf, fi) >
8449 entry->bytenr + entry->bytes) {
8450 fprintf(stderr, "Ref is past the entry end, please "
8451 "take a btrfs-image of this file system and "
8452 "send it to a btrfs developer, ref %Lu\n",
8453 dback->disk_bytenr);
8458 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8459 btrfs_set_file_extent_offset(leaf, fi, offset);
8460 } else if (dback->disk_bytenr < entry->bytenr) {
8463 offset = btrfs_file_extent_offset(leaf, fi);
8464 if (dback->disk_bytenr + offset < entry->bytenr) {
8465 fprintf(stderr, "Ref is before the entry start, please"
8466 " take a btrfs-image of this file system and "
8467 "send it to a btrfs developer, ref %Lu\n",
8468 dback->disk_bytenr);
/* Re-express the offset relative to the entry's start. */
8473 offset += dback->disk_bytenr;
8474 offset -= entry->bytenr;
8475 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8476 btrfs_set_file_extent_offset(leaf, fi, offset);
8479 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8482 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8483 * only do this if we aren't using compression, otherwise it's a
8486 if (!btrfs_file_extent_compression(leaf, fi))
8487 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8489 printf("ram bytes may be wrong?\n");
8490 btrfs_mark_buffer_dirty(leaf);
8492 err = btrfs_commit_transaction(trans, root);
8493 btrfs_release_path(path);
8494 return ret ? ret : err;
/*
 * Make the data backrefs of @rec agree on one (bytenr, bytes) pair.
 *
 * Each non-full data backref with found_ref set is bucketed into an
 * extent_entry keyed by (disk_bytenr, bytes). find_most_right_entry() picks
 * the winner; if the backrefs alone cannot decide, the extent record's own
 * (start, nr) is looked up, or added as an extra entry, and the vote is
 * repeated. Backrefs that disagree with the winner go through repair_ref().
 * The temporary entry list is emptied before the function returns.
 *
 * @info: filesystem being repaired
 * @path: scratch path handed to repair_ref()
 * @rec:  extent record whose backrefs are checked
 */
8497 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8498 struct extent_record *rec)
8500 struct extent_backref *back;
8501 struct data_backref *dback;
8502 struct extent_entry *entry, *best = NULL;
8505 int broken_entries = 0;
8510 * Metadata is easy and the backrefs should always agree on bytenr and
8511 * size, if not we've got bigger issues.
/* Pass 1: group the usable data backrefs by the range they claim. */
8516 list_for_each_entry(back, &rec->backrefs, list) {
8517 if (back->full_backref || !back->is_data)
8520 dback = to_data_backref(back);
8523 * We only pay attention to backrefs that we found a real
8526 if (dback->found_ref == 0)
8530 * For now we only catch when the bytes don't match, not the
8531 * bytenr. We can easily do this at the same time, but I want
8532 * to have a fs image to test on before we just add repair
8533 * functionality willy-nilly so we know we won't screw up the
8537 entry = find_entry(&entries, dback->disk_bytenr,
8540 entry = malloc(sizeof(struct extent_entry));
8545 memset(entry, 0, sizeof(*entry));
8546 entry->bytenr = dback->disk_bytenr;
8547 entry->bytes = dback->bytes;
8548 list_add_tail(&entry->list, &entries);
8553 * If we only have on entry we may think the entries agree when
8554 * in reality they don't so we have to do some extra checking.
8556 if (dback->disk_bytenr != rec->start ||
8557 dback->bytes != rec->nr || back->broken)
8568 /* Yay all the backrefs agree, carry on good sir */
8569 if (nr_entries <= 1 && !mismatch)
8572 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8573 "%Lu\n", rec->start);
8576 * First we want to see if the backrefs can agree amongst themselves who
8577 * is right, so figure out which one of the entries has the highest
8580 best = find_most_right_entry(&entries);
8583 * Ok so we may have an even split between what the backrefs think, so
8584 * this is where we use the extent ref to see what it thinks.
8587 entry = find_entry(&entries, rec->start, rec->nr);
8588 if (!entry && (!broken_entries || !rec->found_rec)) {
8589 fprintf(stderr, "Backrefs don't agree with each other "
8590 "and extent record doesn't agree with anybody,"
8591 " so we can't fix bytenr %Lu bytes %Lu\n",
8592 rec->start, rec->nr);
8595 } else if (!entry) {
8597 * Ok our backrefs were broken, we'll assume this is the
8598 * correct value and add an entry for this range.
8600 entry = malloc(sizeof(struct extent_entry));
8605 memset(entry, 0, sizeof(*entry));
8606 entry->bytenr = rec->start;
8607 entry->bytes = rec->nr;
8608 list_add_tail(&entry->list, &entries);
/* Vote again now that the extent record's own range takes part. */
8612 best = find_most_right_entry(&entries);
8614 fprintf(stderr, "Backrefs and extent record evenly "
8615 "split on who is right, this is going to "
8616 "require user input to fix bytenr %Lu bytes "
8617 "%Lu\n", rec->start, rec->nr);
8624 * I don't think this can happen currently as we'll abort() if we catch
8625 * this case higher up, but in case somebody removes that we still can't
8626 * deal with it properly here yet, so just bail out of that's the case.
8628 if (best->bytenr != rec->start) {
8629 fprintf(stderr, "Extent start and backref starts don't match, "
8630 "please use btrfs-image on this file system and send "
8631 "it to a btrfs developer so they can make fsck fix "
8632 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8633 rec->start, rec->nr);
8639 * Ok great we all agreed on an extent record, let's go find the real
8640 * references and fix up the ones that don't match.
/* Pass 2: rewrite every usable backref that disagrees with the winner. */
8642 list_for_each_entry(back, &rec->backrefs, list) {
8643 if (back->full_backref || !back->is_data)
8646 dback = to_data_backref(back);
8649 * Still ignoring backrefs that don't have a real ref attached
8652 if (dback->found_ref == 0)
8655 if (dback->bytes == best->bytes &&
8656 dback->disk_bytenr == best->bytenr)
8659 ret = repair_ref(info, path, dback, best);
8665 * Ok we messed with the actual refs, which means we need to drop our
8666 * entire cache and go back and rescan. I know this is a huge pain and
8667 * adds a lot of extra work, but it's the only way to be safe. Once all
8668 * the backrefs agree we may not need to do anything to the extent
/* Unlink every temporary entry from the local list. */
8673 while (!list_empty(&entries)) {
8674 entry = list_entry(entries.next, struct extent_entry, list);
8675 list_del_init(&entry->list);
/*
 * Decide whether @rec, taken off duplicate_extents, really stands for several
 * extent items.
 *
 * When the checks at the top do not bail out, the first record on rec->dups
 * ("good") replaces @rec in @extent_cache: it inherits rec's ref count and
 * backrefs, and records already in the cache that overlap it are folded in,
 * either as further duplicates or by merging their refs and backrefs.
 *
 * The final statement returns 0 when good->num_duplicates is non-zero and 1
 * otherwise.
 */
8681 static int process_duplicates(struct cache_tree *extent_cache,
8682 struct extent_record *rec)
8684 struct extent_record *good, *tmp;
8685 struct cache_extent *cache;
8689 * If we found a extent record for this extent then return, or if we
8690 * have more than one duplicate we are likely going to need to delete
8693 if (rec->found_rec || rec->num_duplicates > 1)
8696 /* Shouldn't happen but just in case */
8697 BUG_ON(!rec->num_duplicates);
8700 * So this happens if we end up with a backref that doesn't match the
8701 * actual extent entry. So either the backref is bad or the extent
8702 * entry is bad. Either way we want to have the extent_record actually
8703 * reflect what we found in the extent_tree, so we need to take the
8704 * duplicate out and use that as the extent_record since the only way we
8705 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8707 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the single duplicate and reset its bookkeeping. */
8709 good = to_extent_record(rec->dups.next);
8710 list_del_init(&good->list);
8711 INIT_LIST_HEAD(&good->backrefs);
8712 INIT_LIST_HEAD(&good->dups);
8713 good->cache.start = good->start;
8714 good->cache.size = good->nr;
8715 good->content_checked = 0;
8716 good->owner_ref_checked = 0;
8717 good->num_duplicates = 0;
8718 good->refs = rec->refs;
8719 list_splice_init(&rec->backrefs, &good->backrefs);
8721 cache = lookup_cache_extent(extent_cache, good->start,
8725 tmp = container_of(cache, struct extent_record, cache);
8728 * If we find another overlapping extent and it's found_rec is
8729 * set then it's a duplicate and we need to try and delete
8732 if (tmp->found_rec || tmp->num_duplicates > 0) {
/* Queue "good" on the global duplicate list only once. */
8733 if (list_empty(&good->list))
8734 list_add_tail(&good->list,
8735 &duplicate_extents);
8736 good->num_duplicates += tmp->num_duplicates + 1;
8737 list_splice_init(&tmp->dups, &good->dups);
8738 list_del_init(&tmp->list);
8739 list_add_tail(&tmp->list, &good->dups);
8740 remove_cache_extent(extent_cache, &tmp->cache);
8745 * Ok we have another non extent item backed extent rec, so lets
8746 * just add it to this extent and carry on like we did above.
8748 good->refs += tmp->refs;
8749 list_splice_init(&tmp->backrefs, &good->backrefs);
8750 remove_cache_extent(extent_cache, &tmp->cache);
8753 ret = insert_cache_extent(extent_cache, &good->cache);
8756 return good->num_duplicates ? 0 : 1;
/*
 * Remove duplicate extent items belonging to @rec from the extent tree.
 *
 * The duplicates on rec->dups are compared by start and length to find one
 * record covering the others; records to drop are gathered on a local
 * delete_list and their EXTENT_ITEM keys are deleted from
 * root->fs_info->extent_root inside a single transaction.
 *
 * The final statement returns ret when it is non-zero, otherwise nr_del.
 */
8759 static int delete_duplicate_records(struct btrfs_root *root,
8760 struct extent_record *rec)
8762 struct btrfs_trans_handle *trans;
8763 LIST_HEAD(delete_list);
8764 struct btrfs_path path;
8765 struct extent_record *tmp, *good, *n;
8768 struct btrfs_key key;
8770 btrfs_init_path(&path);
8773 /* Find the record that covers all of the duplicates. */
8774 list_for_each_entry(tmp, &rec->dups, list) {
8775 if (good->start < tmp->start)
8777 if (good->nr > tmp->nr)
8780 if (tmp->start + tmp->nr < good->start + good->nr) {
8781 fprintf(stderr, "Ok we have overlapping extents that "
8782 "aren't completely covered by each other, this "
8783 "is going to require more careful thought. "
8784 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8785 tmp->start, tmp->nr, good->start, good->nr);
8792 list_add_tail(&rec->list, &delete_list);
8794 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8797 list_move_tail(&tmp->list, &delete_list);
/* All deletions go against the extent tree, whatever root was passed in. */
8800 root = root->fs_info->extent_root;
8801 trans = btrfs_start_transaction(root, 1);
8802 if (IS_ERR(trans)) {
8803 ret = PTR_ERR(trans);
8807 list_for_each_entry(tmp, &delete_list, list) {
8808 if (tmp->found_rec == 0)
8810 key.objectid = tmp->start;
8811 key.type = BTRFS_EXTENT_ITEM_KEY;
8812 key.offset = tmp->nr;
8814 /* Shouldn't happen but just in case */
8815 if (tmp->metadata) {
8816 fprintf(stderr, "Well this shouldn't happen, extent "
8817 "record overlaps but is metadata? "
8818 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8822 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8828 ret = btrfs_del_item(trans, root, &path);
8831 btrfs_release_path(&path);
8834 err = btrfs_commit_transaction(trans, root);
/* Unlink whatever is left on both lists. */
8838 while (!list_empty(&delete_list)) {
8839 tmp = to_extent_record(delete_list.next);
8840 list_del_init(&tmp->list);
8846 while (!list_empty(&rec->dups)) {
8847 tmp = to_extent_record(rec->dups.next);
8848 list_del_init(&tmp->list);
8852 btrfs_release_path(&path);
8854 if (!ret && !nr_del)
8855 rec->num_duplicates = 0;
8857 return ret ? ret : nr_del;
/*
 * Try to resolve the data backrefs of @rec for which no file extent was seen.
 *
 * For every non-full data backref with found_ref == 0 the function reads the
 * referenced fs root, searches it for the EXTENT_DATA key
 * (owner, offset) and reads disk bytenr/bytes from the item at the search
 * position. Those values are stored in the backref and found_ref is
 * incremented, so that verify_backrefs() can later weigh them.
 *
 * A root lookup failing with -ENOENT is treated as a bad ref; any other root
 * lookup error is returned to the caller.
 */
8860 static int find_possible_backrefs(struct btrfs_fs_info *info,
8861 struct btrfs_path *path,
8862 struct cache_tree *extent_cache,
8863 struct extent_record *rec)
8865 struct btrfs_root *root;
8866 struct extent_backref *back;
8867 struct data_backref *dback;
8868 struct cache_extent *cache;
8869 struct btrfs_file_extent_item *fi;
8870 struct btrfs_key key;
8874 list_for_each_entry(back, &rec->backrefs, list) {
8875 /* Don't care about full backrefs (poor unloved backrefs) */
8876 if (back->full_backref || !back->is_data)
8879 dback = to_data_backref(back);
8881 /* We found this one, we don't need to do a lookup */
8882 if (dback->found_ref)
8885 key.objectid = dback->root;
8886 key.type = BTRFS_ROOT_ITEM_KEY;
8887 key.offset = (u64)-1;
8889 root = btrfs_read_fs_root(info, &key);
8891 /* No root, definitely a bad ref, skip */
8892 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8894 /* Other err, exit */
8896 return PTR_ERR(root);
8898 key.objectid = dback->owner;
8899 key.type = BTRFS_EXTENT_DATA_KEY;
8900 key.offset = dback->offset;
8901 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8903 btrfs_release_path(path);
8906 /* Didn't find it, we can carry on */
8911 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8912 struct btrfs_file_extent_item);
8913 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8914 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8915 btrfs_release_path(path);
/* See whether the extent cache already tracks this bytenr. */
8916 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8918 struct extent_record *tmp;
8919 tmp = container_of(cache, struct extent_record, cache);
8922 * If we found an extent record for the bytenr for this
8923 * particular backref then we can't add it to our
8924 * current extent record. We only want to add backrefs
8925 * that don't have a corresponding extent item in the
8926 * extent tree since they likely belong to this record
8927 * and we need to fix it if it doesn't match bytenrs.
8933 dback->found_ref += 1;
8934 dback->disk_bytenr = bytenr;
8935 dback->bytes = bytes;
8938 * Set this so the verify backref code knows not to trust the
8939 * values in this backref.
8948 * Record orphan data ref into corresponding root.
8950 * Return 0 if the extent item contains data ref and recorded.
8951 * Return 1 if the extent item contains no useful data ref
8952 * In that case, it may contain only a shared_dataref or metadata backref,
8953 * or the file extent exists (this should be handled by the extent bytenr
8955 * Return <0 if something goes wrong.
8957 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8958 struct extent_record *rec)
8960 struct btrfs_key key;
8961 struct btrfs_root *dest_root;
8962 struct extent_backref *back;
8963 struct data_backref *dback;
8964 struct orphan_data_extent *orphan;
8965 struct btrfs_path path;
8966 int recorded_data_ref = 0;
8971 btrfs_init_path(&path);
8972 list_for_each_entry(back, &rec->backrefs, list) {
8973 if (back->full_backref || !back->is_data ||
8974 !back->found_extent_tree)
8976 dback = to_data_backref(back);
8977 if (dback->found_ref)
8979 key.objectid = dback->root;
8980 key.type = BTRFS_ROOT_ITEM_KEY;
8981 key.offset = (u64)-1;
8983 dest_root = btrfs_read_fs_root(fs_info, &key);
8985 /* For non-exist root we just skip it */
8986 if (IS_ERR(dest_root) || !dest_root)
8989 key.objectid = dback->owner;
8990 key.type = BTRFS_EXTENT_DATA_KEY;
8991 key.offset = dback->offset;
8993 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8994 btrfs_release_path(&path);
8996 * For ret < 0, it's OK since the fs-tree may be corrupted,
8997 * we need to record it for inode/file extent rebuild.
8998 * For ret > 0, we record it only for file extent rebuild.
8999 * For ret == 0, the file extent exists but only bytenr
9000 * mismatch, let the original bytenr fix routine to handle,
9006 orphan = malloc(sizeof(*orphan));
9011 INIT_LIST_HEAD(&orphan->list);
9012 orphan->root = dback->root;
9013 orphan->objectid = dback->owner;
9014 orphan->offset = dback->offset;
9015 orphan->disk_bytenr = rec->cache.start;
9016 orphan->disk_len = rec->cache.size;
9017 list_add(&dest_root->orphan_data_extents, &orphan->list);
9018 recorded_data_ref = 1;
9021 btrfs_release_path(&path);
9023 return !recorded_data_ref;
9029 * when an incorrect extent item is found, this will delete
9030 * all of the existing entries for it and recreate them
9031 * based on what the tree scan found.
/*
 * Rebuild the extent tree entries of @rec from the backrefs found by the scan.
 *
 * Visible sequence: for data extents whose ref counts disagree, first run
 * find_possible_backrefs() and verify_backrefs(); then, inside one
 * transaction on the extent root, call delete_extent_records(), consult
 * info->corrupt_blocks for the range, and call record_extent() for each
 * backref that has found_ref set. The transaction is committed and a
 * "Repaired extent references" message is printed.
 *
 * @info:         filesystem being repaired
 * @extent_cache: cache of extent records, passed to find_possible_backrefs()
 * @rec:          the extent record to fix
 */
9033 static int fixup_extent_refs(struct btrfs_fs_info *info,
9034 struct cache_tree *extent_cache,
9035 struct extent_record *rec)
9037 struct btrfs_trans_handle *trans = NULL;
9039 struct btrfs_path path;
9040 struct list_head *cur = rec->backrefs.next;
9041 struct cache_extent *cache;
9042 struct extent_backref *back;
/* Carry the full-backref state over to the recreated tree block item. */
9046 if (rec->flag_block_full_backref)
9047 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9049 btrfs_init_path(&path);
9050 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9052 * Sometimes the backrefs themselves are so broken they don't
9053 * get attached to any meaningful rec, so first go back and
9054 * check any of our backrefs that we couldn't find and throw
9055 * them into the list if we find the backref so that
9056 * verify_backrefs can figure out what to do.
9058 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9063 /* step one, make sure all of the backrefs agree */
9064 ret = verify_backrefs(info, &path, rec);
9068 trans = btrfs_start_transaction(info->extent_root, 1);
9069 if (IS_ERR(trans)) {
9070 ret = PTR_ERR(trans);
9074 /* step two, delete all the existing records */
9075 ret = delete_extent_records(trans, info->extent_root, &path,
9081 /* was this block corrupt? If so, don't add references to it */
9082 cache = lookup_cache_extent(info->corrupt_blocks,
9083 rec->start, rec->max_size);
9089 /* step three, recreate all the refs we did find */
9090 while(cur != &rec->backrefs) {
9091 back = to_extent_backref(cur);
9095 * if we didn't find any references, don't create a
9098 if (!back->found_ref)
9101 rec->bad_full_backref = 0;
9102 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9110 int err = btrfs_commit_transaction(trans, info->extent_root);
9116 fprintf(stderr, "Repaired extent references for %llu\n",
9117 (unsigned long long)rec->start);
9119 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item of @rec so
 * that it matches rec->flag_block_full_backref.
 *
 * The item is looked up in the extent root under a METADATA_ITEM key
 * (offset = info_level) when rec->metadata is set, otherwise under an
 * EXTENT_ITEM key (offset = max_size). On the success path the function
 * stores the result of btrfs_commit_transaction() in ret.
 *
 * @fs_info: filesystem being repaired
 * @rec:     extent record whose on-disk flags are corrected
 */
9123 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9124 struct extent_record *rec)
9126 struct btrfs_trans_handle *trans;
9127 struct btrfs_root *root = fs_info->extent_root;
9128 struct btrfs_path path;
9129 struct btrfs_extent_item *ei;
9130 struct btrfs_key key;
9134 key.objectid = rec->start;
9135 if (rec->metadata) {
9136 key.type = BTRFS_METADATA_ITEM_KEY;
9137 key.offset = rec->info_level;
9139 key.type = BTRFS_EXTENT_ITEM_KEY;
9140 key.offset = rec->max_size;
9143 trans = btrfs_start_transaction(root, 0);
9145 return PTR_ERR(trans);
9147 btrfs_init_path(&path);
9148 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* Both failure paths below still commit the (empty) transaction. */
9150 btrfs_release_path(&path);
9151 btrfs_commit_transaction(trans, root);
9154 fprintf(stderr, "Didn't find extent for %llu\n",
9155 (unsigned long long)rec->start);
9156 btrfs_release_path(&path);
9157 btrfs_commit_transaction(trans, root);
9161 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9162 struct btrfs_extent_item);
/* Read-modify-write of the flags word; only the full-backref bit changes. */
9163 flags = btrfs_extent_flags(path.nodes[0], ei);
9164 if (rec->flag_block_full_backref) {
9165 fprintf(stderr, "setting full backref on %llu\n",
9166 (unsigned long long)key.objectid);
9167 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9169 fprintf(stderr, "clearing full backref on %llu\n",
9170 (unsigned long long)key.objectid);
9171 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9173 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9174 btrfs_mark_buffer_dirty(path.nodes[0]);
9175 btrfs_release_path(&path);
9176 ret = btrfs_commit_transaction(trans, root);
9178 fprintf(stderr, "Repaired extent flags for %llu\n",
9179 (unsigned long long)rec->start);
9184 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to one corrupt block from its parent node in the extent
 * tree.
 *
 * The search for corrupt->key stops one level above the corrupt block
 * (path.lowest_level = corrupt->level + 1). The slot returned by the search
 * is tried first; if its block pointer is not corrupt->cache.start the whole
 * node is scanned. A matching slot is removed with btrfs_del_ptr().
 *
 * @trans:   running transaction
 * @info:    filesystem being repaired
 * @corrupt: description of the corrupt block (key, level, start)
 */
9185 static int prune_one_block(struct btrfs_trans_handle *trans,
9186 struct btrfs_fs_info *info,
9187 struct btrfs_corrupt_block *corrupt)
9190 struct btrfs_path path;
9191 struct extent_buffer *eb;
9195 int level = corrupt->level + 1;
9197 btrfs_init_path(&path);
9199 /* we want to stop at the parent to our busted block */
9200 path.lowest_level = level;
9202 ret = btrfs_search_slot(trans, info->extent_root,
9203 &corrupt->key, &path, -1, 1);
9208 eb = path.nodes[level];
9215 * hopefully the search gave us the block we want to prune,
9216 * lets try that first
9218 slot = path.slots[level];
9219 found = btrfs_node_blockptr(eb, slot);
9220 if (found == corrupt->cache.start)
9223 nritems = btrfs_header_nritems(eb);
9225 /* the search failed, lets scan this node and hope we find it */
9226 for (slot = 0; slot < nritems; slot++) {
9227 found = btrfs_node_blockptr(eb, slot);
9228 if (found == corrupt->cache.start)
9232 * we couldn't find the bad block. TODO, search all the nodes for pointers
9235 if (eb == info->extent_root->node) {
9240 btrfs_release_path(&path);
9245 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9246 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9249 btrfs_release_path(&path);
9253 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9255 struct btrfs_trans_handle *trans = NULL;
9256 struct cache_extent *cache;
9257 struct btrfs_corrupt_block *corrupt;
9260 cache = search_cache_extent(info->corrupt_blocks, 0);
9264 trans = btrfs_start_transaction(info->extent_root, 1);
9266 return PTR_ERR(trans);
9268 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9269 prune_one_block(trans, info, corrupt);
9270 remove_cache_extent(info->corrupt_blocks, cache);
9273 return btrfs_commit_transaction(trans, info->extent_root);
9277 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9279 struct btrfs_block_group_cache *cache;
9284 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9285 &start, &end, EXTENT_DIRTY);
9288 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9293 cache = btrfs_lookup_first_block_group(fs_info, start);
9298 start = cache->key.objectid + cache->key.offset;
/*
 * Walk every extent record in @extent_cache, report inconsistencies and, in
 * repair mode, fix them.
 *
 * Visible phases:
 *  - mark all known extents and corrupt blocks in excluded_extents, prune
 *    corrupt blocks and reset cached block groups;
 *  - in repair mode, work through duplicate_extents with
 *    process_duplicates() / delete_duplicate_records();
 *  - for each record, print ref, backpointer, owner-ref, full-backref,
 *    stripe-crossing and chunk-type problems, calling fixup_extent_refs() or
 *    fixup_extent_flags() where shown, then drop the record from the cache;
 *  - finally run btrfs_fix_block_accounting() in its own transaction on the
 *    extent root.
 *
 * @root:         any root of the filesystem; root->fs_info is what is used
 * @extent_cache: cache of extent_record entries built by the tree scan
 */
9302 static int check_extent_refs(struct btrfs_root *root,
9303 struct cache_tree *extent_cache)
9305 struct extent_record *rec;
9306 struct cache_extent *cache;
9312 * if we're doing a repair, we have to make sure
9313 * we don't allocate from the problem extents.
9314 * In the worst case, this will be all the
9317 cache = search_cache_extent(extent_cache, 0);
9319 rec = container_of(cache, struct extent_record, cache);
9320 set_extent_dirty(root->fs_info->excluded_extents,
9322 rec->start + rec->max_size - 1);
9323 cache = next_cache_extent(cache);
9326 /* pin down all the corrupted blocks too */
9327 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9329 set_extent_dirty(root->fs_info->excluded_extents,
9331 cache->start + cache->size - 1);
9332 cache = next_cache_extent(cache);
/* The return value of prune_corrupt_blocks() is not checked here. */
9334 prune_corrupt_blocks(root->fs_info);
9335 reset_cached_block_groups(root->fs_info);
9338 reset_cached_block_groups(root->fs_info);
9341 * We need to delete any duplicate entries we find first otherwise we
9342 * could mess up the extent tree when we have backrefs that actually
9343 * belong to a different extent item and not the weird duplicate one.
9345 while (repair && !list_empty(&duplicate_extents)) {
9346 rec = to_extent_record(duplicate_extents.next);
9347 list_del_init(&rec->list);
9349 /* Sometimes we can find a backref before we find an actual
9350 * extent, so we need to process it a little bit to see if there
9351 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9352 * if this is a backref screwup. If we need to delete stuff
9353 * process_duplicates() will return 0, otherwise it will return
9356 if (process_duplicates(extent_cache, rec))
9358 ret = delete_duplicate_records(root, rec);
9362 * delete_duplicate_records will return the number of entries
9363 * deleted, so if it's greater than 0 then we know we actually
9364 * did something and we need to remove.
9377 cache = search_cache_extent(extent_cache, 0);
9380 rec = container_of(cache, struct extent_record, cache);
9381 if (rec->num_duplicates) {
9382 fprintf(stderr, "extent item %llu has multiple extent "
9383 "items\n", (unsigned long long)rec->start);
9387 if (rec->refs != rec->extent_item_refs) {
9388 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9389 (unsigned long long)rec->start,
9390 (unsigned long long)rec->nr);
9391 fprintf(stderr, "extent item %llu, found %llu\n",
9392 (unsigned long long)rec->extent_item_refs,
9393 (unsigned long long)rec->refs);
9394 ret = record_orphan_data_extents(root->fs_info, rec);
9400 if (all_backpointers_checked(rec, 1)) {
9401 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9402 (unsigned long long)rec->start,
9403 (unsigned long long)rec->nr);
9407 if (!rec->owner_ref_checked) {
9408 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9409 (unsigned long long)rec->start,
9410 (unsigned long long)rec->nr);
9415 if (repair && fix) {
9416 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9422 if (rec->bad_full_backref) {
9423 fprintf(stderr, "bad full backref, on [%llu]\n",
9424 (unsigned long long)rec->start);
9426 ret = fixup_extent_flags(root->fs_info, rec);
9434 * Although it's not a extent ref's problem, we reuse this
9435 * routine for error reporting.
9436 * No repair function yet.
9438 if (rec->crossing_stripes) {
9440 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9441 rec->start, rec->start + rec->max_size);
9445 if (rec->wrong_chunk_type) {
9447 "bad extent [%llu, %llu), type mismatch with chunk\n",
9448 rec->start, rec->start + rec->max_size);
9452 remove_cache_extent(extent_cache, cache);
9453 free_all_extent_backrefs(rec);
9454 if (!init_extent_tree && repair && (!cur_err || fix))
9455 clear_extent_dirty(root->fs_info->excluded_extents,
9457 rec->start + rec->max_size - 1);
9462 if (ret && ret != -EAGAIN) {
9463 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9466 struct btrfs_trans_handle *trans;
9468 root = root->fs_info->extent_root;
9469 trans = btrfs_start_transaction(root, 1);
9470 if (IS_ERR(trans)) {
9471 ret = PTR_ERR(trans);
9475 btrfs_fix_block_accounting(trans, root);
9476 ret = btrfs_commit_transaction(trans, root);
9485 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9489 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9490 stripe_size = length;
9491 stripe_size /= num_stripes;
9492 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9493 stripe_size = length * 2;
9494 stripe_size /= num_stripes;
9495 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9496 stripe_size = length;
9497 stripe_size /= (num_stripes - 1);
9498 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9499 stripe_size = length;
9500 stripe_size /= (num_stripes - 2);
9502 stripe_size = length;
9508 * Check the chunk with its block group/dev list ref:
9509 * Return 0 if all refs seems valid.
9510 * Return 1 if part of refs seems valid, need later check for rebuild ref
9511 * like missing block group and needs to search extent tree to rebuild them.
9512 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-check one chunk record against the block group cache and the device
 * extent cache.
 *
 * Block group: looked up in block_group_cache->tree and compared on length,
 * offset and type flags; a matching record is unlinked from its list and
 * stored in chunk_rec->bg_rec.
 * Device extents: for every stripe, the (devid, offset) extent of the
 * per-stripe length computed by calc_stripe_length() is looked up and
 * compared on objectid, offset, chunk_offset and length; a matching extent is
 * moved onto chunk_rec->dextents.
 *
 * Mismatches and missing items are reported with the messages below.
 */
9514 static int check_chunk_refs(struct chunk_record *chunk_rec,
9515 struct block_group_tree *block_group_cache,
9516 struct device_extent_tree *dev_extent_cache,
9519 struct cache_extent *block_group_item;
9520 struct block_group_record *block_group_rec;
9521 struct cache_extent *dev_extent_item;
9522 struct device_extent_record *dev_extent_rec;
9526 int metadump_v2 = 0;
9530 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9533 if (block_group_item) {
9534 block_group_rec = container_of(block_group_item,
9535 struct block_group_record,
9537 if (chunk_rec->length != block_group_rec->offset ||
9538 chunk_rec->offset != block_group_rec->objectid ||
9540 chunk_rec->type_flags != block_group_rec->flags)) {
9543 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9544 chunk_rec->objectid,
9549 chunk_rec->type_flags,
9550 block_group_rec->objectid,
9551 block_group_rec->type,
9552 block_group_rec->offset,
9553 block_group_rec->offset,
9554 block_group_rec->objectid,
9555 block_group_rec->flags);
/* Claim the block group so it is not later reported as chunk-less. */
9558 list_del_init(&block_group_rec->list);
9559 chunk_rec->bg_rec = block_group_rec;
9564 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9565 chunk_rec->objectid,
9570 chunk_rec->type_flags);
/* From here on 'length' is the per-stripe length, not the chunk length. */
9577 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9578 chunk_rec->num_stripes);
9579 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9580 devid = chunk_rec->stripes[i].devid;
9581 offset = chunk_rec->stripes[i].offset;
9582 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9583 devid, offset, length);
9584 if (dev_extent_item) {
9585 dev_extent_rec = container_of(dev_extent_item,
9586 struct device_extent_record,
9588 if (dev_extent_rec->objectid != devid ||
9589 dev_extent_rec->offset != offset ||
9590 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9591 dev_extent_rec->length != length) {
9594 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9595 chunk_rec->objectid,
9598 chunk_rec->stripes[i].devid,
9599 chunk_rec->stripes[i].offset,
9600 dev_extent_rec->objectid,
9601 dev_extent_rec->offset,
9602 dev_extent_rec->length);
/* Attach the matching device extent to this chunk. */
9605 list_move(&dev_extent_rec->chunk_list,
9606 &chunk_rec->dextents);
9611 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9612 chunk_rec->objectid,
9615 chunk_rec->stripes[i].devid,
9616 chunk_rec->stripes[i].offset);
9623 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Run check_chunk_refs() on every record in @chunk_cache and sort the chunk
 * records by its result, then report leftover block groups and dev extents.
 *
 * @chunk_cache:       cache tree of chunk records to walk
 * @block_group_cache: block group records; entries still on its block_groups
 *                     list after the walk are reported as lacking a chunk
 * @dev_extent_cache:  dev extent records; entries on its no_chunk_orphans
 *                     list after the walk are reported as lacking a chunk
 * @good:              optional list that receives chunks with result 0
 * @bad:               list that receives the remaining failing chunks
 *                     (NOTE(review): the guarding condition is not obvious
 *                     here; callers in this file pass NULL -- confirm)
 * @rebuild:           optional list that receives chunks with result > 0
 * @silent:            forwarded unchanged to check_chunk_refs()
 */
9624 int check_chunks(struct cache_tree *chunk_cache,
9625 struct block_group_tree *block_group_cache,
9626 struct device_extent_tree *dev_extent_cache,
9627 struct list_head *good, struct list_head *bad,
9628 struct list_head *rebuild, int silent)
9630 struct cache_extent *chunk_item;
9631 struct chunk_record *chunk_rec;
9632 struct block_group_record *bg_rec;
9633 struct device_extent_record *dext_rec;
/* Walk the chunk cache from its first entry. */
9637 chunk_item = first_cache_extent(chunk_cache);
9638 while (chunk_item) {
9639 chunk_rec = container_of(chunk_item, struct chunk_record,
9641 err = check_chunk_refs(chunk_rec, block_group_cache,
9642 dev_extent_cache, silent);
/* Classify the chunk by the result; the output lists may be NULL. */
9645 if (err == 0 && good)
9646 list_add_tail(&chunk_rec->list, good);
9647 if (err > 0 && rebuild)
9648 list_add_tail(&chunk_rec->list, rebuild);
9650 list_add_tail(&chunk_rec->list, bad);
9651 chunk_item = next_cache_extent(chunk_item);
/* Block group records still on this list are reported as having no chunk. */
9654 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9657 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
/* Likewise for dev extents left on the no_chunk_orphans list. */
9665 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9669 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Compare a device item's byte_used with the sum of its cached dev extents.
 *
 * @dev_rec:    device record whose devid and byte_used are examined
 * @dext_cache: dev extent cache, searched starting at (devid, 0)
 *
 * Every dev extent visited is unlinked from its device_list and its length is
 * added to the running total.  The objectid != devid test marks the point
 * where the walk has left this device's extents.  A total that differs from
 * dev_rec->byte_used is reported with the message below.
 */
9680 static int check_device_used(struct device_record *dev_rec,
9681 struct device_extent_tree *dext_cache)
9683 struct cache_extent *cache;
9684 struct device_extent_record *dev_extent_rec;
/* Position on the first dev extent at or after (devid, offset 0). */
9687 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9689 dev_extent_rec = container_of(cache,
9690 struct device_extent_record,
9692 if (dev_extent_rec->objectid != dev_rec->devid)
/* This extent is accounted to the device: drop it from the orphan tracking. */
9695 list_del_init(&dev_extent_rec->device_list);
9696 total_byte += dev_extent_rec->length;
9697 cache = next_cache_extent(cache);
/* The accumulated extent length must equal the device item's byte_used. */
9700 if (total_byte != dev_rec->byte_used) {
9702 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9703 total_byte, dev_rec->byte_used, dev_rec->objectid,
9704 dev_rec->type, dev_rec->offset);
9711 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on every device record and report dev extents that
 * belong to no device.
 *
 * @dev_cache:        rb-tree of device records, walked with rb_first/rb_next
 * @dev_extent_cache: dev extent cache; entries remaining on its
 *                    no_device_orphans list are reported after the walk
 */
9712 static int check_devices(struct rb_root *dev_cache,
9713 struct device_extent_tree *dev_extent_cache)
9715 struct rb_node *dev_node;
9716 struct device_record *dev_rec;
9717 struct device_extent_record *dext_rec;
/* In-order walk over all cached device records. */
9721 dev_node = rb_first(dev_cache);
9723 dev_rec = container_of(dev_node, struct device_record, node);
9724 err = check_device_used(dev_rec, dev_extent_cache);
9728 dev_node = rb_next(dev_node);
/* check_device_used() unlinks the extents it accounts; the rest are orphans. */
9730 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9733 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9734 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root and append it to @head.
 *
 * @head:          list that receives the new record (added at the tail)
 * @objectid:      root objectid stored in the record
 * @bytenr:        bytenr of the root's tree block
 * @last_snapshot: stored as-is in the record
 * @level:         level of the root block
 * @drop_level:    stored as-is in the record
 * @level_size:    stored as-is; deal_root_from_list() later hands it to
 *                 read_tree_block() as the block size
 * @drop_key:      key copied into the record's drop_key
 *
 * NOTE(review): callers in this file pass drop_key == NULL for roots that are
 * not being dropped, so the memcpy below is presumably guarded against NULL
 * -- confirm.
 */
9741 static int add_root_item_to_list(struct list_head *head,
9742 u64 objectid, u64 bytenr, u64 last_snapshot,
9743 u8 level, u8 drop_level,
9744 int level_size, struct btrfs_key *drop_key)
9747 struct root_item_record *ri_rec;
/* The record is heap allocated; free_root_item_list() empties such lists. */
9748 ri_rec = malloc(sizeof(*ri_rec));
9751 ri_rec->bytenr = bytenr;
9752 ri_rec->objectid = objectid;
9753 ri_rec->level = level;
9754 ri_rec->level_size = level_size;
9755 ri_rec->drop_level = drop_level;
9756 ri_rec->last_snapshot = last_snapshot;
9758 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9759 list_add_tail(&ri_rec->list, head);
/*
 * Empty a list of root_item_record entries.
 *
 * @list: list head to drain; the first entry is unlinked repeatedly until the
 *        list is empty.
 *
 * NOTE(review): each unlinked record is presumably freed as well -- confirm.
 */
9764 static void free_root_item_list(struct list_head *list)
9766 struct root_item_record *ri_rec;
9768 while (!list_empty(list)) {
9769 ri_rec = list_first_entry(list, struct root_item_record,
9771 list_del_init(&ri_rec->list);
/*
 * Process every queued root_item_record on @list.
 *
 * For each record the root tree block is read, registered through
 * add_root_to_pending(), and run_next_block() is called with the record
 * attached.  The extent buffer is then released and the record unlinked.
 * After the list is drained run_next_block() is called again with a NULL
 * record.
 *
 * @list:  list of root_item_record entries to consume
 * @root:  root whose fs_info is used to read tree blocks
 * @bits:  scratch block_info array handed to run_next_block()
 *
 * The remaining cache arguments are passed through unchanged to
 * add_root_to_pending() and run_next_block().
 */
9776 static int deal_root_from_list(struct list_head *list,
9777 struct btrfs_root *root,
9778 struct block_info *bits,
9780 struct cache_tree *pending,
9781 struct cache_tree *seen,
9782 struct cache_tree *reada,
9783 struct cache_tree *nodes,
9784 struct cache_tree *extent_cache,
9785 struct cache_tree *chunk_cache,
9786 struct rb_root *dev_cache,
9787 struct block_group_tree *block_group_cache,
9788 struct device_extent_tree *dev_extent_cache)
9793 while (!list_empty(list)) {
9794 struct root_item_record *rec;
9795 struct extent_buffer *buf;
9796 rec = list_entry(list->next,
9797 struct root_item_record, list);
/* Read the root block of this tree at rec->bytenr. */
9799 buf = read_tree_block(root->fs_info,
9800 rec->bytenr, rec->level_size, 0);
9801 if (!extent_buffer_uptodate(buf)) {
9802 free_extent_buffer(buf);
9806 ret = add_root_to_pending(buf, extent_cache, pending,
9807 seen, nodes, rec->objectid);
9811 * To rebuild extent tree, we need deal with snapshot
9812 * one by one, otherwise we deal with node firstly which
9813 * can maximize readahead.
9816 ret = run_next_block(root, bits, bits_nr, &last,
9817 pending, seen, reada, nodes,
9818 extent_cache, chunk_cache,
9819 dev_cache, block_group_cache,
9820 dev_extent_cache, rec);
9824 free_extent_buffer(buf);
9825 list_del(&rec->list);
/* List drained: continue without a root record (last argument is NULL). */
9831 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9832 reada, nodes, extent_cache, chunk_cache,
9833 dev_cache, block_group_cache,
9834 dev_extent_cache, NULL);
/*
 * Driver for the chunk / extent / device cross checks.
 *
 * Flow:
 *  - initialise the local caches and lists and publish some of them through
 *    root->fs_info;
 *  - queue the tree root and the chunk root, then every ROOT_ITEM found in
 *    the tree root, on normal_trees or dropping_trees;
 *  - process both lists with deal_root_from_list();
 *  - run check_chunks(), check_extent_refs() and check_devices();
 *  - release everything that was set up.
 *
 * @root: any root of the filesystem; only root->fs_info is used, apart from
 *        being forwarded to helpers.
 *
 * -EAGAIN from check_chunks()/check_extent_refs() is tested explicitly.
 * NOTE(review): the second cleanup sequence at the end presumably belongs to
 * that retry path -- confirm against the labels/gotos.
 */
9844 static int check_chunks_and_extents(struct btrfs_root *root)
9846 struct rb_root dev_cache;
9847 struct cache_tree chunk_cache;
9848 struct block_group_tree block_group_cache;
9849 struct device_extent_tree dev_extent_cache;
9850 struct cache_tree extent_cache;
9851 struct cache_tree seen;
9852 struct cache_tree pending;
9853 struct cache_tree reada;
9854 struct cache_tree nodes;
9855 struct extent_io_tree excluded_extents;
9856 struct cache_tree corrupt_blocks;
9857 struct btrfs_path path;
9858 struct btrfs_key key;
9859 struct btrfs_key found_key;
9861 struct block_info *bits;
9863 struct extent_buffer *leaf;
9865 struct btrfs_root_item ri;
9866 struct list_head dropping_trees;
9867 struct list_head normal_trees;
9868 struct btrfs_root *root1;
/* All caches below live on this stack frame and are torn down before return. */
9873 dev_cache = RB_ROOT;
9874 cache_tree_init(&chunk_cache);
9875 block_group_tree_init(&block_group_cache);
9876 device_extent_tree_init(&dev_extent_cache);
9878 cache_tree_init(&extent_cache);
9879 cache_tree_init(&seen);
9880 cache_tree_init(&pending);
9881 cache_tree_init(&nodes);
9882 cache_tree_init(&reada);
9883 cache_tree_init(&corrupt_blocks);
9884 extent_io_tree_init(&excluded_extents);
9885 INIT_LIST_HEAD(&dropping_trees);
9886 INIT_LIST_HEAD(&normal_trees);
/* Publish the local caches and the free hook via fs_info (reset to NULL below). */
9889 root->fs_info->excluded_extents = &excluded_extents;
9890 root->fs_info->fsck_extent_cache = &extent_cache;
9891 root->fs_info->free_extent_hook = free_extent_hook;
9892 root->fs_info->corrupt_blocks = &corrupt_blocks;
9896 bits = malloc(bits_nr * sizeof(struct block_info));
9902 if (ctx.progress_enabled) {
9903 ctx.tp = TASK_EXTENTS;
9904 task_start(ctx.info);
/* Queue the tree root first ... */
9908 root1 = root->fs_info->tree_root;
9909 level = btrfs_header_level(root1->node);
9910 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9911 root1->node->start, 0, level, 0,
9912 root1->fs_info->nodesize, NULL);
/* ... then the chunk root. */
9915 root1 = root->fs_info->chunk_root;
9916 level = btrfs_header_level(root1->node);
9917 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9918 root1->node->start, 0, level, 0,
9919 root1->fs_info->nodesize, NULL);
/* Scan the tree root for ROOT_ITEM entries. */
9922 btrfs_init_path(&path);
9925 key.type = BTRFS_ROOT_ITEM_KEY;
9926 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9931 leaf = path.nodes[0];
9932 slot = path.slots[0];
9933 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9934 ret = btrfs_next_leaf(root, &path);
9937 leaf = path.nodes[0];
9938 slot = path.slots[0];
9940 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9941 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9942 unsigned long offset;
9945 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9946 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9947 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * A zero drop_progress objectid queues the root on normal_trees; otherwise
 * it goes on dropping_trees together with its drop key.
 */
9948 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9949 level = btrfs_root_level(&ri);
9950 level_size = root->fs_info->nodesize;
9951 ret = add_root_item_to_list(&normal_trees,
9953 btrfs_root_bytenr(&ri),
9954 last_snapshot, level,
9955 0, level_size, NULL);
9959 level = btrfs_root_level(&ri);
9960 level_size = root->fs_info->nodesize;
9961 objectid = found_key.objectid;
9962 btrfs_disk_key_to_cpu(&found_key,
9964 ret = add_root_item_to_list(&dropping_trees,
9966 btrfs_root_bytenr(&ri),
9967 last_snapshot, level,
9969 level_size, &found_key);
9976 btrfs_release_path(&path);
9979 * check_block can return -EAGAIN if it fixes something, please keep
9980 * this in mind when dealing with return values from these functions, if
9981 * we get -EAGAIN we want to fall through and restart the loop.
9983 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9984 &seen, &reada, &nodes, &extent_cache,
9985 &chunk_cache, &dev_cache, &block_group_cache,
9992 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9993 &pending, &seen, &reada, &nodes,
9994 &extent_cache, &chunk_cache, &dev_cache,
9995 &block_group_cache, &dev_extent_cache);
/* Chunk cross check; no result lists are requested (good/bad/rebuild NULL). */
10002 ret = check_chunks(&chunk_cache, &block_group_cache,
10003 &dev_extent_cache, NULL, NULL, NULL, 0);
10005 if (ret == -EAGAIN)
10010 ret = check_extent_refs(root, &extent_cache);
10012 if (ret == -EAGAIN)
10017 ret = check_devices(&dev_cache, &dev_extent_cache);
/* First cleanup sequence: stop the progress task and unpublish the caches. */
10022 task_stop(ctx.info);
10024 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10025 extent_io_tree_cleanup(&excluded_extents);
10026 root->fs_info->fsck_extent_cache = NULL;
10027 root->fs_info->free_extent_hook = NULL;
10028 root->fs_info->corrupt_blocks = NULL;
10029 root->fs_info->excluded_extents = NULL;
10032 free_chunk_cache_tree(&chunk_cache);
10033 free_device_cache_tree(&dev_cache);
10034 free_block_group_tree(&block_group_cache);
10035 free_device_extent_tree(&dev_extent_cache);
10036 free_extent_cache_tree(&seen);
10037 free_extent_cache_tree(&pending);
10038 free_extent_cache_tree(&reada);
10039 free_extent_cache_tree(&nodes);
10040 free_root_item_list(&normal_trees);
10041 free_root_item_list(&dropping_trees);
/* Second cleanup sequence: additionally releases the extent record cache. */
10044 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10045 free_extent_cache_tree(&seen);
10046 free_extent_cache_tree(&pending);
10047 free_extent_cache_tree(&reada);
10048 free_extent_cache_tree(&nodes);
10049 free_chunk_cache_tree(&chunk_cache);
10050 free_block_group_tree(&block_group_cache);
10051 free_device_cache_tree(&dev_cache);
10052 free_device_extent_tree(&dev_extent_cache);
10053 free_extent_record_cache(&extent_cache);
10054 free_root_item_list(&normal_trees);
10055 free_root_item_list(&dropping_trees);
10056 extent_io_tree_cleanup(&excluded_extents);
10061 * Check backrefs of a tree block given by @bytenr or @eb.
10063 * @root: the root containing the @bytenr or @eb
10064 * @eb: tree block extent buffer, can be NULL
10065 * @bytenr: bytenr of the tree block to search
10066 * @level: tree level of the tree block
10067 * @owner: owner of the tree block
10069 * Return >0 for any error found and output error message
10070 * Return 0 for no error found
/*
 * Implementation notes:
 *  - the extent tree is searched with key (bytenr, METADATA_ITEM or
 *    EXTENT_ITEM depending on the SKINNY_METADATA incompat flag, -1), then
 *    btrfs_previous_extent_item() is used to step to the item;
 *  - the item's TREE_BLOCK flag, generation, level and ref count are compared
 *    with the caller's view;
 *  - inline refs are walked looking for a TREE_BLOCK_REF whose offset equals
 *    root->objectid or @owner, or a SHARED_BLOCK_REF whose parent passes a
 *    recursive check (with @eb == NULL and level + 1);
 *  - a keyed TREE_BLOCK_REF item is searched for as a fallback;
 *  - the "backref lost" message is only printed when @eb is non-NULL.
 */
10072 static int check_tree_block_ref(struct btrfs_root *root,
10073 struct extent_buffer *eb, u64 bytenr,
10074 int level, u64 owner)
10076 struct btrfs_key key;
10077 struct btrfs_root *extent_root = root->fs_info->extent_root;
10078 struct btrfs_path path;
10079 struct btrfs_extent_item *ei;
10080 struct btrfs_extent_inline_ref *iref;
10081 struct extent_buffer *leaf;
10087 u32 nodesize = root->fs_info->nodesize;
10090 int tree_reloc_root = 0;
/* Remember whether @bytenr is the root node of a tree reloc root. */
10095 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10096 btrfs_header_bytenr(root->node) == bytenr)
10097 tree_reloc_root = 1;
10099 btrfs_init_path(&path);
10100 key.objectid = bytenr;
10101 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10102 key.type = BTRFS_METADATA_ITEM_KEY;
10104 key.type = BTRFS_EXTENT_ITEM_KEY;
10105 key.offset = (u64)-1;
10107 /* Search for the backref in extent tree */
10108 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10110 err |= BACKREF_MISSING;
10113 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10115 err |= BACKREF_MISSING;
10119 leaf = path.nodes[0];
10120 slot = path.slots[0];
10121 btrfs_item_key_to_cpu(leaf, &key, slot);
10123 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM keeps the level in key.offset and inline refs follow the
 * extent item directly; otherwise a btrfs_tree_block_info sits in between.
 */
10125 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10126 skinny_level = (int)key.offset;
10127 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10129 struct btrfs_tree_block_info *info;
10131 info = (struct btrfs_tree_block_info *)(ei + 1);
10132 skinny_level = btrfs_tree_block_level(leaf, info);
10133 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* NOTE(review): the checks below assign err instead of OR-ing into it. */
10140 if (!(btrfs_extent_flags(leaf, ei) &
10141 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10143 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10144 key.objectid, nodesize,
10145 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10146 err = BACKREF_MISMATCH;
10148 header_gen = btrfs_header_generation(eb);
10149 extent_gen = btrfs_extent_generation(leaf, ei);
10150 if (header_gen != extent_gen) {
10152 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10153 key.objectid, nodesize, header_gen,
10155 err = BACKREF_MISMATCH;
10157 if (level != skinny_level) {
10159 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10160 key.objectid, nodesize, level, skinny_level);
10161 err = BACKREF_MISMATCH;
10163 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10165 "extent[%llu %u] is referred by other roots than %llu",
10166 key.objectid, nodesize, root->objectid);
10167 err = BACKREF_MISMATCH;
10172 * Iterate the extent/metadata item to find the exact backref
10174 item_size = btrfs_item_size_nr(leaf, slot);
10175 ptr = (unsigned long)iref;
10176 end = (unsigned long)ei + item_size;
10177 while (ptr < end) {
10178 iref = (struct btrfs_extent_inline_ref *)ptr;
10179 type = btrfs_extent_inline_ref_type(leaf, iref);
10180 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10182 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10183 (offset == root->objectid || offset == owner)) {
10185 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10187 * Backref of tree reloc root points to itself, no need
10188 * to check backref any more.
10190 if (tree_reloc_root)
10193 /* Check if the backref points to valid referencer */
10194 found_ref = !check_tree_block_ref(root, NULL,
10195 offset, level + 1, owner);
10200 ptr += btrfs_extent_inline_ref_size(type);
10204 * Inlined extent item doesn't have what we need, check
10205 * TREE_BLOCK_REF_KEY
10208 btrfs_release_path(&path);
10209 key.objectid = bytenr;
10210 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10211 key.offset = root->objectid;
10213 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10218 err |= BACKREF_MISSING;
10220 btrfs_release_path(&path);
10221 if (eb && (err & BACKREF_MISSING))
10222 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10223 bytenr, nodesize, owner, level);
10228 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10230 * Return >0 for any error found and output error message
10231 * Return 0 for no error found
/*
 * @root: root that owns @eb; its fs_info supplies the extent root and
 *        sectorsize
 * @eb:   leaf containing the EXTENT_DATA item
 * @slot: slot of the item in @eb (the variable is reused later for the slot
 *        found in the extent tree)
 *
 * Implementation notes:
 *  - inline file extents and extents with disk_bytenr == 0 are not checked;
 *  - disk_num_bytes and num_bytes are tested for sectorsize alignment and
 *    added to the global data_bytes_allocated / data_bytes_referenced
 *    counters;
 *  - the data backref is looked for in three places, in order: inline refs of
 *    the EXTENT_ITEM, a keyed EXTENT_DATA_REF item, and finally a keyed
 *    SHARED_DATA_REF item whose offset is eb->start.
 */
10233 static int check_extent_data_item(struct btrfs_root *root,
10234 struct extent_buffer *eb, int slot)
10236 struct btrfs_file_extent_item *fi;
10237 struct btrfs_path path;
10238 struct btrfs_root *extent_root = root->fs_info->extent_root;
10239 struct btrfs_key fi_key;
10240 struct btrfs_key dbref_key;
10241 struct extent_buffer *leaf;
10242 struct btrfs_extent_item *ei;
10243 struct btrfs_extent_inline_ref *iref;
10244 struct btrfs_extent_data_ref *dref;
10247 u64 disk_num_bytes;
10248 u64 extent_num_bytes;
10255 int found_dbackref = 0;
10259 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10260 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10262 /* Nothing to check for hole and inline data extents */
10263 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10264 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10267 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10268 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10269 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10271 /* Check unaligned disk_num_bytes and num_bytes */
10272 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10274 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10275 fi_key.objectid, fi_key.offset, disk_num_bytes,
10276 root->fs_info->sectorsize);
10277 err |= BYTES_UNALIGNED;
10279 data_bytes_allocated += disk_num_bytes;
10281 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10283 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10284 fi_key.objectid, fi_key.offset, extent_num_bytes,
10285 root->fs_info->sectorsize);
10286 err |= BYTES_UNALIGNED;
10288 data_bytes_referenced += extent_num_bytes;
/* The leaf's header owner is accepted as a backref root besides root->objectid. */
10290 owner = btrfs_header_owner(eb);
10292 /* Check the extent item of the file extent in extent tree */
10293 btrfs_init_path(&path);
10294 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10295 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10296 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10298 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10302 leaf = path.nodes[0];
10303 slot = path.slots[0];
10304 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10306 extent_flags = btrfs_extent_flags(leaf, ei);
10308 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10310 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10311 disk_bytenr, disk_num_bytes,
10312 BTRFS_EXTENT_FLAG_DATA);
10313 err |= BACKREF_MISMATCH;
10316 /* Check data backref inside that extent item */
10317 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10318 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10319 ptr = (unsigned long)iref;
10320 end = (unsigned long)ei + item_size;
10321 while (ptr < end) {
10322 iref = (struct btrfs_extent_inline_ref *)ptr;
10323 type = btrfs_extent_inline_ref_type(leaf, iref);
10324 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10326 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10327 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10328 if (ref_root == owner || ref_root == root->objectid)
10329 found_dbackref = 1;
10330 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10331 found_dbackref = !check_tree_block_ref(root, NULL,
10332 btrfs_extent_inline_ref_offset(leaf, iref),
10336 if (found_dbackref)
10338 ptr += btrfs_extent_inline_ref_size(type);
10341 if (!found_dbackref) {
10342 btrfs_release_path(&path);
10344 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10345 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10346 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10347 dbref_key.offset = hash_extent_data_ref(root->objectid,
10348 fi_key.objectid, fi_key.offset);
10350 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10351 &dbref_key, &path, 0, 0);
10353 found_dbackref = 1;
10357 btrfs_release_path(&path);
10360 * Neither inlined nor EXTENT_DATA_REF found, try
10361 * SHARED_DATA_REF as last chance.
10363 dbref_key.objectid = disk_bytenr;
10364 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10365 dbref_key.offset = eb->start;
10367 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10368 &dbref_key, &path, 0, 0);
10370 found_dbackref = 1;
10376 if (!found_dbackref)
10377 err |= BACKREF_MISSING;
10378 btrfs_release_path(&path);
10379 if (err & BACKREF_MISSING) {
10380 error("data extent[%llu %llu] backref lost",
10381 disk_bytenr, disk_num_bytes);
10387 * Get real tree block level for the case like shared block
10388 * Return >= 0 as tree level
10389 * Return <0 for error
/*
 * @fs_info: filesystem whose extent tree is searched
 * @bytenr:  bytenr of the tree block to look up
 *
 * The level is obtained from two sources and the two are compared: the
 * extent tree item (key.offset for a METADATA_ITEM, btrfs_tree_block_info
 * otherwise) and the header of the tree block itself, which is read using
 * the generation stored in the extent item.  The header level is what gets
 * returned on success.
 */
10391 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10393 struct extent_buffer *eb;
10394 struct btrfs_path path;
10395 struct btrfs_key key;
10396 struct btrfs_extent_item *ei;
10403 /* Search extent tree for extent generation and level */
10404 key.objectid = bytenr;
10405 key.type = BTRFS_METADATA_ITEM_KEY;
10406 key.offset = (u64)-1;
10408 btrfs_init_path(&path);
10409 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10412 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10420 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10421 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10422 struct btrfs_extent_item);
/* Only extents flagged as tree blocks have a level to report. */
10423 flags = btrfs_extent_flags(path.nodes[0], ei);
10424 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10429 /* Get transid for later read_tree_block() check */
10430 transid = btrfs_extent_generation(path.nodes[0], ei);
10432 /* Get backref level as one source */
10433 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10434 backref_level = key.offset;
10436 struct btrfs_tree_block_info *info;
10438 info = (struct btrfs_tree_block_info *)(ei + 1);
10439 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10441 btrfs_release_path(&path);
10443 /* Get level from tree block as an alternative source */
10444 eb = read_tree_block(fs_info, bytenr, fs_info->nodesize, transid);
10445 if (!extent_buffer_uptodate(eb)) {
10446 free_extent_buffer(eb);
10449 header_level = btrfs_header_level(eb);
10450 free_extent_buffer(eb);
/* Both sources must agree before the level is trusted. */
10452 if (header_level != backref_level)
10454 return header_level;
10457 btrfs_release_path(&path);
10462 * Check if a tree block backref is valid (points to a valid tree block)
10463 * if level == -1, level will be resolved
10464 * Return >0 for any error found and print error message
/*
 * @fs_info: filesystem being checked
 * @root_id: objectid of the root named by the backref
 * @bytenr:  bytenr of the tree block the backref belongs to
 * @level:   expected level of that block (resolved through
 *           query_tree_block_level() for the special value described above)
 *
 * Method: read the block, take its first key, and search @root_id's tree for
 * that key with path.lowest_level = level.  The node found at that level
 * must have the same bytenr and level as the block; otherwise
 * REFERENCER_MISMATCH is recorded.  Failures to resolve the level, read the
 * root, read the block or run the search record REFERENCER_MISSING.
 */
10466 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10467 u64 bytenr, int level)
10469 struct btrfs_root *root;
10470 struct btrfs_key key;
10471 struct btrfs_path path;
10472 struct extent_buffer *eb;
10473 struct extent_buffer *node;
10474 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10478 /* Query level for level == -1 special case */
10480 level = query_tree_block_level(fs_info, bytenr);
10482 err |= REFERENCER_MISSING;
10486 key.objectid = root_id;
10487 key.type = BTRFS_ROOT_ITEM_KEY;
10488 key.offset = (u64)-1;
10490 root = btrfs_read_fs_root(fs_info, &key);
10491 if (IS_ERR(root)) {
10492 err |= REFERENCER_MISSING;
10496 /* Read out the tree block to get item/node key */
10497 eb = read_tree_block(fs_info, bytenr, root->fs_info->nodesize, 0);
10498 if (!extent_buffer_uptodate(eb)) {
10499 err |= REFERENCER_MISSING;
10500 free_extent_buffer(eb);
10504 /* Empty tree, no need to check key */
10505 if (!btrfs_header_nritems(eb) && !level) {
10506 free_extent_buffer(eb);
/* The first key is read as a node key or an item key. */
10511 btrfs_node_key_to_cpu(eb, &key, 0);
10513 btrfs_item_key_to_cpu(eb, &key, 0);
10515 free_extent_buffer(eb);
10517 btrfs_init_path(&path);
10518 path.lowest_level = level;
10519 /* Search with the first key, to ensure we can reach it */
10520 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10522 err |= REFERENCER_MISSING;
/* The node reached at the requested level must be the block under test. */
10526 node = path.nodes[level];
10527 if (btrfs_header_bytenr(node) != bytenr) {
10529 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10530 bytenr, nodesize, bytenr,
10531 btrfs_header_bytenr(node));
10532 err |= REFERENCER_MISMATCH;
10534 if (btrfs_header_level(node) != level) {
10536 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10537 bytenr, nodesize, level,
10538 btrfs_header_level(node));
10539 err |= REFERENCER_MISMATCH;
10543 btrfs_release_path(&path);
10545 if (err & REFERENCER_MISSING) {
10547 error("extent [%llu %d] lost referencer (owner: %llu)",
10548 bytenr, nodesize, root_id);
10551 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10552 bytenr, nodesize, root_id, level);
10559 * Check if tree block @eb is tree reloc root.
10560 * Return 0 if it's not or any problem happens
10561 * Return 1 if it's a tree reloc root
/*
 * @fs_info: filesystem being checked
 * @eb:      tree block to test; its header bytenr and owner are used
 *
 * The reloc root is looked up with key (TREE_RELOC_OBJECTID, ROOT_ITEM,
 * owner of @eb) through btrfs_read_fs_root_no_cache(), its node's bytenr is
 * compared with @eb's, and the root is released with btrfs_free_fs_root().
 */
10563 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10564 struct extent_buffer *eb)
10566 struct btrfs_root *tree_reloc_root;
10567 struct btrfs_key key;
10568 u64 bytenr = btrfs_header_bytenr(eb);
10569 u64 owner = btrfs_header_owner(eb);
10572 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10573 key.offset = owner;
10574 key.type = BTRFS_ROOT_ITEM_KEY;
10576 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10577 if (IS_ERR(tree_reloc_root))
/* @eb is the reloc root only if it is that root's top node. */
10580 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10582 btrfs_free_fs_root(tree_reloc_root);
10587 * Check referencer for shared block backref
10588 * If level == -1, this function will resolve the level.
/*
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the tree block named as parent by the shared backref
 * @bytenr:  bytenr of the referenced tree block
 * @level:   level of the referenced block (may be resolved through
 *           query_tree_block_level())
 *
 * The parent block is read and must sit exactly one level above @level; its
 * block pointers are then scanned for @bytenr.  A block that names itself as
 * parent is tested with is_tree_reloc_root() instead.  REFERENCER_MISSING is
 * returned, after an error message, when no parent reference is found.
 */
10590 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10591 u64 parent, u64 bytenr, int level)
10593 struct extent_buffer *eb;
10595 int found_parent = 0;
10598 eb = read_tree_block(fs_info, parent, fs_info->nodesize, 0);
10599 if (!extent_buffer_uptodate(eb))
10603 level = query_tree_block_level(fs_info, bytenr);
10607 /* It's possible it's a tree reloc root */
10608 if (parent == bytenr) {
10609 if (is_tree_reloc_root(fs_info, eb))
/* A genuine parent is exactly one level above its child. */
10614 if (level + 1 != btrfs_header_level(eb))
10617 nr = btrfs_header_nritems(eb);
10618 for (i = 0; i < nr; i++) {
10619 if (bytenr == btrfs_node_blockptr(eb, i)) {
10625 free_extent_buffer(eb);
10626 if (!found_parent) {
10628 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10629 bytenr, fs_info->nodesize, parent, level);
10630 return REFERENCER_MISSING;
10636 * Check referencer for normal (inlined) data ref
10637 * If len == 0, it will be resolved by searching in extent tree
/*
 * @fs_info:  filesystem being checked
 * @root_id:  root named by the data backref
 * @objectid: inode objectid named by the data backref
 * @offset:   backref offset (file offset minus file extent offset)
 * @bytenr:   disk bytenr of the data extent
 * @len:      disk length of the data extent
 * @count:    reference count recorded in the backref
 *
 * The EXTENT_DATA items of inode @objectid in root @root_id are walked,
 * starting from the search key built below, and those that point at
 * (@bytenr, @len) with a matching backref offset are compared against
 * @count.  REFERENCER_MISSING is returned, after an error message, when the
 * number found differs from @count.
 */
10639 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10640 u64 root_id, u64 objectid, u64 offset,
10641 u64 bytenr, u64 len, u32 count)
10643 struct btrfs_root *root;
10644 struct btrfs_root *extent_root = fs_info->extent_root;
10645 struct btrfs_key key;
10646 struct btrfs_path path;
10647 struct extent_buffer *leaf;
10648 struct btrfs_file_extent_item *fi;
10649 u32 found_count = 0;
/* Extent tree lookup of the EXTENT_ITEM for @bytenr. */
10654 key.objectid = bytenr;
10655 key.type = BTRFS_EXTENT_ITEM_KEY;
10656 key.offset = (u64)-1;
10658 btrfs_init_path(&path);
10659 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10662 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10665 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10666 if (key.objectid != bytenr ||
10667 key.type != BTRFS_EXTENT_ITEM_KEY)
10670 btrfs_release_path(&path);
/* Switch from the extent tree to the tree of the referencing root. */
10672 key.objectid = root_id;
10673 key.type = BTRFS_ROOT_ITEM_KEY;
10674 key.offset = (u64)-1;
10675 btrfs_init_path(&path);
10677 root = btrfs_read_fs_root(fs_info, &key);
10681 key.objectid = objectid;
10682 key.type = BTRFS_EXTENT_DATA_KEY;
10684 * It can be nasty as data backref offset is
10685 * file offset - file extent offset, which is smaller or
10686 * equal to original backref offset. The only special case is
10687 * overflow. So we need to special check and do further search.
10689 key.offset = offset & (1ULL << 63) ? 0 : offset;
10691 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10696 * Search afterwards to get correct one
10697 * NOTE: As we must do a comprehensive check on the data backref to
10698 * make sure the dref count also matches, we must iterate all file
10699 * extents for that inode.
10702 leaf = path.nodes[0];
10703 slot = path.slots[0];
10705 if (slot >= btrfs_header_nritems(leaf))
10707 btrfs_item_key_to_cpu(leaf, &key, slot);
10708 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10710 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10712 * Except normal disk bytenr and disk num bytes, we still
10713 * need to do extra check on dbackref offset as
10714 * dbackref offset = file_offset - file_extent_offset
10716 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10717 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10718 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10723 ret = btrfs_next_item(root, &path);
10728 btrfs_release_path(&path);
10729 if (found_count != count) {
10731 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10732 bytenr, len, root_id, objectid, offset, count, found_count);
10733 return REFERENCER_MISSING;
10739 * Check if the referencer of a shared data backref exists
/*
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the leaf named as parent by the shared data backref
 * @bytenr:  disk bytenr of the data extent
 *
 * The parent leaf is read and its items are scanned for a non-inline
 * EXTENT_DATA item whose disk bytenr equals @bytenr.  REFERENCER_MISSING is
 * returned, after an error message, when none is found.
 */
10741 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10742 u64 parent, u64 bytenr)
10744 struct extent_buffer *eb;
10745 struct btrfs_key key;
10746 struct btrfs_file_extent_item *fi;
10748 int found_parent = 0;
10751 eb = read_tree_block(fs_info, parent, fs_info->nodesize, 0);
10752 if (!extent_buffer_uptodate(eb))
10755 nr = btrfs_header_nritems(eb);
10756 for (i = 0; i < nr; i++) {
10757 btrfs_item_key_to_cpu(eb, &key, i);
/* Only regular/prealloc EXTENT_DATA items can carry a disk bytenr. */
10758 if (key.type != BTRFS_EXTENT_DATA_KEY)
10761 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10762 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10765 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10772 free_extent_buffer(eb);
10773 if (!found_parent) {
10774 error("shared extent %llu referencer lost (parent: %llu)",
10776 return REFERENCER_MISSING;
10782 * This function will check a given extent item, including its backref and
10783 * itself (like crossing stripe boundary and type)
10785 * Since we don't use extent_record anymore, introduce new error bit
/*
 * @fs_info: filesystem being checked
 * @eb:      extent tree leaf holding the item
 * @slot:    slot of the EXTENT_ITEM / METADATA_ITEM in @eb
 *
 * Implementation notes:
 *  - the global bytes_used counter grows by key.offset for an EXTENT_ITEM
 *    key and by nodesize otherwise;
 *  - items smaller than struct btrfs_extent_item are rejected as the
 *    unsupported V0 format;
 *  - metadata extents are tested with check_crossing_stripes();
 *  - every inline ref is dispatched by type to the matching check_*_backref()
 *    helper; unknown types set UNKNOWN_TYPE, and walking past the item end
 *    sets ITEM_SIZE_MISMATCH.
 */
10787 static int check_extent_item(struct btrfs_fs_info *fs_info,
10788 struct extent_buffer *eb, int slot)
10790 struct btrfs_extent_item *ei;
10791 struct btrfs_extent_inline_ref *iref;
10792 struct btrfs_extent_data_ref *dref;
10796 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10797 u32 item_size = btrfs_item_size_nr(eb, slot);
10802 struct btrfs_key key;
10806 btrfs_item_key_to_cpu(eb, &key, slot);
10807 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10808 bytes_used += key.offset;
10810 bytes_used += nodesize;
10812 if (item_size < sizeof(*ei)) {
10814 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10815 * old thing when on disk format is still un-determined.
10816 * No need to care about it anymore
10818 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10822 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10823 flags = btrfs_extent_flags(eb, ei);
10825 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10827 if (metadata && check_crossing_stripes(global_info, key.objectid,
10829 error("bad metadata [%llu, %llu) crossing stripe boundary",
10830 key.objectid, key.objectid + nodesize);
10831 err |= CROSSING_STRIPE_BOUNDARY;
/* Inline refs start right after the extent item, or after the block info. */
10834 ptr = (unsigned long)(ei + 1);
10836 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10837 /* Old EXTENT_ITEM metadata */
10838 struct btrfs_tree_block_info *info;
10840 info = (struct btrfs_tree_block_info *)ptr;
10841 level = btrfs_tree_block_level(eb, info);
10842 ptr += sizeof(struct btrfs_tree_block_info);
10844 /* New METADATA_ITEM */
10845 level = key.offset;
10847 end = (unsigned long)ei + item_size;
10850 /* Reached extent item end normally */
10854 /* Beyond extent item end, wrong item size */
10856 err |= ITEM_SIZE_MISMATCH;
10857 error("extent item at bytenr %llu slot %d has wrong size",
10862 /* Now check every backref in this extent item */
10863 iref = (struct btrfs_extent_inline_ref *)ptr;
10864 type = btrfs_extent_inline_ref_type(eb, iref);
10865 offset = btrfs_extent_inline_ref_offset(eb, iref);
10867 case BTRFS_TREE_BLOCK_REF_KEY:
10868 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10872 case BTRFS_SHARED_BLOCK_REF_KEY:
10873 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10877 case BTRFS_EXTENT_DATA_REF_KEY:
10878 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10879 ret = check_extent_data_backref(fs_info,
10880 btrfs_extent_data_ref_root(eb, dref),
10881 btrfs_extent_data_ref_objectid(eb, dref),
10882 btrfs_extent_data_ref_offset(eb, dref),
10883 key.objectid, key.offset,
10884 btrfs_extent_data_ref_count(eb, dref));
10887 case BTRFS_SHARED_DATA_REF_KEY:
10888 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10892 error("extent[%llu %d %llu] has unknown ref type: %d",
10893 key.objectid, key.type, key.offset, type);
10894 err |= UNKNOWN_TYPE;
/* Advance by the size of the ref just handled. */
10898 ptr += btrfs_extent_inline_ref_size(type);
10906 * Check if a dev extent item is referred correctly by its chunk
/*
 * check_dev_extent_item - cross-check one dev extent item with the chunk it
 * names.
 *
 * @fs_info: filesystem context; fs_info->chunk_root is the tree searched
 * @eb:      leaf that holds the dev extent item
 * @slot:    slot of the item inside @eb
 *
 * A CHUNK_ITEM key is built from the chunk objectid/offset stored in the dev
 * extent and looked up in the chunk tree. The chunk found is validated, its
 * stripe length is compared with the dev extent length, and its stripes are
 * scanned for one whose devid/offset equal the dev extent key.
 *
 * Returns REFERENCER_MISSING when found_chunk stays 0. The success return is
 * not visible in this excerpt.
 */
10908 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10909 struct extent_buffer *eb, int slot)
10911 struct btrfs_root *chunk_root = fs_info->chunk_root;
10912 struct btrfs_dev_extent *ptr;
10913 struct btrfs_path path;
10914 struct btrfs_key chunk_key;
10915 struct btrfs_key devext_key;
10916 struct btrfs_chunk *chunk;
10917 struct extent_buffer *l;
10921 int found_chunk = 0;
10924 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10925 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10926 length = btrfs_dev_extent_length(eb, ptr);
/* Key of the chunk item this dev extent claims to belong to */
10928 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10929 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10930 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10932 btrfs_init_path(&path);
10933 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10938 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10939 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
/* The stripe length of the chunk has to equal the dev extent length */
10944 if (btrfs_stripe_length(fs_info, l, chunk) != length)
10947 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10948 for (i = 0; i < num_stripes; i++) {
10949 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10950 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
/* A stripe matches when devid and offset both equal the dev extent key */
10952 if (devid == devext_key.objectid &&
10953 offset == devext_key.offset) {
10959 btrfs_release_path(&path);
10960 if (!found_chunk) {
10962 "device extent[%llu, %llu, %llu] did not find the related chunk",
10963 devext_key.objectid, devext_key.offset, length);
10964 return REFERENCER_MISSING;
10970 * Check if the used space is correct with the dev item
/*
 * check_dev_item - compare bytes_used of a dev item with the sum of its dev
 * extents.
 *
 * @fs_info: filesystem context; fs_info->dev_root is the tree searched
 * @eb:      leaf that holds the dev item
 * @slot:    slot of the item inside @eb
 *
 * Searches the device tree from a DEV_EXTENT key whose objectid is the device
 * id, adds up btrfs_dev_extent_length() of every DEV_EXTENT item of that
 * device, and compares the sum with btrfs_device_bytes_used().
 *
 * Returns REFERENCER_MISSING on the search-failure path shown below and
 * ACCOUNTING_MISMATCH when used != total. The success return is not visible
 * in this excerpt.
 */
10972 static int check_dev_item(struct btrfs_fs_info *fs_info,
10973 struct extent_buffer *eb, int slot)
10975 struct btrfs_root *dev_root = fs_info->dev_root;
10976 struct btrfs_dev_item *dev_item;
10977 struct btrfs_path path;
10978 struct btrfs_key key;
10979 struct btrfs_dev_extent *ptr;
10985 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10986 dev_id = btrfs_device_id(eb, dev_item);
10987 used = btrfs_device_bytes_used(eb, dev_item);
10989 key.objectid = dev_id;
10990 key.type = BTRFS_DEV_EXTENT_KEY;
10993 btrfs_init_path(&path);
10994 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key is reloaded from the dev item so the message reports the item itself */
10996 btrfs_item_key_to_cpu(eb, &key, slot);
10997 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10998 key.objectid, key.type, key.offset);
10999 btrfs_release_path(&path);
11000 return REFERENCER_MISSING;
11003 /* Iterate dev_extents to calculate the used space of a device */
11005 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11008 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11009 if (key.objectid > dev_id)
11011 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11014 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11015 struct btrfs_dev_extent);
11016 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11018 ret = btrfs_next_item(dev_root, &path);
11022 btrfs_release_path(&path);
11024 if (used != total) {
/*
 * NOTE(review): key is reloaded from the dev item here, yet the message
 * below prints BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id
 * instead of key.objectid, key.type and key.offset - confirm intent.
 */
11025 btrfs_item_key_to_cpu(eb, &key, slot);
11027 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11028 total, used, BTRFS_ROOT_TREE_OBJECTID,
11029 BTRFS_DEV_EXTENT_KEY, dev_id);
11030 return ACCOUNTING_MISMATCH;
11036 * Check a block group item with its referencer (chunk) and its used space
11037 * with extent/metadata item
/*
 * check_block_group_item - verify one block group item.
 *
 * @fs_info: filesystem context; chunk_root and extent_root are both searched
 * @eb:      leaf that holds the block group item
 * @slot:    slot of the item inside @eb
 *
 * Two checks are visible:
 *  1) The chunk item keyed (FIRST_CHUNK_TREE_OBJECTID, CHUNK_ITEM, block
 *     group start) is looked up; a failed lookup records REFERENCER_MISSING
 *     and a chunk length mismatch records REFERENCER_MISMATCH.
 *  2) The extent tree is walked from the block group start. EXTENT_ITEM and
 *     METADATA_ITEM entries are added to total, and each extent whose flags
 *     disagree with the block group flags records CHUNK_TYPE_MISMATCH.
 *     total != used records ACCOUNTING_MISMATCH.
 *
 * Error bits are accumulated in err; the final return statement is not
 * visible in this excerpt.
 */
11039 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11040 struct extent_buffer *eb, int slot)
11042 struct btrfs_root *extent_root = fs_info->extent_root;
11043 struct btrfs_root *chunk_root = fs_info->chunk_root;
11044 struct btrfs_block_group_item *bi;
11045 struct btrfs_block_group_item bg_item;
11046 struct btrfs_path path;
11047 struct btrfs_key bg_key;
11048 struct btrfs_key chunk_key;
11049 struct btrfs_key extent_key;
11050 struct btrfs_chunk *chunk;
11051 struct extent_buffer *leaf;
11052 struct btrfs_extent_item *ei;
11053 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11061 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11062 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer before reading its fields */
11063 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11064 used = btrfs_block_group_used(&bg_item);
11065 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item is keyed by the block group start bytenr */
11067 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11068 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11069 chunk_key.offset = bg_key.objectid;
11071 btrfs_init_path(&path);
11072 /* Search for the referencer chunk */
11073 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11076 "block group[%llu %llu] did not find the related chunk item",
11077 bg_key.objectid, bg_key.offset);
11078 err |= REFERENCER_MISSING;
11080 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11081 struct btrfs_chunk);
11082 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11085 "block group[%llu %llu] related chunk item length does not match",
11086 bg_key.objectid, bg_key.offset);
11087 err |= REFERENCER_MISMATCH;
11090 btrfs_release_path(&path);
11092 /* Search from the block group bytenr */
11093 extent_key.objectid = bg_key.objectid;
11094 extent_key.type = 0;
11095 extent_key.offset = 0;
11097 btrfs_init_path(&path);
11098 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11102 /* Iterate extent tree to account used space */
11104 leaf = path.nodes[0];
11106 /* Search slot can point to the last item beyond leaf nritems */
11107 if (path.slots[0] >= btrfs_header_nritems(leaf))
11110 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Test for items at or beyond the end of the block group range */
11111 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11114 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11115 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11117 if (extent_key.objectid < bg_key.objectid)
11120 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11123 total += extent_key.offset;
/* The extent flags have to agree with the block group type */
11125 ei = btrfs_item_ptr(leaf, path.slots[0],
11126 struct btrfs_extent_item);
11127 flags = btrfs_extent_flags(leaf, ei);
11128 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11129 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11131 "bad extent[%llu, %llu) type mismatch with chunk",
11132 extent_key.objectid,
11133 extent_key.objectid + extent_key.offset);
11134 err |= CHUNK_TYPE_MISMATCH;
11136 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11137 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11138 BTRFS_BLOCK_GROUP_METADATA))) {
11140 "bad extent[%llu, %llu) type mismatch with chunk",
11141 extent_key.objectid,
11142 extent_key.objectid + nodesize);
11143 err |= CHUNK_TYPE_MISMATCH;
11147 ret = btrfs_next_item(extent_root, &path);
11153 btrfs_release_path(&path);
11155 if (total != used) {
11157 "block group[%llu %llu] used %llu but extent items used %llu",
11158 bg_key.objectid, bg_key.offset, used, total);
11159 err |= ACCOUNTING_MISMATCH;
11165 * Check a chunk item.
11166 * Including checking all referred dev_extents and block group
/*
 * check_chunk_item - verify one chunk item and the items that refer to it.
 *
 * @fs_info: filesystem context; extent_root and dev_root are both searched
 * @eb:      leaf that holds the chunk item
 * @slot:    slot of the item inside @eb
 *
 * Visible steps:
 *  1) btrfs_check_chunk_valid() on the chunk; the invalid-chunk path records
 *     BYTES_UNALIGNED | UNKNOWN_TYPE.
 *  2) Lookup of the BLOCK_GROUP_ITEM keyed (chunk start, length) in the
 *     extent tree; its flags are compared with the chunk type.
 *  3) For every stripe, lookup of the DEV_EXTENT keyed (devid, stripe
 *     offset) in the device tree; its chunk objectid, chunk offset and
 *     length are compared with the chunk. A miss records BACKREF_MISSING.
 *
 * Error bits are accumulated in err; the final return statement is not
 * visible in this excerpt.
 */
11168 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11169 struct extent_buffer *eb, int slot)
11171 struct btrfs_root *extent_root = fs_info->extent_root;
11172 struct btrfs_root *dev_root = fs_info->dev_root;
11173 struct btrfs_path path;
11174 struct btrfs_key chunk_key;
11175 struct btrfs_key bg_key;
11176 struct btrfs_key devext_key;
11177 struct btrfs_chunk *chunk;
11178 struct extent_buffer *leaf;
11179 struct btrfs_block_group_item *bi;
11180 struct btrfs_block_group_item bg_item;
11181 struct btrfs_dev_extent *ptr;
11193 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11194 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11195 length = btrfs_chunk_length(eb, chunk);
/* chunk_end is the exclusive end used in the [start end) messages below */
11196 chunk_end = chunk_key.offset + length;
11197 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11200 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11202 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11205 type = btrfs_chunk_type(eb, chunk);
/* The block group item is keyed by chunk start and chunk length */
11207 bg_key.objectid = chunk_key.offset;
11208 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11209 bg_key.offset = length;
11211 btrfs_init_path(&path);
11212 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11215 "chunk[%llu %llu) did not find the related block group item",
11216 chunk_key.offset, chunk_end);
11217 err |= REFERENCER_MISSING;
11219 leaf = path.nodes[0];
11220 bi = btrfs_item_ptr(leaf, path.slots[0],
11221 struct btrfs_block_group_item);
11222 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11224 if (btrfs_block_group_flags(&bg_item) != type) {
11226 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11227 chunk_key.offset, chunk_end, type,
11228 btrfs_block_group_flags(&bg_item));
/*
 * NOTE(review): the block group item was found but its flags differ, yet
 * REFERENCER_MISSING is recorded; check_block_group_item() records
 * REFERENCER_MISMATCH for a length mismatch - confirm which bit is meant.
 */
11229 err |= REFERENCER_MISSING;
11233 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11234 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11235 for (i = 0; i < num_stripes; i++) {
/* The path is recycled for every stripe lookup */
11236 btrfs_release_path(&path);
11237 btrfs_init_path(&path);
11238 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11239 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11240 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11242 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11245 goto not_match_dev;
11247 leaf = path.nodes[0];
11248 ptr = btrfs_item_ptr(leaf, path.slots[0],
11249 struct btrfs_dev_extent);
11250 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11251 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11252 if (objectid != chunk_key.objectid ||
11253 offset != chunk_key.offset ||
11254 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11255 goto not_match_dev;
11258 err |= BACKREF_MISSING;
/*
 * NOTE(review): this message prints chunk_key.objectid as the range start
 * while every other message in this function prints chunk_key.offset -
 * confirm intent.
 */
11260 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11261 chunk_key.objectid, chunk_end, i);
11264 btrfs_release_path(&path);
11270 * Main entry function to check known items and update related accounting info
/*
 * check_leaf_items - run the per-item checker for the items of one leaf.
 *
 * @root: tree the leaf belongs to; root->fs_info is handed to the checkers
 * @eb:   leaf whose items are checked
 *
 * The key of the item at slot is read and the matching checker is selected
 * by the case labels below. EXTENT_CSUM items are only accounted: their item
 * size is added to total_csum_bytes. slot is advanced with ++slot while it
 * stays below btrfs_header_nritems(eb).
 *
 * How the checker results are combined and returned is not visible in this
 * excerpt.
 */
11272 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11274 struct btrfs_fs_info *fs_info = root->fs_info;
11275 struct btrfs_key key;
11278 struct btrfs_extent_data_ref *dref;
11283 btrfs_item_key_to_cpu(eb, &key, slot);
11287 case BTRFS_EXTENT_DATA_KEY:
11288 ret = check_extent_data_item(root, eb, slot);
11291 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11292 ret = check_block_group_item(fs_info, eb, slot);
11295 case BTRFS_DEV_ITEM_KEY:
11296 ret = check_dev_item(fs_info, eb, slot);
11299 case BTRFS_CHUNK_ITEM_KEY:
11300 ret = check_chunk_item(fs_info, eb, slot);
11303 case BTRFS_DEV_EXTENT_KEY:
11304 ret = check_dev_extent_item(fs_info, eb, slot);
11307 case BTRFS_EXTENT_ITEM_KEY:
11308 case BTRFS_METADATA_ITEM_KEY:
11309 ret = check_extent_item(fs_info, eb, slot);
11312 case BTRFS_EXTENT_CSUM_KEY:
/* Csum items are only accounted here, no checker is called */
11313 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* The remaining cases are keyed backref items, checked one by one */
11315 case BTRFS_TREE_BLOCK_REF_KEY:
11316 ret = check_tree_block_backref(fs_info, key.offset,
11320 case BTRFS_EXTENT_DATA_REF_KEY:
11321 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11322 ret = check_extent_data_backref(fs_info,
11323 btrfs_extent_data_ref_root(eb, dref),
11324 btrfs_extent_data_ref_objectid(eb, dref),
11325 btrfs_extent_data_ref_offset(eb, dref),
11327 btrfs_extent_data_ref_count(eb, dref));
11330 case BTRFS_SHARED_BLOCK_REF_KEY:
11331 ret = check_shared_block_backref(fs_info, key.offset,
11335 case BTRFS_SHARED_DATA_REF_KEY:
11336 ret = check_shared_data_backref(fs_info, key.offset,
11344 if (++slot < btrfs_header_nritems(eb))
11351 * Helper function for later fs/subvol tree check. To determine if a tree
11352 * block should be checked.
11353 * This function ensures that only the direct referencer with the lowest
11354 * rootid checks a fs/subvolume tree block.
11356 * Backref check at extent tree would detect errors like missing subvolume
11357 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * should_check - decide whether @root is the one that checks tree block @eb.
 *
 * @root: fs/subvolume root that is currently being traversed
 * @eb:   tree block in question
 *
 * The extent item of the block is located in the extent tree: search with a
 * METADATA_ITEM key at offset (u64)-1, then btrfs_previous_extent_item().
 * Its inline refs are walked, and when a TREE_BLOCK_REF with an offset below
 * root->objectid is met the path is released and the walk ends early.
 *
 * The values returned on each path are not visible in this excerpt; the only
 * visible caller skips the block when the result is zero.
 */
11359 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11361 struct btrfs_root *extent_root = root->fs_info->extent_root;
11362 struct btrfs_key key;
11363 struct btrfs_path path;
11364 struct extent_buffer *leaf;
11366 struct btrfs_extent_item *ei;
11372 struct btrfs_extent_inline_ref *iref;
11375 btrfs_init_path(&path);
11376 key.objectid = btrfs_header_bytenr(eb);
11377 key.type = BTRFS_METADATA_ITEM_KEY;
11378 key.offset = (u64)-1;
11381 * Any failure in backref resolving means we can't determine
11382 * whom the tree block belongs to.
11383 * So in that case, we need to check that tree block
11385 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11389 ret = btrfs_previous_extent_item(extent_root, &path,
11390 btrfs_header_bytenr(eb));
11394 leaf = path.nodes[0];
11395 slot = path.slots[0];
11396 btrfs_item_key_to_cpu(leaf, &key, slot);
11397 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * Inline refs start right after the extent item for METADATA_ITEM, and
 * after the extra btrfs_tree_block_info in the other case.
 */
11399 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11400 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11402 struct btrfs_tree_block_info *info;
11404 info = (struct btrfs_tree_block_info *)(ei + 1);
11405 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11408 item_size = btrfs_item_size_nr(leaf, slot);
11409 ptr = (unsigned long)iref;
11410 end = (unsigned long)ei + item_size;
11411 while (ptr < end) {
11412 iref = (struct btrfs_extent_inline_ref *)ptr;
11413 type = btrfs_extent_inline_ref_type(leaf, iref);
11414 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11417 * We only check the tree block if current root is
11418 * the lowest referencer of it.
11420 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11421 offset < root->objectid) {
11422 btrfs_release_path(&path);
/* Step to the next inline ref; the step size depends on the ref type */
11426 ptr += btrfs_extent_inline_ref_size(type);
11429 * Normally we should also check keyed tree block ref, but that may be
11430 * very time consuming. Inlined ref should already make us skip a lot
11431 * of refs now. So skip search keyed tree block ref.
11435 btrfs_release_path(&path);
11440 * Traversal function for tree block. We will do:
11441 * 1) Skip shared fs/subvolume tree blocks
11442 * 2) Update related bytes accounting
11443 * 3) Pre-order traversal
/*
 * traverse_tree_block - recursively check a tree block and its children.
 *
 * @root: tree the block belongs to
 * @node: tree block to check; children are read with read_tree_block()
 *
 * Visible steps: test whether a fs tree block should be checked by this root
 * via should_check(), update the global byte counters, call
 * check_tree_block_ref() on the block itself, then either
 * check_leaf_items() or a recursive call for each child pointer. Children
 * for which level == drop_level and is_dropped_key() holds are tested for
 * separately; the branch body is not visible here.
 *
 * How the results are combined and returned is not visible in this excerpt.
 */
11445 static int traverse_tree_block(struct btrfs_root *root,
11446 struct extent_buffer *node)
11448 struct extent_buffer *eb;
11449 struct btrfs_key key;
11450 struct btrfs_key drop_key;
11458 * Skip shared fs/subvolume tree block, in that case they will
11459 * be checked by referencer with lowest rootid
11461 if (is_fstree(root->objectid) && !should_check(root, node))
11464 /* Update bytes accounting */
11465 total_btree_bytes += node->len;
11466 if (fs_root_objectid(btrfs_header_owner(node)))
11467 total_fs_tree_bytes += node->len;
11468 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11469 total_extent_tree_bytes += node->len;
/*
 * found_old_backref is latched for a reloc-tree-owned block that has the
 * mixed backref revision but lacks the RELOC header flag.
 */
11470 if (!found_old_backref &&
11471 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11472 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11473 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11474 found_old_backref = 1;
11476 /* pre-order traversal, check itself first */
11477 level = btrfs_header_level(node);
11478 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11479 btrfs_header_level(node),
11480 btrfs_header_owner(node));
11484 "check %s failed root %llu bytenr %llu level %d, force continue check",
11485 level ? "node":"leaf", root->objectid,
11486 btrfs_header_bytenr(node), btrfs_header_level(node));
11489 btree_space_waste += btrfs_leaf_free_space(root, node);
11490 ret = check_leaf_items(root, node);
11495 nr = btrfs_header_nritems(node);
11496 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
/* Unused key pointer slots of the node are counted as wasted space */
11497 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11498 sizeof(struct btrfs_key_ptr);
11500 /* Then check all its children */
11501 for (i = 0; i < nr; i++) {
11502 u64 blocknr = btrfs_node_blockptr(node, i);
11504 btrfs_node_key_to_cpu(node, &key, i);
11505 if (level == root->root_item.drop_level &&
11506 is_dropped_key(&key, &drop_key))
11510 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11511 * to call the function itself.
11513 eb = read_tree_block(root->fs_info, blocknr,
11514 root->fs_info->nodesize, 0);
11515 if (extent_buffer_uptodate(eb)) {
11516 ret = traverse_tree_block(root, eb);
11519 free_extent_buffer(eb);
11526 * Low memory usage version of check_chunks_and_extents.
/*
 * check_chunks_and_extents_v2 - traverse the chunk tree, the tree root and
 * the trees found through ROOT_ITEMs in the tree root.
 *
 * @root: any root of the filesystem; only root->fs_info is used
 *
 * The chunk root and the tree root are traversed first. The tree root is
 * then searched from a ROOT_ITEM key with objectid EXTENT_TREE_OBJECTID and
 * iterated with btrfs_next_item(); each root read is passed to
 * traverse_tree_block(). TREE_RELOC roots are read without the cache and
 * freed after use.
 *
 * How the results are combined and returned is not visible in this excerpt.
 */
11528 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11530 struct btrfs_path path;
11531 struct btrfs_key key;
11532 struct btrfs_root *root1;
11533 struct btrfs_root *cur_root;
11537 root1 = root->fs_info->chunk_root;
11538 ret = traverse_tree_block(root1, root1->node);
11541 root1 = root->fs_info->tree_root;
11542 ret = traverse_tree_block(root1, root1->node);
11545 btrfs_init_path(&path);
11546 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11548 key.type = BTRFS_ROOT_ITEM_KEY;
11550 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
/* NOTE(review): the runtime message below spells tree as treet */
11552 error("cannot find extent treet in tree_root");
11557 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11558 if (key.type != BTRFS_ROOT_ITEM_KEY)
11560 key.offset = (u64)-1;
/* Reloc roots are read without the cache and freed again further down */
11562 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11563 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11566 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11567 if (IS_ERR(cur_root) || !cur_root) {
/* NOTE(review): %lld is used here while other messages use %llu for an objectid */
11568 error("failed to read tree: %lld", key.objectid);
11572 ret = traverse_tree_block(cur_root, cur_root->node);
11575 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11576 btrfs_free_fs_root(cur_root);
11578 ret = btrfs_next_item(root1, &path);
11584 btrfs_release_path(&path);
/*
 * btrfs_fsck_reinit_root - give @root a fresh, empty root block.
 *
 * @trans:     running transaction
 * @root:      root to re-initialise
 * @overwrite: not referenced on any visible line; presumably selects between
 *             reusing the old block and allocating a new one - TODO confirm
 *
 * The block c gets a zeroed header which is then filled in (level, bytenr,
 * generation, backref revision, owner, fsid, chunk tree uuid) and is marked
 * dirty. When the new block has the same start as the old one the root item
 * is updated in the tree root. The old buffer is freed and the root is put
 * on the dirty list.
 */
11588 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11589 struct btrfs_root *root, int overwrite)
11591 struct extent_buffer *c;
11592 struct extent_buffer *old = root->node;
11595 struct btrfs_disk_key disk_key = {0,0,0};
11601 extent_buffer_get(c);
11604 c = btrfs_alloc_free_block(trans, root,
11605 root->fs_info->nodesize,
11606 root->root_key.objectid,
11607 &disk_key, level, 0, 0);
11610 extent_buffer_get(c);
/* Start from a zeroed header and fill in every field again */
11614 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11615 btrfs_set_header_level(c, level);
11616 btrfs_set_header_bytenr(c, c->start);
11617 btrfs_set_header_generation(c, trans->transid);
11618 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11619 btrfs_set_header_owner(c, root->root_key.objectid);
/*
 * NOTE(review): btrfs_header_fsid() is called without an argument while
 * btrfs_header_chunk_tree_uuid(c) below takes the buffer - confirm this
 * matches the accessor definitions.
 */
11621 write_extent_buffer(c, root->fs_info->fsid,
11622 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11624 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11625 btrfs_header_chunk_tree_uuid(c),
11628 btrfs_mark_buffer_dirty(c);
11630 * this case can happen in the following case:
11632 * 1.overwrite previous root.
11634 * 2.reinit reloc data root, this is because we skip pin
11635 * down reloc data tree before which means we can allocate
11636 * same block bytenr here.
11638 if (old->start == c->start) {
11639 btrfs_set_root_generation(&root->root_item,
11641 root->root_item.level = btrfs_header_level(root->node);
11642 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11643 &root->root_key, &root->root_item);
11645 free_extent_buffer(c);
11649 free_extent_buffer(old);
11651 add_root_to_dirty_list(root);
/*
 * pin_down_tree_blocks - pin @eb and the tree blocks reachable from it.
 *
 * @fs_info:   filesystem context; fs_info->pinned_extents records the pins
 * @eb:        tree block to start from
 * @tree_root: non-zero when @eb belongs to the tree root, in which case the
 *             root blocks named by its ROOT_ITEMs are followed as well
 *
 * A block whose range is already marked in pinned_extents is tested for
 * before anything else. For the item loop two paths are visible: one reads
 * ROOT_ITEM root blocks and recurses with tree_root = 0, the other follows
 * node block pointers. At level 1 of a non-tree-root tree the child is
 * pinned directly without being read.
 *
 * The return values are not visible in this excerpt.
 */
11655 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11656 struct extent_buffer *eb, int tree_root)
11658 struct extent_buffer *tmp;
11659 struct btrfs_root_item *ri;
11660 struct btrfs_key key;
11662 int level = btrfs_header_level(eb);
11668 * If we have pinned this block before, don't pin it again.
11669 * This can not only avoid forever loop with broken filesystem
11670 * but also give us some speedups.
11672 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11673 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11676 btrfs_pin_extent(fs_info, eb->start, eb->len);
11678 nritems = btrfs_header_nritems(eb);
11679 for (i = 0; i < nritems; i++) {
11681 btrfs_item_key_to_cpu(eb, &key, i);
11682 if (key.type != BTRFS_ROOT_ITEM_KEY)
11684 /* Skip the extent root and reloc roots */
11685 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11686 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11687 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11689 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11690 bytenr = btrfs_disk_root_bytenr(eb, ri);
11693 * If at any point we start needing the real root we
11694 * will have to build a stump root for the root we are
11695 * in, but for now this doesn't actually use the root so
11696 * just pass in extent_root.
11698 tmp = read_tree_block(fs_info, bytenr, fs_info->nodesize, 0);
11699 if (!extent_buffer_uptodate(tmp)) {
11700 fprintf(stderr, "Error reading root block\n");
11703 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11704 free_extent_buffer(tmp);
11708 bytenr = btrfs_node_blockptr(eb, i);
11710 /* If we aren't the tree root don't read the block */
11711 if (level == 1 && !tree_root) {
11712 btrfs_pin_extent(fs_info, bytenr,
11713 fs_info->nodesize);
11717 tmp = read_tree_block(fs_info, bytenr,
11718 fs_info->nodesize, 0);
11719 if (!extent_buffer_uptodate(tmp)) {
11720 fprintf(stderr, "Error reading tree block\n");
11723 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11724 free_extent_buffer(tmp);
/*
 * pin_metadata_blocks - pin the tree blocks of the chunk tree and then of the
 * tree root.
 *
 * @fs_info: filesystem context
 *
 * The chunk root is walked with tree_root = 0 and the tree root with
 * tree_root = 1. The result of the second walk is returned directly; the
 * handling of the first result is not visible in this excerpt.
 */
11733 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11737 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11741 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * reset_block_groups - rebuild the in-memory block groups from the chunk tree.
 *
 * @fs_info: filesystem context; fs_info->chunk_root is iterated
 *
 * The three avail_*_alloc_bits fields are cleared. For every CHUNK_ITEM met
 * a block group is added with btrfs_add_block_group() and the chunk range is
 * marked dirty in fs_info->free_space_cache. A later loop walks the block
 * groups through btrfs_lookup_first_block_group(); what it does with each
 * one is not visible in this excerpt.
 */
11744 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11746 struct btrfs_block_group_cache *cache;
11747 struct btrfs_path path;
11748 struct extent_buffer *leaf;
11749 struct btrfs_chunk *chunk;
11750 struct btrfs_key key;
11754 btrfs_init_path(&path);
11756 key.type = BTRFS_CHUNK_ITEM_KEY;
11758 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11760 btrfs_release_path(&path);
11765 * We do this in case the block groups were screwed up and had alloc
11766 * bits that aren't actually set on the chunks. This happens with
11767 * restored images every time and could happen in real life I guess.
11769 fs_info->avail_data_alloc_bits = 0;
11770 fs_info->avail_metadata_alloc_bits = 0;
11771 fs_info->avail_system_alloc_bits = 0;
11773 /* First we need to create the in-memory block groups */
11775 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11776 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11778 btrfs_release_path(&path);
11786 leaf = path.nodes[0];
11787 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11788 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11793 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
/* key.offset and the chunk length are passed as the last two arguments */
11794 btrfs_add_block_group(fs_info, 0,
11795 btrfs_chunk_type(leaf, chunk),
11796 key.objectid, key.offset,
11797 btrfs_chunk_length(leaf, chunk));
11798 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11799 key.offset + btrfs_chunk_length(leaf, chunk));
11804 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Continue the lookup right after the block group just returned */
11808 start = cache->key.objectid + cache->key.offset;
11811 btrfs_release_path(&path);
/*
 * reset_balance - remove balance state from the tree root and re-initialise
 * the data reloc tree.
 *
 * @trans:   running transaction
 * @fs_info: filesystem context; fs_info->tree_root is modified
 *
 * Visible steps: search for the BALANCE_ITEM and delete it, search for
 * TREE_RELOC ROOT_ITEMs and delete them in runs via btrfs_del_items(), then
 * read the DATA_RELOC tree root, re-initialise it with
 * btrfs_fsck_reinit_root() and create its root directory.
 *
 * The exact loop structure and the return statement are not visible in this
 * excerpt.
 */
11815 static int reset_balance(struct btrfs_trans_handle *trans,
11816 struct btrfs_fs_info *fs_info)
11818 struct btrfs_root *root = fs_info->tree_root;
11819 struct btrfs_path path;
11820 struct extent_buffer *leaf;
11821 struct btrfs_key key;
11822 int del_slot, del_nr = 0;
11826 btrfs_init_path(&path);
11827 key.objectid = BTRFS_BALANCE_OBJECTID;
11828 key.type = BTRFS_BALANCE_ITEM_KEY;
/* ins_len -1 with cow 1: the search is prepared for a deletion */
11830 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11835 goto reinit_data_reloc;
11840 ret = btrfs_del_item(trans, root, &path);
11843 btrfs_release_path(&path);
11845 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11846 key.type = BTRFS_ROOT_ITEM_KEY;
11848 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11852 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11857 ret = btrfs_del_items(trans, root, &path,
11864 btrfs_release_path(&path);
11867 ret = btrfs_search_slot(trans, root, &key, &path,
11874 leaf = path.nodes[0];
11875 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11876 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11878 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11883 del_slot = path.slots[0];
11892 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11896 btrfs_release_path(&path);
/* From here on root refers to the data reloc tree, not the tree root */
11899 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11900 key.type = BTRFS_ROOT_ITEM_KEY;
11901 key.offset = (u64)-1;
11902 root = btrfs_read_fs_root(fs_info, &key);
11903 if (IS_ERR(root)) {
11904 fprintf(stderr, "Error reading data reloc tree\n");
11905 ret = PTR_ERR(root);
11908 record_root_in_trans(trans, root);
11909 ret = btrfs_fsck_reinit_root(trans, root, 0);
11912 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11914 btrfs_release_path(&path);
/*
 * reinit_extent_tree - rebuild the extent tree from scratch.
 *
 * @trans:   running transaction
 * @fs_info: filesystem context
 *
 * Visible steps, in order: test for the MIXED_GROUPS incompat flag and print
 * the unsupported message, pin all metadata blocks, free and reset the
 * in-memory block groups, re-initialise the extent root, insert one item per
 * in-memory block group into the new extent root, and finally reset any
 * pending balance.
 *
 * The return statements are not visible in this excerpt.
 */
11918 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11919 struct btrfs_fs_info *fs_info)
11925 * The only reason we don't do this is because right now we're just
11926 * walking the trees we find and pinning down their bytes, we don't look
11927 * at any of the leaves. In order to do mixed groups we'd have to check
11928 * the leaves of any fs roots and pin down the bytes for any file
11929 * extents we find. Not hard but why do it if we don't have to?
11931 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11932 fprintf(stderr, "We don't support re-initing the extent tree "
11933 "for mixed block groups yet, please notify a btrfs "
11934 "developer you want to do this so they can add this "
11935 "functionality.\n");
11940 * first we need to walk all of the trees except the extent tree and pin
11941 * down the bytes that are in use so we don't overwrite any existing
11944 ret = pin_metadata_blocks(fs_info);
11946 fprintf(stderr, "error pinning down used bytes\n");
11951 * Need to drop all the block groups since we're going to recreate all
11954 btrfs_free_block_groups(fs_info);
11955 ret = reset_block_groups(fs_info);
11957 fprintf(stderr, "error resetting the block groups\n");
11961 /* Ok we can allocate now, reinit the extent root */
11962 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11964 fprintf(stderr, "extent root initialization failed\n");
11966 * When the transaction code is updated we should end the
11967 * transaction, but for now progs only knows about commit so
11968 * just return an error.
11974 * Now we have all the in-memory block groups setup so we can make
11975 * allocations properly, and the metadata we care about is safe since we
11976 * pinned all of it above.
11979 struct btrfs_block_group_cache *cache;
11981 cache = btrfs_lookup_first_block_group(fs_info, start);
11984 start = cache->key.objectid + cache->key.offset;
11985 ret = btrfs_insert_item(trans, fs_info->extent_root,
11986 &cache->key, &cache->item,
11987 sizeof(cache->item));
11989 fprintf(stderr, "Error adding block group\n");
11992 btrfs_extent_post_op(trans, fs_info->extent_root);
11995 ret = reset_balance(trans, fs_info);
11997 fprintf(stderr, "error resetting the pending balance\n");
/*
 * recow_extent_buffer - force a tree block to be COWed by searching for its
 * first key with cow enabled.
 *
 * @root: any root of the filesystem; replaced by the owner root of @eb
 * @eb:   tree block to re-COW
 *
 * The owner root is read from the header owner of @eb. path.lowest_level is
 * set to the level of @eb so that the search stops at that level, and the
 * first node key or item key of @eb is used as the search key.
 *
 * Returns PTR_ERR() when the owner root or the transaction cannot be
 * obtained. The final return statement is not visible in this excerpt.
 */
12002 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
12004 struct btrfs_path path;
12005 struct btrfs_trans_handle *trans;
12006 struct btrfs_key key;
12009 printf("Recowing metadata block %llu\n", eb->start);
12010 key.objectid = btrfs_header_owner(eb);
12011 key.type = BTRFS_ROOT_ITEM_KEY;
12012 key.offset = (u64)-1;
12014 root = btrfs_read_fs_root(root->fs_info, &key);
12015 if (IS_ERR(root)) {
12016 fprintf(stderr, "Couldn't find owner root %llu\n",
12018 return PTR_ERR(root);
12021 trans = btrfs_start_transaction(root, 1);
12023 return PTR_ERR(trans);
12025 btrfs_init_path(&path);
12026 path.lowest_level = btrfs_header_level(eb);
12027 if (path.lowest_level)
12028 btrfs_node_key_to_cpu(eb, &key, 0);
12030 btrfs_item_key_to_cpu(eb, &key, 0);
/* cow = 1 is what makes the search re-COW the blocks on the path */
12032 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* NOTE(review): the commit return value is not captured here */
12033 btrfs_commit_transaction(trans, root);
12034 btrfs_release_path(&path);
/*
 * delete_bad_item - delete one item recorded as bad from its owner tree.
 *
 * @root: any root of the filesystem; replaced by the root named bad->root_id
 * @bad:  record with the owner root id and the key of the item to delete
 *
 * The owner root is read, a transaction is started, the key is searched with
 * ins_len -1 and cow 1, and the item found is deleted. The transaction is
 * committed and the path released afterwards.
 *
 * Returns PTR_ERR() when the owner root or the transaction cannot be
 * obtained. The final return statement is not visible in this excerpt.
 */
12038 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12040 struct btrfs_path path;
12041 struct btrfs_trans_handle *trans;
12042 struct btrfs_key key;
12045 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12046 bad->key.type, bad->key.offset);
12047 key.objectid = bad->root_id;
12048 key.type = BTRFS_ROOT_ITEM_KEY;
12049 key.offset = (u64)-1;
12051 root = btrfs_read_fs_root(root->fs_info, &key);
12052 if (IS_ERR(root)) {
12053 fprintf(stderr, "Couldn't find owner root %llu\n",
12055 return PTR_ERR(root);
12058 trans = btrfs_start_transaction(root, 1);
12060 return PTR_ERR(trans);
12062 btrfs_init_path(&path);
12063 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12069 ret = btrfs_del_item(trans, root, &path);
/* NOTE(review): the commit return value is not captured here */
12071 btrfs_commit_transaction(trans, root);
12072 btrfs_release_path(&path);
/*
 * zero_log_tree - clear the log root pointer in the super block copy.
 *
 * @root: root used to start and commit the transaction
 *
 * Sets log_root and log_root_level of root->fs_info->super_copy to 0 inside
 * a transaction. ret holds PTR_ERR(trans) when the transaction cannot be
 * started and otherwise the result of btrfs_commit_transaction(); the return
 * statement itself is not visible in this excerpt.
 */
12076 static int zero_log_tree(struct btrfs_root *root)
12078 struct btrfs_trans_handle *trans;
12081 trans = btrfs_start_transaction(root, 1);
12082 if (IS_ERR(trans)) {
12083 ret = PTR_ERR(trans);
12086 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12087 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12088 ret = btrfs_commit_transaction(trans, root);
/*
 * populate_csum - checksum a data range one sector at a time.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums
 * @buf:       scratch buffer; the visible callers allocate sectorsize bytes
 * @start:     logical start of the data range
 *
 * A further length parameter (used below as len) is declared on a line that
 * is not visible in this excerpt. Each iteration reads data at
 * start + offset into @buf and calls btrfs_csum_file_block() with
 * start + len as its third argument, then advances offset by sectorsize.
 */
12092 static int populate_csum(struct btrfs_trans_handle *trans,
12093 struct btrfs_root *csum_root, char *buf, u64 start,
12096 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12101 while (offset < len) {
12102 sectorsize = fs_info->sectorsize;
12103 ret = read_extent_data(fs_info, buf, start + offset,
12107 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12108 start + offset, buf, sectorsize);
12111 offset += sectorsize;
/*
 * fill_csum_tree_from_one_fs_root - checksum the regular file extents of one
 * fs/subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums
 * @cur_root:  fs/subvolume tree whose EXTENT_DATA items are iterated
 *
 * A sectorsize scratch buffer is allocated. For every EXTENT_DATA item of
 * type BTRFS_FILE_EXTENT_REG the on-disk bytenr and on-disk length are
 * passed to populate_csum(). An -EEXIST result is tested for separately; the
 * branch body is not visible here.
 *
 * Buffer release and the return statement are not visible in this excerpt.
 */
12116 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12117 struct btrfs_root *csum_root,
12118 struct btrfs_root *cur_root)
12120 struct btrfs_path path;
12121 struct btrfs_key key;
12122 struct extent_buffer *node;
12123 struct btrfs_file_extent_item *fi;
12130 buf = malloc(cur_root->fs_info->sectorsize);
12134 btrfs_init_path(&path);
12138 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12141 /* Iterate all regular file extents and fill its csum */
12143 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12145 if (key.type != BTRFS_EXTENT_DATA_KEY)
12147 node = path.nodes[0];
12148 slot = path.slots[0];
12149 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12150 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* The on-disk range is checksummed, not the file offset range */
12152 start = btrfs_file_extent_disk_bytenr(node, fi);
12153 len = btrfs_file_extent_disk_num_bytes(node, fi);
12155 ret = populate_csum(trans, csum_root, buf, start, len);
12156 if (ret == -EEXIST)
12162 * TODO: if next leaf is corrupted, jump to nearest next valid
12165 ret = btrfs_next_item(cur_root, &path);
12175 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_fs - rebuild checksums by walking every fs/subvolume
 * tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums
 *
 * The tree root is searched from a ROOT_ITEM key with objectid
 * FS_TREE_OBJECTID and iterated with btrfs_next_item(). Each ROOT_ITEM that
 * passes is_fstree() is read with btrfs_read_fs_root() and handed to
 * fill_csum_tree_from_one_fs_root().
 *
 * The return statement is not visible in this excerpt.
 */
12180 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12181 struct btrfs_root *csum_root)
12183 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12184 struct btrfs_path path;
12185 struct btrfs_root *tree_root = fs_info->tree_root;
12186 struct btrfs_root *cur_root;
12187 struct extent_buffer *node;
12188 struct btrfs_key key;
12192 btrfs_init_path(&path);
12193 key.objectid = BTRFS_FS_TREE_OBJECTID;
12195 key.type = BTRFS_ROOT_ITEM_KEY;
12196 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12205 node = path.nodes[0];
12206 slot = path.slots[0];
12207 btrfs_item_key_to_cpu(node, &key, slot);
12208 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12210 if (key.type != BTRFS_ROOT_ITEM_KEY)
12212 if (!is_fstree(key.objectid))
/* Offset (u64)-1 is the same lookup key form used for the other root reads in this file */
12214 key.offset = (u64)-1;
12216 cur_root = btrfs_read_fs_root(fs_info, &key);
12217 if (IS_ERR(cur_root) || !cur_root) {
12218 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12222 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12227 ret = btrfs_next_item(tree_root, &path);
12237 btrfs_release_path(&path);
/*
 * Refill the csum tree by walking the extent tree.
 *
 * The extent root is searched (without a transaction handle) and walked leaf
 * by leaf with btrfs_next_leaf().  Each item is tested for key type
 * BTRFS_EXTENT_ITEM_KEY and for BTRFS_EXTENT_FLAG_DATA in its extent flags;
 * populate_csum() is called with the item's key.objectid as the start of the
 * range and a scratch buffer of fs_info->sectorsize bytes.
 */
12241 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12242 struct btrfs_root *csum_root)
12244 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12245 struct btrfs_path path;
12246 struct btrfs_extent_item *ei;
12247 struct extent_buffer *leaf;
12249 struct btrfs_key key;
12252 btrfs_init_path(&path);
12254 key.type = BTRFS_EXTENT_ITEM_KEY;
12256 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12258 btrfs_release_path(&path);
/* Scratch buffer of one sector, later given to populate_csum() */
12262 buf = malloc(csum_root->fs_info->sectorsize);
12264 btrfs_release_path(&path);
12269 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12270 ret = btrfs_next_leaf(extent_root, &path);
12278 leaf = path.nodes[0];
12280 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12281 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12286 ei = btrfs_item_ptr(leaf, path.slots[0],
12287 struct btrfs_extent_item);
12288 if (!(btrfs_extent_flags(leaf, ei) &
12289 BTRFS_EXTENT_FLAG_DATA)) {
12294 ret = populate_csum(trans, csum_root, buf, key.objectid,
12301 btrfs_release_path(&path);
12307 * Recalculate the csum and put it into the csum tree.
12309 * Extent tree init will wipe out all the extent info, so in that case, we
12310 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
12311 * will use fs/subvol trees to init the csum tree.
12313 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12314 struct btrfs_root *csum_root,
12315 int search_fs_tree)
/*
 * Pure dispatcher: a non-zero @search_fs_tree selects
 * fill_csum_tree_from_fs(), otherwise fill_csum_tree_from_extent() is used.
 * The callee's return value is returned unchanged.
 */
12317 if (search_fs_tree)
12318 return fill_csum_tree_from_fs(trans, csum_root);
12320 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache.
 *
 * Entries are taken from the front of the cache tree and removed one at a
 * time until the tree is empty; the tree itself is then freed and the global
 * pointer is reset to NULL, so build_roots_info_cache() allocates a fresh
 * tree the next time it runs.
 */
12323 static void free_roots_info_cache(void)
12325 if (!roots_info_cache)
12328 while (!cache_tree_empty(roots_info_cache)) {
12329 struct cache_extent *entry;
12330 struct root_item_info *rii;
12332 entry = first_cache_extent(roots_info_cache);
12335 remove_cache_extent(roots_info_cache, entry);
/* Recover the enclosing root_item_info from its embedded cache_extent */
12336 rii = container_of(entry, struct root_item_info, cache_extent);
12340 free(roots_info_cache);
12341 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, per root id, information about the
 * highest-level tree block that references that root.
 *
 * Both BTRFS_EXTENT_ITEM_KEY and BTRFS_METADATA_ITEM_KEY items are examined;
 * for BTRFS_EXTENT_ITEM_KEY the BTRFS_EXTENT_FLAG_TREE_BLOCK flag is tested.
 * The first inline ref is located right after the extent item for metadata
 * items (level taken from found_key.offset), or after the
 * btrfs_tree_block_info otherwise (level taken from that structure), and its
 * type is compared with BTRFS_TREE_BLOCK_REF_KEY.
 *
 * Results are stored in the global roots_info_cache as root_item_info
 * entries keyed by root id (cache_extent.start == root id, size == 1).
 * maybe_repair_root_item() later consumes them and insists on
 * node_count == 1.
 */
12344 static int build_roots_info_cache(struct btrfs_fs_info *info)
12347 struct btrfs_key key;
12348 struct extent_buffer *leaf;
12349 struct btrfs_path path;
/* Allocate the global cache tree only if it does not exist yet */
12351 if (!roots_info_cache) {
12352 roots_info_cache = malloc(sizeof(*roots_info_cache));
12353 if (!roots_info_cache)
12355 cache_tree_init(roots_info_cache);
12358 btrfs_init_path(&path);
12360 key.type = BTRFS_EXTENT_ITEM_KEY;
12362 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12365 leaf = path.nodes[0];
12368 struct btrfs_key found_key;
12369 struct btrfs_extent_item *ei;
12370 struct btrfs_extent_inline_ref *iref;
12371 int slot = path.slots[0];
12376 struct cache_extent *entry;
12377 struct root_item_info *rii;
12379 if (slot >= btrfs_header_nritems(leaf)) {
12380 ret = btrfs_next_leaf(info->extent_root, &path);
12387 leaf = path.nodes[0];
12388 slot = path.slots[0];
12391 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12393 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12394 found_key.type != BTRFS_METADATA_ITEM_KEY)
12397 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12398 flags = btrfs_extent_flags(leaf, ei);
12400 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12401 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12404 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12405 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12406 level = found_key.offset;
12408 struct btrfs_tree_block_info *binfo;
12410 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12411 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12412 level = btrfs_tree_block_level(leaf, binfo);
12416 * For a root extent, it must be of the following type and the
12417 * first (and only one) iref in the item.
12419 type = btrfs_extent_inline_ref_type(leaf, iref);
12420 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12423 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12424 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12426 rii = malloc(sizeof(struct root_item_info));
/*
 * New entry keyed by root id.  Its level starts as (u8)-1, a value the
 * comparison further down always treats as replaceable.
 */
12431 rii->cache_extent.start = root_id;
12432 rii->cache_extent.size = 1;
12433 rii->level = (u8)-1;
12434 entry = &rii->cache_extent;
12435 ret = insert_cache_extent(roots_info_cache, entry);
12438 rii = container_of(entry, struct root_item_info,
12442 ASSERT(rii->cache_extent.start == root_id);
12443 ASSERT(rii->cache_extent.size == 1);
/* A strictly higher level (or the first block seen) replaces the record */
12445 if (level > rii->level || rii->level == (u8)-1) {
12446 rii->level = level;
12447 rii->bytenr = found_key.objectid;
12448 rii->gen = btrfs_extent_generation(leaf, ei);
12449 rii->node_count = 1;
12450 } else if (level == rii->level) {
12458 btrfs_release_path(&path);
/*
 * Compare one on-disk root item with what build_roots_info_cache() recorded
 * for the same root, and optionally rewrite it.
 *
 * @path:           points at the root item; it is read from
 *                  path->nodes[0] at path->slots[0]
 * @root_key:       key of that item; root_key->objectid is the root id used
 *                  for the roots_info_cache lookup
 * @read_only_mode: when non-zero the root item is only inspected; when zero
 *                  the bytenr, level and generation recorded in the cache
 *                  are written back into the leaf
 *
 * A missing cache entry, or an entry whose node_count is not 1, is reported
 * on an error path.  A root item whose generation is newer than the recorded
 * one is also reported rather than silently overwritten.
 */
12463 static int maybe_repair_root_item(struct btrfs_path *path,
12464 const struct btrfs_key *root_key,
12465 const int read_only_mode)
12467 const u64 root_id = root_key->objectid;
12468 struct cache_extent *entry;
12469 struct root_item_info *rii;
12470 struct btrfs_root_item ri;
12471 unsigned long offset;
/* Entries are keyed by root id with size 1, see build_roots_info_cache() */
12473 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12476 "Error: could not find extent items for root %llu\n",
12477 root_key->objectid);
12481 rii = container_of(entry, struct root_item_info, cache_extent);
12482 ASSERT(rii->cache_extent.start == root_id);
12483 ASSERT(rii->cache_extent.size == 1);
12485 if (rii->node_count != 1) {
12487 "Error: could not find btree root extent for root %llu\n",
12492 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12493 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12495 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12496 btrfs_root_level(&ri) != rii->level ||
12497 btrfs_root_generation(&ri) != rii->gen) {
12500 * If we're in repair mode but our caller told us to not update
12501 * the root item, i.e. just check if it needs to be updated, don't
12502 * print this message, since the caller will call us again shortly
12503 * for the same root item without read only mode (the caller will
12504 * open a transaction first).
12506 if (!(read_only_mode && repair))
12508 "%sroot item for root %llu,"
12509 " current bytenr %llu, current gen %llu, current level %u,"
12510 " new bytenr %llu, new gen %llu, new level %u\n",
12511 (read_only_mode ? "" : "fixing "),
12513 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12514 btrfs_root_level(&ri),
12515 rii->bytenr, rii->gen, rii->level);
12517 if (btrfs_root_generation(&ri) > rii->gen) {
12519 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12520 root_id, btrfs_root_generation(&ri), rii->gen);
/* Update the in-memory copy and write it back over the item in the leaf */
12524 if (!read_only_mode) {
12525 btrfs_set_root_bytenr(&ri, rii->bytenr);
12526 btrfs_set_root_level(&ri, rii->level);
12527 btrfs_set_root_generation(&ri, rii->gen);
12528 write_extent_buffer(path->nodes[0], &ri,
12529 offset, sizeof(ri));
12539 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12540 * caused read-only snapshots to be corrupted if they were created at a moment
12541 * when the source subvolume/snapshot had orphan items. The issue was that the
12542 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12543 * node instead of the post orphan cleanup root node.
12544 * So this function, and its callees, just detects and fixes those cases. Even
12545 * though the regression was for read-only snapshots, this function applies to
12546 * any snapshot/subvolume root.
12547 * This must be run before any other repair code; otherwise, other repair
12548 * code would delete or modify backrefs in the extent tree, for example,
12549 * which will result in an inconsistent fs after repairing the root items.
12551 static int repair_root_items(struct btrfs_fs_info *info)
/*
 * Builds the roots_info_cache, then walks the root tree starting at
 * objectid BTRFS_FIRST_FREE_OBJECTID / type BTRFS_ROOT_ITEM_KEY and runs
 * maybe_repair_root_item() on the items it visits (key type
 * BTRFS_ROOT_ITEM_KEY and objectid BTRFS_TREE_RELOC_OBJECTID are tested
 * first).  The helper is called in read-only mode while no transaction is
 * open (trans == NULL) and in write mode otherwise.
 *
 * cmd_check() treats a negative return value as failure and a positive one
 * as the number of affected roots.
 */
12553 struct btrfs_path path;
12554 struct btrfs_key key;
12555 struct extent_buffer *leaf;
12556 struct btrfs_trans_handle *trans = NULL;
12559 int need_trans = 0;
12561 btrfs_init_path(&path);
/* Collect per-root block info from the extent tree before touching roots */
12563 ret = build_roots_info_cache(info);
12567 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12568 key.type = BTRFS_ROOT_ITEM_KEY;
12573 * Avoid opening and committing transactions if a leaf doesn't have
12574 * any root items that need to be fixed, so that we avoid rotating
12575 * backup roots unnecessarily.
12578 trans = btrfs_start_transaction(info->tree_root, 1);
12579 if (IS_ERR(trans)) {
12580 ret = PTR_ERR(trans);
12585 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12589 leaf = path.nodes[0];
12592 struct btrfs_key found_key;
12594 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12595 int no_more_keys = find_next_key(&path, &key);
12597 btrfs_release_path(&path);
12599 ret = btrfs_commit_transaction(trans,
12611 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12613 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12615 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12618 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12622 if (!trans && repair) {
12625 btrfs_release_path(&path);
12635 free_roots_info_cache();
12636 btrfs_release_path(&path);
/*
 * NOTE(review): the result of this commit is not captured on this line --
 * confirm that a failed commit cannot go unreported to the caller.
 */
12638 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the v1 free space cache of every block group, then mark the cache
 * as invalid in the super block.
 *
 * Block groups are visited in bytenr order: each lookup starts at `current`,
 * which is advanced to the end of the block group just processed
 * (key.objectid + key.offset).  Afterwards a transaction is started on the
 * tree root, the super block's cache_generation is set to (u64)-1 and the
 * transaction is committed.
 */
12645 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12647 struct btrfs_trans_handle *trans;
12648 struct btrfs_block_group_cache *bg_cache;
12652 /* Clear all free space cache inodes and its extent data */
12654 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12657 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12660 current = bg_cache->key.objectid + bg_cache->key.offset;
12663 /* Don't forget to set cache_generation to -1 */
12664 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12665 if (IS_ERR(trans)) {
12666 error("failed to update super block cache generation");
12667 return PTR_ERR(trans);
12669 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/*
 * NOTE(review): the result of this commit is not captured on this line --
 * confirm that a failed commit is reported to the caller.
 */
12670 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Help text for "btrfs check": synopsis, short and long descriptions, then
 * one entry per option.  cmd_check() passes this array to usage() when
 * option parsing or the argument count check fails.
 *
 * NOTE(review): -E is documented below (and declared in cmd_check()'s
 * long_options) as taking a <subvolid> argument, but the short-option
 * string given to getopt_long() in cmd_check() is "as:br:pEQ", with no ':'
 * after 'E' -- confirm that the short form -E actually receives its
 * argument.
 */
12675 const char * const cmd_check_usage[] = {
12676 "btrfs check [options] <device>",
12677 "Check structural integrity of a filesystem (unmounted).",
12678 "Check structural integrity of an unmounted filesystem. Verify internal",
12679 "trees' consistency and item connectivity. In the repair mode try to",
12680 "fix the problems found. ",
12681 "WARNING: the repair mode is considered dangerous",
12683 "-s|--super <superblock> use this superblock copy",
12684 "-b|--backup use the first valid backup root copy",
12685 "--repair try to repair the filesystem",
12686 "--readonly run in read-only mode (default)",
12687 "--init-csum-tree create a new CRC tree",
12688 "--init-extent-tree create a new extent tree",
12689 "--mode <MODE> allows choice of memory/IO trade-offs",
12690 " where MODE is one of:",
12691 " original - read inodes and extents to memory (requires",
12692 " more memory, does less IO)",
12693 " lowmem - try to use less memory but read blocks again",
12695 "--check-data-csum verify checksums of data blocks",
12696 "-Q|--qgroup-report print a report on qgroup consistency",
12697 "-E|--subvol-extents <subvolid>",
12698 " print subvolume extents and sharing state",
12699 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12700 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12701 "-p|--progress indicate progress",
12702 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
12706 int cmd_check(int argc, char **argv)
12708 struct cache_tree root_cache;
12709 struct btrfs_root *root;
12710 struct btrfs_fs_info *info;
12713 u64 tree_root_bytenr = 0;
12714 u64 chunk_root_bytenr = 0;
12715 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12719 int init_csum_tree = 0;
12721 int clear_space_cache = 0;
12722 int qgroup_report = 0;
12723 int qgroups_repaired = 0;
12724 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12728 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12729 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12730 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12731 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12732 static const struct option long_options[] = {
12733 { "super", required_argument, NULL, 's' },
12734 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12735 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12736 { "init-csum-tree", no_argument, NULL,
12737 GETOPT_VAL_INIT_CSUM },
12738 { "init-extent-tree", no_argument, NULL,
12739 GETOPT_VAL_INIT_EXTENT },
12740 { "check-data-csum", no_argument, NULL,
12741 GETOPT_VAL_CHECK_CSUM },
12742 { "backup", no_argument, NULL, 'b' },
12743 { "subvol-extents", required_argument, NULL, 'E' },
12744 { "qgroup-report", no_argument, NULL, 'Q' },
12745 { "tree-root", required_argument, NULL, 'r' },
12746 { "chunk-root", required_argument, NULL,
12747 GETOPT_VAL_CHUNK_TREE },
12748 { "progress", no_argument, NULL, 'p' },
12749 { "mode", required_argument, NULL,
12751 { "clear-space-cache", required_argument, NULL,
12752 GETOPT_VAL_CLEAR_SPACE_CACHE},
12753 { NULL, 0, NULL, 0}
12756 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
12760 case 'a': /* ignored */ break;
12762 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12765 num = arg_strtou64(optarg);
12766 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12768 "super mirror should be less than %d",
12769 BTRFS_SUPER_MIRROR_MAX);
12772 bytenr = btrfs_sb_offset(((int)num));
12773 printf("using SB copy %llu, bytenr %llu\n", num,
12774 (unsigned long long)bytenr);
12780 subvolid = arg_strtou64(optarg);
12783 tree_root_bytenr = arg_strtou64(optarg);
12785 case GETOPT_VAL_CHUNK_TREE:
12786 chunk_root_bytenr = arg_strtou64(optarg);
12789 ctx.progress_enabled = true;
12793 usage(cmd_check_usage);
12794 case GETOPT_VAL_REPAIR:
12795 printf("enabling repair mode\n");
12797 ctree_flags |= OPEN_CTREE_WRITES;
12799 case GETOPT_VAL_READONLY:
12802 case GETOPT_VAL_INIT_CSUM:
12803 printf("Creating a new CRC tree\n");
12804 init_csum_tree = 1;
12806 ctree_flags |= OPEN_CTREE_WRITES;
12808 case GETOPT_VAL_INIT_EXTENT:
12809 init_extent_tree = 1;
12810 ctree_flags |= (OPEN_CTREE_WRITES |
12811 OPEN_CTREE_NO_BLOCK_GROUPS);
12814 case GETOPT_VAL_CHECK_CSUM:
12815 check_data_csum = 1;
12817 case GETOPT_VAL_MODE:
12818 check_mode = parse_check_mode(optarg);
12819 if (check_mode == CHECK_MODE_UNKNOWN) {
12820 error("unknown mode: %s", optarg);
12824 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12825 if (strcmp(optarg, "v1") == 0) {
12826 clear_space_cache = 1;
12827 } else if (strcmp(optarg, "v2") == 0) {
12828 clear_space_cache = 2;
12829 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12832 "invalid argument to --clear-space-cache, must be v1 or v2");
12835 ctree_flags |= OPEN_CTREE_WRITES;
12840 if (check_argc_exact(argc - optind, 1))
12841 usage(cmd_check_usage);
12843 if (ctx.progress_enabled) {
12844 ctx.tp = TASK_NOTHING;
12845 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12848 /* This check is the only reason for --readonly to exist */
12849 if (readonly && repair) {
12850 error("repair options are not compatible with --readonly");
12855 * Not supported yet
12857 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12858 error("low memory mode doesn't support repair yet");
12863 cache_tree_init(&root_cache);
12865 if((ret = check_mounted(argv[optind])) < 0) {
12866 error("could not check mount status: %s", strerror(-ret));
12870 error("%s is currently mounted, aborting", argv[optind]);
12876 /* only allow partial opening under repair mode */
12878 ctree_flags |= OPEN_CTREE_PARTIAL;
12880 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12881 chunk_root_bytenr, ctree_flags);
12883 error("cannot open file system");
12889 global_info = info;
12890 root = info->fs_root;
12891 if (clear_space_cache == 1) {
12892 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12894 "free space cache v2 detected, use --clear-space-cache v2");
12898 printf("Clearing free space cache\n");
12899 ret = clear_free_space_cache(info);
12901 error("failed to clear free space cache");
12904 printf("Free space cache cleared\n");
12907 } else if (clear_space_cache == 2) {
12908 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12909 printf("no free space cache v2 to clear\n");
12913 printf("Clear free space cache v2\n");
12914 ret = btrfs_clear_free_space_tree(info);
12916 error("failed to clear free space cache v2: %d", ret);
12919 printf("free space cache v2 cleared\n");
12925 * repair mode will force us to commit transaction which
12926 * will make us fail to load log tree when mounting.
12928 if (repair && btrfs_super_log_root(info->super_copy)) {
12929 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12935 ret = zero_log_tree(root);
12938 error("failed to zero log tree: %d", ret);
12943 uuid_unparse(info->super_copy->fsid, uuidbuf);
12944 if (qgroup_report) {
12945 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12947 ret = qgroup_verify_all(info);
12954 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12955 subvolid, argv[optind], uuidbuf);
12956 ret = print_extent_state(info, subvolid);
12960 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12962 if (!extent_buffer_uptodate(info->tree_root->node) ||
12963 !extent_buffer_uptodate(info->dev_root->node) ||
12964 !extent_buffer_uptodate(info->chunk_root->node)) {
12965 error("critical roots corrupted, unable to check the filesystem");
12971 if (init_extent_tree || init_csum_tree) {
12972 struct btrfs_trans_handle *trans;
12974 trans = btrfs_start_transaction(info->extent_root, 0);
12975 if (IS_ERR(trans)) {
12976 error("error starting transaction");
12977 ret = PTR_ERR(trans);
12982 if (init_extent_tree) {
12983 printf("Creating a new extent tree\n");
12984 ret = reinit_extent_tree(trans, info);
12990 if (init_csum_tree) {
12991 printf("Reinitialize checksum tree\n");
12992 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12994 error("checksum tree initialization failed: %d",
13001 ret = fill_csum_tree(trans, info->csum_root,
13005 error("checksum tree refilling failed: %d", ret);
13010 * Ok now we commit and run the normal fsck, which will add
13011 * extent entries for all of the items it finds.
13013 ret = btrfs_commit_transaction(trans, info->extent_root);
13018 if (!extent_buffer_uptodate(info->extent_root->node)) {
13019 error("critical: extent_root, unable to check the filesystem");
13024 if (!extent_buffer_uptodate(info->csum_root->node)) {
13025 error("critical: csum_root, unable to check the filesystem");
13031 if (!ctx.progress_enabled)
13032 fprintf(stderr, "checking extents\n");
13033 if (check_mode == CHECK_MODE_LOWMEM)
13034 ret = check_chunks_and_extents_v2(root);
13036 ret = check_chunks_and_extents(root);
13040 "errors found in extent allocation tree or chunk allocation");
13042 ret = repair_root_items(info);
13045 error("failed to repair root items: %s", strerror(-ret));
13049 fprintf(stderr, "Fixed %d roots.\n", ret);
13051 } else if (ret > 0) {
13053 "Found %d roots with an outdated root item.\n",
13056 "Please run a filesystem check with the option --repair to fix them.\n");
13062 if (!ctx.progress_enabled) {
13063 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13064 fprintf(stderr, "checking free space tree\n");
13066 fprintf(stderr, "checking free space cache\n");
13068 ret = check_space_cache(root);
13071 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13072 error("errors found in free space tree");
13074 error("errors found in free space cache");
13079 * We used to have to have these hole extents in between our real
13080 * extents so if we don't have this flag set we need to make sure there
13081 * are no gaps in the file extents for inodes, otherwise we can just
13082 * ignore it when this happens.
13084 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13085 if (!ctx.progress_enabled)
13086 fprintf(stderr, "checking fs roots\n");
13087 if (check_mode == CHECK_MODE_LOWMEM)
13088 ret = check_fs_roots_v2(root->fs_info);
13090 ret = check_fs_roots(root, &root_cache);
13093 error("errors found in fs roots");
13097 fprintf(stderr, "checking csums\n");
13098 ret = check_csums(root);
13101 error("errors found in csum tree");
13105 fprintf(stderr, "checking root refs\n");
13106 /* For low memory mode, check_fs_roots_v2 handles root refs */
13107 if (check_mode != CHECK_MODE_LOWMEM) {
13108 ret = check_root_refs(root, &root_cache);
13111 error("errors found in root refs");
13116 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13117 struct extent_buffer *eb;
13119 eb = list_first_entry(&root->fs_info->recow_ebs,
13120 struct extent_buffer, recow);
13121 list_del_init(&eb->recow);
13122 ret = recow_extent_buffer(root, eb);
13125 error("fails to fix transid errors");
13130 while (!list_empty(&delete_items)) {
13131 struct bad_item *bad;
13133 bad = list_first_entry(&delete_items, struct bad_item, list);
13134 list_del_init(&bad->list);
13136 ret = delete_bad_item(root, bad);
13142 if (info->quota_enabled) {
13143 fprintf(stderr, "checking quota groups\n");
13144 ret = qgroup_verify_all(info);
13147 error("failed to check quota groups");
13151 ret = repair_qgroups(info, &qgroups_repaired);
13154 error("failed to repair quota groups");
13160 if (!list_empty(&root->fs_info->recow_ebs)) {
13161 error("transid errors in file system");
13166 if (found_old_backref) { /*
13167 * there was a disk format change when mixed
13168 * backref was in testing tree. The old format
13169 * existed about one week.
13171 printf("\n * Found old mixed backref format. "
13172 "The old format is not supported! *"
13173 "\n * Please mount the FS in readonly mode, "
13174 "backup data and re-format the FS. *\n\n");
13177 printf("found %llu bytes used, ",
13178 (unsigned long long)bytes_used);
13180 printf("error(s) found\n");
13182 printf("no error found\n");
13183 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13184 printf("total tree bytes: %llu\n",
13185 (unsigned long long)total_btree_bytes);
13186 printf("total fs tree bytes: %llu\n",
13187 (unsigned long long)total_fs_tree_bytes);
13188 printf("total extent tree bytes: %llu\n",
13189 (unsigned long long)total_extent_tree_bytes);
13190 printf("btree space waste bytes: %llu\n",
13191 (unsigned long long)btree_space_waste);
13192 printf("file data blocks allocated: %llu\n referenced %llu\n",
13193 (unsigned long long)data_bytes_allocated,
13194 (unsigned long long)data_bytes_referenced);
13196 free_qgroup_counts();
13197 free_root_recs_tree(&root_cache);
13201 if (ctx.progress_enabled)
13202 task_deinit(ctx.info);