2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
79 enum btrfs_check_mode {
83 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
88 struct extent_backref {
89 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
102 struct data_backref {
103 struct extent_backref node;
/*
 * Error flags, one bit each, so several of them can be OR-ed into a single
 * value.  Bit 0 is not assigned within this group.
 */
#define ROOT_DIR_ERROR		(1 << 1)	/* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)	/* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)	/* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)	/* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)	/* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)	/* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)	/* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)	/* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)	/* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10)	/* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11)	/* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12)	/* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13)	/* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14)	/* no inode_item */
#define LAST_ITEM		(1 << 15)	/* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16)	/* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17)	/* ROOT_REF found but not match */
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
141 * Much like data_backref, just removed the undetermined members
142 * and change it to use list_head.
143 * During extent scan, it is stored in root->orphan_data_extent.
144 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
146 struct orphan_data_extent {
147 struct list_head list;
155 struct tree_backref {
156 struct extent_backref node;
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
/*
 * Value used to explicitly initialize extent_record::flag_block_full_backref.
 * That member is a 2-bit wide bitfield, which is wide enough to hold it.
 */
enum {
	FLAG_UNSET = 2
};
171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
181 u64 extent_item_refs;
183 u64 parent_generation;
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
203 struct inode_backref {
204 struct list_head list;
205 unsigned int found_dir_item:1;
206 unsigned int found_dir_index:1;
207 unsigned int found_inode_ref:1;
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
222 struct root_item_record {
223 struct list_head list;
230 struct btrfs_key drop_key;
/*
 * Backref error flags, one bit each.  print_ref_error() turns a mask of
 * these into human readable text.
 */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8)	/* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
247 struct file_extent_hole {
253 struct inode_record {
254 struct list_head backrefs;
255 unsigned int checked:1;
256 unsigned int merging:1;
257 unsigned int found_inode_item:1;
258 unsigned int found_dir_item:1;
259 unsigned int found_file_extent:1;
260 unsigned int found_csum_item:1;
261 unsigned int some_csum_missing:1;
262 unsigned int nodatasum:1;
275 struct rb_root holes;
276 struct list_head orphan_extents;
/*
 * Inode error flags, one bit each, collected in inode_record 'errors'.
 * print_inode_error() turns a mask of these into human readable text.
 */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8)	/* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10)	/* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
297 struct root_backref {
298 struct list_head list;
299 unsigned int found_dir_item:1;
300 unsigned int found_dir_index:1;
301 unsigned int found_back_ref:1;
302 unsigned int found_forward_ref:1;
303 unsigned int reachable:1;
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
314 return list_entry(entry, struct root_backref, list);
318 struct list_head backrefs;
319 struct cache_extent cache;
320 unsigned int found_root_item:1;
326 struct cache_extent cache;
331 struct cache_extent cache;
332 struct cache_tree root_cache;
333 struct cache_tree inode_cache;
334 struct inode_record *current;
343 struct walk_control {
344 struct cache_tree shared;
345 struct shared_node *nodes[BTRFS_MAX_LEVEL];
351 struct btrfs_key key;
353 struct list_head list;
356 struct extent_entry {
361 struct list_head list;
364 struct root_item_info {
365 /* level of the root */
367 /* number of nodes at this level, must be 1 for a root */
371 struct cache_extent cache_extent;
375 * Error bit for low memory mode check.
377 * Currently no caller cares about it yet. Just internal use for error
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
/*
 * This flag used to be (1 << 4) as well, i.e. the very same bit as
 * REFERENCER_MISMATCH, which made the two errors impossible to tell apart
 * once OR-ed into an error mask.  It now owns the first unused bit.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
391 static void *print_status_check(void *p)
393 struct task_ctx *priv = p;
394 const char work_indicator[] = { '.', 'o', 'O', 'o' };
396 static char *task_position_string[] = {
398 "checking free space cache",
402 task_period_start(priv->info, 1000 /* 1s */);
404 if (priv->tp == TASK_NOTHING)
408 printf("%s [%c]\r", task_position_string[priv->tp],
409 work_indicator[count % 4]);
412 task_period_wait(priv->info);
417 static int print_status_return(void *p)
425 static enum btrfs_check_mode parse_check_mode(const char *str)
427 if (strcmp(str, "lowmem") == 0)
428 return CHECK_MODE_LOWMEM;
429 if (strcmp(str, "orig") == 0)
430 return CHECK_MODE_ORIGINAL;
431 if (strcmp(str, "original") == 0)
432 return CHECK_MODE_ORIGINAL;
434 return CHECK_MODE_UNKNOWN;
437 /* Compatibility helper that allows reuse of old code */
438 static u64 first_extent_gap(struct rb_root *holes)
440 struct file_extent_hole *hole;
442 if (RB_EMPTY_ROOT(holes))
445 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
451 struct file_extent_hole *hole1;
452 struct file_extent_hole *hole2;
454 hole1 = rb_entry(node1, struct file_extent_hole, node);
455 hole2 = rb_entry(node2, struct file_extent_hole, node);
457 if (hole1->start > hole2->start)
459 if (hole1->start < hole2->start)
461 /* Now hole1->start == hole2->start */
462 if (hole1->len >= hole2->len)
464 * Hole 1 will be merge center
465 * Same hole will be merged later
468 /* Hole 2 will be merge center */
473 * Add a hole to the record
475 * This will do hole merge for copy_file_extent_holes(),
476 * which will ensure there won't be continuous holes.
478 static int add_file_extent_hole(struct rb_root *holes,
481 struct file_extent_hole *hole;
482 struct file_extent_hole *prev = NULL;
483 struct file_extent_hole *next = NULL;
485 hole = malloc(sizeof(*hole));
490 /* Since compare will not return 0, no -EEXIST will happen */
491 rb_insert(holes, &hole->node, compare_hole);
493 /* simple merge with previous hole */
494 if (rb_prev(&hole->node))
495 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
497 if (prev && prev->start + prev->len >= hole->start) {
498 hole->len = hole->start + hole->len - prev->start;
499 hole->start = prev->start;
500 rb_erase(&prev->node, holes);
505 /* iterate merge with next holes */
507 if (!rb_next(&hole->node))
509 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
511 if (hole->start + hole->len >= next->start) {
512 if (hole->start + hole->len <= next->start + next->len)
513 hole->len = next->start + next->len -
515 rb_erase(&next->node, holes);
524 static int compare_hole_range(struct rb_node *node, void *data)
526 struct file_extent_hole *hole;
529 hole = (struct file_extent_hole *)data;
532 hole = rb_entry(node, struct file_extent_hole, node);
533 if (start < hole->start)
535 if (start >= hole->start && start < hole->start + hole->len)
541 * Delete a hole in the record
543 * This will do the hole split and is much stricter than add.
545 static int del_file_extent_hole(struct rb_root *holes,
548 struct file_extent_hole *hole;
549 struct file_extent_hole tmp;
554 struct rb_node *node;
561 node = rb_search(holes, &tmp, compare_hole_range, NULL);
564 hole = rb_entry(node, struct file_extent_hole, node);
565 if (start + len > hole->start + hole->len)
569 * Now there will be no overlap, delete the hole and re-add the
570 * split(s) if they exists.
572 if (start > hole->start) {
573 prev_start = hole->start;
574 prev_len = start - hole->start;
577 if (hole->start + hole->len > start + len) {
578 next_start = start + len;
579 next_len = hole->start + hole->len - start - len;
582 rb_erase(node, holes);
585 ret = add_file_extent_hole(holes, prev_start, prev_len);
590 ret = add_file_extent_hole(holes, next_start, next_len);
597 static int copy_file_extent_holes(struct rb_root *dst,
600 struct file_extent_hole *hole;
601 struct rb_node *node;
604 node = rb_first(src);
606 hole = rb_entry(node, struct file_extent_hole, node);
607 ret = add_file_extent_hole(dst, hole->start, hole->len);
610 node = rb_next(node);
615 static void free_file_extent_holes(struct rb_root *holes)
617 struct rb_node *node;
618 struct file_extent_hole *hole;
620 node = rb_first(holes);
622 hole = rb_entry(node, struct file_extent_hole, node);
623 rb_erase(node, holes);
625 node = rb_first(holes);
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632 struct btrfs_root *root)
634 if (root->last_trans != trans->transid) {
635 root->track_dirty = 1;
636 root->last_trans = trans->transid;
637 root->commit_root = root->node;
638 extent_buffer_get(root->node);
642 static u8 imode_to_type(u32 imode)
645 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
647 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
648 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
649 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
650 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
651 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
652 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
655 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
661 struct device_record *rec1;
662 struct device_record *rec2;
664 rec1 = rb_entry(node1, struct device_record, node);
665 rec2 = rb_entry(node2, struct device_record, node);
666 if (rec1->devid > rec2->devid)
668 else if (rec1->devid < rec2->devid)
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
676 struct inode_record *rec;
677 struct inode_backref *backref;
678 struct inode_backref *orig;
679 struct inode_backref *tmp;
680 struct orphan_data_extent *src_orphan;
681 struct orphan_data_extent *dst_orphan;
686 rec = malloc(sizeof(*rec));
688 return ERR_PTR(-ENOMEM);
689 memcpy(rec, orig_rec, sizeof(*rec));
691 INIT_LIST_HEAD(&rec->backrefs);
692 INIT_LIST_HEAD(&rec->orphan_extents);
693 rec->holes = RB_ROOT;
695 list_for_each_entry(orig, &orig_rec->backrefs, list) {
696 size = sizeof(*orig) + orig->namelen + 1;
697 backref = malloc(size);
702 memcpy(backref, orig, size);
703 list_add_tail(&backref->list, &rec->backrefs);
705 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706 dst_orphan = malloc(sizeof(*dst_orphan));
711 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
714 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
721 rb = rb_first(&rec->holes);
723 struct file_extent_hole *hole;
725 hole = rb_entry(rb, struct file_extent_hole, node);
731 if (!list_empty(&rec->backrefs))
732 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733 list_del(&orig->list);
737 if (!list_empty(&rec->orphan_extents))
738 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739 list_del(&orig->list);
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
751 struct orphan_data_extent *orphan;
753 if (list_empty(orphan_extents))
755 printf("The following data extent is lost in tree %llu:\n",
757 list_for_each_entry(orphan, orphan_extents, list) {
758 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759 orphan->objectid, orphan->offset, orphan->disk_bytenr,
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
766 u64 root_objectid = root->root_key.objectid;
767 int errors = rec->errors;
771 /* reloc root errors, we print its corresponding fs root objectid*/
772 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773 root_objectid = root->root_key.offset;
774 fprintf(stderr, "reloc");
776 fprintf(stderr, "root %llu inode %llu errors %x",
777 (unsigned long long) root_objectid,
778 (unsigned long long) rec->ino, rec->errors);
780 if (errors & I_ERR_NO_INODE_ITEM)
781 fprintf(stderr, ", no inode item");
782 if (errors & I_ERR_NO_ORPHAN_ITEM)
783 fprintf(stderr, ", no orphan item");
784 if (errors & I_ERR_DUP_INODE_ITEM)
785 fprintf(stderr, ", dup inode item");
786 if (errors & I_ERR_DUP_DIR_INDEX)
787 fprintf(stderr, ", dup dir index");
788 if (errors & I_ERR_ODD_DIR_ITEM)
789 fprintf(stderr, ", odd dir item");
790 if (errors & I_ERR_ODD_FILE_EXTENT)
791 fprintf(stderr, ", odd file extent");
792 if (errors & I_ERR_BAD_FILE_EXTENT)
793 fprintf(stderr, ", bad file extent");
794 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795 fprintf(stderr, ", file extent overlap");
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797 fprintf(stderr, ", file extent discount");
798 if (errors & I_ERR_DIR_ISIZE_WRONG)
799 fprintf(stderr, ", dir isize wrong");
800 if (errors & I_ERR_FILE_NBYTES_WRONG)
801 fprintf(stderr, ", nbytes wrong");
802 if (errors & I_ERR_ODD_CSUM_ITEM)
803 fprintf(stderr, ", odd csum item");
804 if (errors & I_ERR_SOME_CSUM_MISSING)
805 fprintf(stderr, ", some csum missing");
806 if (errors & I_ERR_LINK_COUNT_WRONG)
807 fprintf(stderr, ", link count wrong");
808 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809 fprintf(stderr, ", orphan file extent");
810 fprintf(stderr, "\n");
811 /* Print the orphan extents if needed */
812 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
815 /* Print the holes if needed */
816 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817 struct file_extent_hole *hole;
818 struct rb_node *node;
821 node = rb_first(&rec->holes);
822 fprintf(stderr, "Found file extent holes:\n");
825 hole = rb_entry(node, struct file_extent_hole, node);
826 fprintf(stderr, "\tstart: %llu, len: %llu\n",
827 hole->start, hole->len);
828 node = rb_next(node);
831 fprintf(stderr, "\tstart: 0, len: %llu\n",
833 root->fs_info->sectorsize));
837 static void print_ref_error(int errors)
839 if (errors & REF_ERR_NO_DIR_ITEM)
840 fprintf(stderr, ", no dir item");
841 if (errors & REF_ERR_NO_DIR_INDEX)
842 fprintf(stderr, ", no dir index");
843 if (errors & REF_ERR_NO_INODE_REF)
844 fprintf(stderr, ", no inode ref");
845 if (errors & REF_ERR_DUP_DIR_ITEM)
846 fprintf(stderr, ", dup dir item");
847 if (errors & REF_ERR_DUP_DIR_INDEX)
848 fprintf(stderr, ", dup dir index");
849 if (errors & REF_ERR_DUP_INODE_REF)
850 fprintf(stderr, ", dup inode ref");
851 if (errors & REF_ERR_INDEX_UNMATCH)
852 fprintf(stderr, ", index mismatch");
853 if (errors & REF_ERR_FILETYPE_UNMATCH)
854 fprintf(stderr, ", filetype mismatch");
855 if (errors & REF_ERR_NAME_TOO_LONG)
856 fprintf(stderr, ", name too long");
857 if (errors & REF_ERR_NO_ROOT_REF)
858 fprintf(stderr, ", no root ref");
859 if (errors & REF_ERR_NO_ROOT_BACKREF)
860 fprintf(stderr, ", no root backref");
861 if (errors & REF_ERR_DUP_ROOT_REF)
862 fprintf(stderr, ", dup root ref");
863 if (errors & REF_ERR_DUP_ROOT_BACKREF)
864 fprintf(stderr, ", dup root backref");
865 fprintf(stderr, "\n");
868 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
871 struct ptr_node *node;
872 struct cache_extent *cache;
873 struct inode_record *rec = NULL;
876 cache = lookup_cache_extent(inode_cache, ino, 1);
878 node = container_of(cache, struct ptr_node, cache);
880 if (mod && rec->refs > 1) {
881 node->data = clone_inode_rec(rec);
882 if (IS_ERR(node->data))
888 rec = calloc(1, sizeof(*rec));
890 return ERR_PTR(-ENOMEM);
892 rec->extent_start = (u64)-1;
894 INIT_LIST_HEAD(&rec->backrefs);
895 INIT_LIST_HEAD(&rec->orphan_extents);
896 rec->holes = RB_ROOT;
898 node = malloc(sizeof(*node));
901 return ERR_PTR(-ENOMEM);
903 node->cache.start = ino;
904 node->cache.size = 1;
907 if (ino == BTRFS_FREE_INO_OBJECTID)
910 ret = insert_cache_extent(inode_cache, &node->cache);
912 return ERR_PTR(-EEXIST);
917 static void free_orphan_data_extents(struct list_head *orphan_extents)
919 struct orphan_data_extent *orphan;
921 while (!list_empty(orphan_extents)) {
922 orphan = list_entry(orphan_extents->next,
923 struct orphan_data_extent, list);
924 list_del(&orphan->list);
929 static void free_inode_rec(struct inode_record *rec)
931 struct inode_backref *backref;
936 while (!list_empty(&rec->backrefs)) {
937 backref = to_inode_backref(rec->backrefs.next);
938 list_del(&backref->list);
941 free_orphan_data_extents(&rec->orphan_extents);
942 free_file_extent_holes(&rec->holes);
946 static int can_free_inode_rec(struct inode_record *rec)
948 if (!rec->errors && rec->checked && rec->found_inode_item &&
949 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
954 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
955 struct inode_record *rec)
957 struct cache_extent *cache;
958 struct inode_backref *tmp, *backref;
959 struct ptr_node *node;
962 if (!rec->found_inode_item)
965 filetype = imode_to_type(rec->imode);
966 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
967 if (backref->found_dir_item && backref->found_dir_index) {
968 if (backref->filetype != filetype)
969 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
970 if (!backref->errors && backref->found_inode_ref &&
971 rec->nlink == rec->found_link) {
972 list_del(&backref->list);
978 if (!rec->checked || rec->merging)
981 if (S_ISDIR(rec->imode)) {
982 if (rec->found_size != rec->isize)
983 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
984 if (rec->found_file_extent)
985 rec->errors |= I_ERR_ODD_FILE_EXTENT;
986 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
987 if (rec->found_dir_item)
988 rec->errors |= I_ERR_ODD_DIR_ITEM;
989 if (rec->found_size != rec->nbytes)
990 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
991 if (rec->nlink > 0 && !no_holes &&
992 (rec->extent_end < rec->isize ||
993 first_extent_gap(&rec->holes) < rec->isize))
994 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
997 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
998 if (rec->found_csum_item && rec->nodatasum)
999 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1000 if (rec->some_csum_missing && !rec->nodatasum)
1001 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1004 BUG_ON(rec->refs != 1);
1005 if (can_free_inode_rec(rec)) {
1006 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1007 node = container_of(cache, struct ptr_node, cache);
1008 BUG_ON(node->data != rec);
1009 remove_cache_extent(inode_cache, &node->cache);
1011 free_inode_rec(rec);
1015 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1017 struct btrfs_path path;
1018 struct btrfs_key key;
1021 key.objectid = BTRFS_ORPHAN_OBJECTID;
1022 key.type = BTRFS_ORPHAN_ITEM_KEY;
1025 btrfs_init_path(&path);
1026 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1027 btrfs_release_path(&path);
1033 static int process_inode_item(struct extent_buffer *eb,
1034 int slot, struct btrfs_key *key,
1035 struct shared_node *active_node)
1037 struct inode_record *rec;
1038 struct btrfs_inode_item *item;
1040 rec = active_node->current;
1041 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1042 if (rec->found_inode_item) {
1043 rec->errors |= I_ERR_DUP_INODE_ITEM;
1046 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1047 rec->nlink = btrfs_inode_nlink(eb, item);
1048 rec->isize = btrfs_inode_size(eb, item);
1049 rec->nbytes = btrfs_inode_nbytes(eb, item);
1050 rec->imode = btrfs_inode_mode(eb, item);
1051 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1053 rec->found_inode_item = 1;
1054 if (rec->nlink == 0)
1055 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1056 maybe_free_inode_rec(&active_node->inode_cache, rec);
1060 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1062 int namelen, u64 dir)
1064 struct inode_backref *backref;
1066 list_for_each_entry(backref, &rec->backrefs, list) {
1067 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1069 if (backref->dir != dir || backref->namelen != namelen)
1071 if (memcmp(name, backref->name, namelen))
1076 backref = malloc(sizeof(*backref) + namelen + 1);
1079 memset(backref, 0, sizeof(*backref));
1081 backref->namelen = namelen;
1082 memcpy(backref->name, name, namelen);
1083 backref->name[namelen] = '\0';
1084 list_add_tail(&backref->list, &rec->backrefs);
1088 static int add_inode_backref(struct cache_tree *inode_cache,
1089 u64 ino, u64 dir, u64 index,
1090 const char *name, int namelen,
1091 u8 filetype, u8 itemtype, int errors)
1093 struct inode_record *rec;
1094 struct inode_backref *backref;
1096 rec = get_inode_rec(inode_cache, ino, 1);
1097 BUG_ON(IS_ERR(rec));
1098 backref = get_inode_backref(rec, name, namelen, dir);
1101 backref->errors |= errors;
1102 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1103 if (backref->found_dir_index)
1104 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1105 if (backref->found_inode_ref && backref->index != index)
1106 backref->errors |= REF_ERR_INDEX_UNMATCH;
1107 if (backref->found_dir_item && backref->filetype != filetype)
1108 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1110 backref->index = index;
1111 backref->filetype = filetype;
1112 backref->found_dir_index = 1;
1113 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1115 if (backref->found_dir_item)
1116 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1117 if (backref->found_dir_index && backref->filetype != filetype)
1118 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1120 backref->filetype = filetype;
1121 backref->found_dir_item = 1;
1122 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1123 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1124 if (backref->found_inode_ref)
1125 backref->errors |= REF_ERR_DUP_INODE_REF;
1126 if (backref->found_dir_index && backref->index != index)
1127 backref->errors |= REF_ERR_INDEX_UNMATCH;
1129 backref->index = index;
1131 backref->ref_type = itemtype;
1132 backref->found_inode_ref = 1;
1137 maybe_free_inode_rec(inode_cache, rec);
1141 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1142 struct cache_tree *dst_cache)
1144 struct inode_backref *backref;
1149 list_for_each_entry(backref, &src->backrefs, list) {
1150 if (backref->found_dir_index) {
1151 add_inode_backref(dst_cache, dst->ino, backref->dir,
1152 backref->index, backref->name,
1153 backref->namelen, backref->filetype,
1154 BTRFS_DIR_INDEX_KEY, backref->errors);
1156 if (backref->found_dir_item) {
1158 add_inode_backref(dst_cache, dst->ino,
1159 backref->dir, 0, backref->name,
1160 backref->namelen, backref->filetype,
1161 BTRFS_DIR_ITEM_KEY, backref->errors);
1163 if (backref->found_inode_ref) {
1164 add_inode_backref(dst_cache, dst->ino,
1165 backref->dir, backref->index,
1166 backref->name, backref->namelen, 0,
1167 backref->ref_type, backref->errors);
1171 if (src->found_dir_item)
1172 dst->found_dir_item = 1;
1173 if (src->found_file_extent)
1174 dst->found_file_extent = 1;
1175 if (src->found_csum_item)
1176 dst->found_csum_item = 1;
1177 if (src->some_csum_missing)
1178 dst->some_csum_missing = 1;
1179 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1180 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1185 BUG_ON(src->found_link < dir_count);
1186 dst->found_link += src->found_link - dir_count;
1187 dst->found_size += src->found_size;
1188 if (src->extent_start != (u64)-1) {
1189 if (dst->extent_start == (u64)-1) {
1190 dst->extent_start = src->extent_start;
1191 dst->extent_end = src->extent_end;
1193 if (dst->extent_end > src->extent_start)
1194 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1195 else if (dst->extent_end < src->extent_start) {
1196 ret = add_file_extent_hole(&dst->holes,
1198 src->extent_start - dst->extent_end);
1200 if (dst->extent_end < src->extent_end)
1201 dst->extent_end = src->extent_end;
1205 dst->errors |= src->errors;
1206 if (src->found_inode_item) {
1207 if (!dst->found_inode_item) {
1208 dst->nlink = src->nlink;
1209 dst->isize = src->isize;
1210 dst->nbytes = src->nbytes;
1211 dst->imode = src->imode;
1212 dst->nodatasum = src->nodatasum;
1213 dst->found_inode_item = 1;
1215 dst->errors |= I_ERR_DUP_INODE_ITEM;
1223 static int splice_shared_node(struct shared_node *src_node,
1224 struct shared_node *dst_node)
1226 struct cache_extent *cache;
1227 struct ptr_node *node, *ins;
1228 struct cache_tree *src, *dst;
1229 struct inode_record *rec, *conflict;
1230 u64 current_ino = 0;
1234 if (--src_node->refs == 0)
1236 if (src_node->current)
1237 current_ino = src_node->current->ino;
1239 src = &src_node->root_cache;
1240 dst = &dst_node->root_cache;
1242 cache = search_cache_extent(src, 0);
1244 node = container_of(cache, struct ptr_node, cache);
1246 cache = next_cache_extent(cache);
1249 remove_cache_extent(src, &node->cache);
1252 ins = malloc(sizeof(*ins));
1254 ins->cache.start = node->cache.start;
1255 ins->cache.size = node->cache.size;
1259 ret = insert_cache_extent(dst, &ins->cache);
1260 if (ret == -EEXIST) {
1261 conflict = get_inode_rec(dst, rec->ino, 1);
1262 BUG_ON(IS_ERR(conflict));
1263 merge_inode_recs(rec, conflict, dst);
1265 conflict->checked = 1;
1266 if (dst_node->current == conflict)
1267 dst_node->current = NULL;
1269 maybe_free_inode_rec(dst, conflict);
1270 free_inode_rec(rec);
1277 if (src == &src_node->root_cache) {
1278 src = &src_node->inode_cache;
1279 dst = &dst_node->inode_cache;
1283 if (current_ino > 0 && (!dst_node->current ||
1284 current_ino > dst_node->current->ino)) {
1285 if (dst_node->current) {
1286 dst_node->current->checked = 1;
1287 maybe_free_inode_rec(dst, dst_node->current);
1289 dst_node->current = get_inode_rec(dst, current_ino, 1);
1290 BUG_ON(IS_ERR(dst_node->current));
1295 static void free_inode_ptr(struct cache_extent *cache)
1297 struct ptr_node *node;
1298 struct inode_record *rec;
1300 node = container_of(cache, struct ptr_node, cache);
1302 free_inode_rec(rec);
1306 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1308 static struct shared_node *find_shared_node(struct cache_tree *shared,
1311 struct cache_extent *cache;
1312 struct shared_node *node;
1314 cache = lookup_cache_extent(shared, bytenr, 1);
1316 node = container_of(cache, struct shared_node, cache);
1322 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1325 struct shared_node *node;
1327 node = calloc(1, sizeof(*node));
1330 node->cache.start = bytenr;
1331 node->cache.size = 1;
1332 cache_tree_init(&node->root_cache);
1333 cache_tree_init(&node->inode_cache);
1336 ret = insert_cache_extent(shared, &node->cache);
1341 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1342 struct walk_control *wc, int level)
1344 struct shared_node *node;
1345 struct shared_node *dest;
1348 if (level == wc->active_node)
1351 BUG_ON(wc->active_node <= level);
1352 node = find_shared_node(&wc->shared, bytenr);
1354 ret = add_shared_node(&wc->shared, bytenr, refs);
1356 node = find_shared_node(&wc->shared, bytenr);
1357 wc->nodes[level] = node;
1358 wc->active_node = level;
1362 if (wc->root_level == wc->active_node &&
1363 btrfs_root_refs(&root->root_item) == 0) {
1364 if (--node->refs == 0) {
1365 free_inode_recs_tree(&node->root_cache);
1366 free_inode_recs_tree(&node->inode_cache);
1367 remove_cache_extent(&wc->shared, &node->cache);
1373 dest = wc->nodes[wc->active_node];
1374 splice_shared_node(node, dest);
1375 if (node->refs == 0) {
1376 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): walk_up_tree() calls this when it
 * climbs past the level of the active shared node.
 *
 * The active node is detached from wc->nodes[], the active level moves up to
 * a level i above @level chosen by the scan loop, and the detached node's
 * records are spliced into the new active node when that level is below the
 * root level or the root's root_item still has refs.
 * NOTE(review): the loop body that selects i and the final else branch are
 * not fully visible in this excerpt.
 */
1382 static int leave_shared_node(struct btrfs_root *root,
1383 struct walk_control *wc, int level)
1385 struct shared_node *node;
1386 struct shared_node *dest;
1389 if (level == wc->root_level)
1392 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
/* The scan must have stopped at a valid level. */
1396 BUG_ON(i >= BTRFS_MAX_LEVEL);
1398 node = wc->nodes[wc->active_node];
1399 wc->nodes[wc->active_node] = NULL;
1400 wc->active_node = i;
1402 dest = wc->nodes[wc->active_node];
1403 if (wc->active_node < wc->root_level ||
1404 btrfs_root_refs(&root->root_item) > 0) {
1405 BUG_ON(node->refs <= 1);
1406 splice_shared_node(node, dest);
1408 BUG_ON(node->refs < 2);
1417 * 1 - if the root with id child_root_id is a child of root parent_root_id
1418 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1419 * has other root(s) as parent(s)
1420 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Look in the tree root for a parent/child relationship between
 * parent_root_id and child_root_id, using ROOT_REF and ROOT_BACKREF items.
 */
1422 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1425 struct btrfs_path path;
1426 struct btrfs_key key;
1427 struct extent_buffer *leaf;
1431 btrfs_init_path(&path);
/* First try the direct ROOT_REF item keyed (parent, ROOT_REF, child). */
1433 key.objectid = parent_root_id;
1434 key.type = BTRFS_ROOT_REF_KEY;
1435 key.offset = child_root_id;
1436 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1440 btrfs_release_path(&path);
/* Then scan the ROOT_BACKREF items of the child for a matching parent. */
1444 key.objectid = child_root_id;
1445 key.type = BTRFS_ROOT_BACKREF_KEY;
1447 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1453 leaf = path.nodes[0];
/* Slot ran off the end of this leaf: continue in the next one. */
1454 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1455 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1458 leaf = path.nodes[0];
1461 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Left the ROOT_BACKREF items of child_root_id. */
1462 if (key.objectid != child_root_id ||
1463 key.type != BTRFS_ROOT_BACKREF_KEY)
1468 if (key.offset == parent_root_id) {
1469 btrfs_release_path(&path);
1476 btrfs_release_path(&path);
/* No match: 0 when some other parent was seen, 2 when none was. */
1479 return has_parent ? 0 : 2;
/*
 * Process one DIR_ITEM/DIR_INDEX item at @slot of leaf @eb for the inode
 * record currently active in @active_node.
 *
 * Every btrfs_dir_item packed into the item is walked; each one adds its
 * name length to the record's found_size and is recorded as a backref, in
 * the inode cache or the root cache depending on the type of its location
 * key.  Over-long names are flagged with REF_ERR_NAME_TOO_LONG, and a
 * DIR_ITEM whose key offset does not match the name hash sets
 * I_ERR_ODD_DIR_ITEM on the record.
 */
1482 static int process_dir_item(struct extent_buffer *eb,
1483 int slot, struct btrfs_key *key,
1484 struct shared_node *active_node)
1494 struct btrfs_dir_item *di;
1495 struct inode_record *rec;
1496 struct cache_tree *root_cache;
1497 struct cache_tree *inode_cache;
1498 struct btrfs_key location;
1499 char namebuf[BTRFS_NAME_LEN];
1501 root_cache = &active_node->root_cache;
1502 inode_cache = &active_node->inode_cache;
1503 rec = active_node->current;
1504 rec->found_dir_item = 1;
1506 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1507 total = btrfs_item_size_nr(eb, slot);
1508 while (cur < total) {
1510 btrfs_dir_item_key_to_cpu(eb, di, &location);
1511 name_len = btrfs_dir_name_len(eb, di);
1512 data_len = btrfs_dir_data_len(eb, di);
1513 filetype = btrfs_dir_type(eb, di);
/* The name length is accounted before it is validated below. */
1515 rec->found_size += name_len;
/* Name would cross the item boundary or exceeds BTRFS_NAME_LEN. */
1516 if (cur + sizeof(*di) + name_len > total ||
1517 name_len > BTRFS_NAME_LEN) {
1518 error = REF_ERR_NAME_TOO_LONG;
1520 if (cur + sizeof(*di) > total)
1522 len = min_t(u32, total - cur - sizeof(*di),
1529 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* For DIR_ITEM keys the key offset must equal the hash of the name. */
1531 if (key->type == BTRFS_DIR_ITEM_KEY &&
1532 key->offset != btrfs_name_hash(namebuf, len)) {
1533 rec->errors |= I_ERR_ODD_DIR_ITEM;
1534 error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1535 key->objectid, key->offset, namebuf, len, filetype,
1536 key->offset, btrfs_name_hash(namebuf, len));
/* Record the backref in the cache that matches the location key type. */
1539 if (location.type == BTRFS_INODE_ITEM_KEY) {
1540 add_inode_backref(inode_cache, location.objectid,
1541 key->objectid, key->offset, namebuf,
1542 len, filetype, key->type, error);
1543 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1544 add_inode_backref(root_cache, location.objectid,
1545 key->objectid, key->offset,
1546 namebuf, len, filetype,
/* Neither kind of location: recorded under BTRFS_MULTIPLE_OBJECTIDS. */
1549 fprintf(stderr, "invalid location in dir item %u\n",
1551 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1552 key->objectid, key->offset, namebuf,
1553 len, filetype, key->type, error);
/* Step over this entry: header plus name plus data. */
1556 len = sizeof(*di) + name_len + data_len;
1557 di = (struct btrfs_dir_item *)((char *)di + len);
/*
 * A DIR_INDEX item with nritems > 1 is flagged as a duplicate index.
 * NOTE(review): where nritems is counted is not visible in this excerpt.
 */
1560 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1561 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process one INODE_REF item: walk every btrfs_inode_ref packed into the
 * item at @slot and record a backref in the active node's inode cache,
 * passing key->objectid, key->offset, the index stored in the ref and the
 * name that follows it to add_inode_backref().
 * A name that would cross the item boundary or exceed BTRFS_NAME_LEN is
 * flagged with REF_ERR_NAME_TOO_LONG and read only as far as possible.
 */
1566 static int process_inode_ref(struct extent_buffer *eb,
1567 int slot, struct btrfs_key *key,
1568 struct shared_node *active_node)
1576 struct cache_tree *inode_cache;
1577 struct btrfs_inode_ref *ref;
1578 char namebuf[BTRFS_NAME_LEN];
1580 inode_cache = &active_node->inode_cache;
1582 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1583 total = btrfs_item_size_nr(eb, slot);
1584 while (cur < total) {
1585 name_len = btrfs_inode_ref_name_len(eb, ref);
1586 index = btrfs_inode_ref_index(eb, ref);
1588 /* inode_ref + namelen should not cross item boundary */
1589 if (cur + sizeof(*ref) + name_len > total ||
1590 name_len > BTRFS_NAME_LEN) {
1591 if (total < cur + sizeof(*ref))
1594 /* Still try to read out the remaining part */
1595 len = min_t(u32, total - cur - sizeof(*ref),
1597 error = REF_ERR_NAME_TOO_LONG;
1603 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1604 add_inode_backref(inode_cache, key->objectid, key->offset,
1605 index, namebuf, len, 0, key->type, error);
/* Step over this ref: header plus name. */
1607 len = sizeof(*ref) + name_len;
1608 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process one INODE_EXTREF item: walk every btrfs_inode_extref packed into
 * the item at @slot and record a backref in the active node's inode cache,
 * passing key->objectid, the parent and index stored in the extref and the
 * name that follows it to add_inode_backref().
 * Names longer than BTRFS_NAME_LEN are truncated to BTRFS_NAME_LEN and
 * flagged with REF_ERR_NAME_TOO_LONG.
 * NOTE(review): unlike process_inode_ref(), no check of the name against the
 * item boundary is visible here -- confirm whether one is needed.
 */
1614 static int process_inode_extref(struct extent_buffer *eb,
1615 int slot, struct btrfs_key *key,
1616 struct shared_node *active_node)
1625 struct cache_tree *inode_cache;
1626 struct btrfs_inode_extref *extref;
1627 char namebuf[BTRFS_NAME_LEN];
1629 inode_cache = &active_node->inode_cache;
1631 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1632 total = btrfs_item_size_nr(eb, slot);
1633 while (cur < total) {
1634 name_len = btrfs_inode_extref_name_len(eb, extref);
1635 index = btrfs_inode_extref_index(eb, extref);
1636 parent = btrfs_inode_extref_parent(eb, extref);
1637 if (name_len <= BTRFS_NAME_LEN) {
1641 len = BTRFS_NAME_LEN;
1642 error = REF_ERR_NAME_TOO_LONG;
1644 read_extent_buffer(eb, namebuf,
1645 (unsigned long)(extref + 1), len);
1646 add_inode_backref(inode_cache, key->objectid, parent,
1647 index, namebuf, len, 0, key->type, error);
/* Step over this extref: header plus name. */
1649 len = sizeof(*extref) + name_len;
1650 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the EXTENT_CSUM items of the csum root that overlap the byte range
 * starting at @start and spanning @len bytes.
 * NOTE(review): presumably the covered byte count is accumulated into
 * @found; the statements that do so and that advance the range are not
 * visible in this excerpt.
 */
1657 static int count_csum_range(struct btrfs_root *root, u64 start,
1658 u64 len, u64 *found)
1660 struct btrfs_key key;
1661 struct btrfs_path path;
1662 struct extent_buffer *leaf;
1667 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1669 btrfs_init_path(&path);
1671 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1673 key.type = BTRFS_EXTENT_CSUM_KEY;
1675 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* No exact match: inspect the item just before the returned slot. */
1679 if (ret > 0 && path.slots[0] > 0) {
1680 leaf = path.nodes[0];
1681 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1682 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1683 key.type == BTRFS_EXTENT_CSUM_KEY)
1688 leaf = path.nodes[0];
/* Slot ran off the end of this leaf: continue in the next one. */
1689 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1690 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1695 leaf = path.nodes[0];
1698 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Ran past the csum items. */
1699 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1700 key.type != BTRFS_EXTENT_CSUM_KEY)
1703 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* This item begins at or beyond the end of the range. */
1704 if (key.offset >= start + len)
1707 if (key.offset > start)
/* Every csum_size bytes of item data account for one sector. */
1710 size = btrfs_item_size_nr(leaf, path.slots[0]);
1711 csum_end = key.offset + (size / csum_size) *
1712 root->fs_info->sectorsize;
1713 if (csum_end > start) {
1714 size = min(csum_end - start, len);
1723 btrfs_release_path(&path);
/*
 * Process one EXTENT_DATA item of the inode record currently active in
 * @active_node: track holes and overlaps between consecutive file extents,
 * sanity-check the fields of the extent item, add its size to found_size
 * and count the checksums covering its on-disk range.
 */
1729 static int process_file_extent(struct btrfs_root *root,
1730 struct extent_buffer *eb,
1731 int slot, struct btrfs_key *key,
1732 struct shared_node *active_node)
1734 struct inode_record *rec;
1735 struct btrfs_file_extent_item *fi;
1737 u64 disk_bytenr = 0;
1738 u64 extent_offset = 0;
1739 u64 mask = root->fs_info->sectorsize - 1;
1743 rec = active_node->current;
1744 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1745 rec->found_file_extent = 1;
/* First extent seen for this inode: start tracking from its offset. */
1747 if (rec->extent_start == (u64)-1) {
1748 rec->extent_start = key->offset;
1749 rec->extent_end = key->offset;
/* Compare with the end of the previous extent: overlap or hole. */
1752 if (rec->extent_end > key->offset)
1753 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1754 else if (rec->extent_end < key->offset) {
1755 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1756 key->offset - rec->extent_end);
1761 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1762 extent_type = btrfs_file_extent_type(eb, fi);
1764 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1765 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1767 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Inline data is counted as-is, then rounded up to a full sector. */
1768 rec->found_size += num_bytes;
1769 num_bytes = (num_bytes + mask) & ~mask;
1770 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1771 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1772 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1773 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1774 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Length must be non-zero and sector aligned. */
1775 if (num_bytes == 0 || (num_bytes & mask))
1776 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced part must fit inside ram_bytes. */
1777 if (num_bytes + extent_offset >
1778 btrfs_file_extent_ram_bytes(eb, fi))
1779 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1780 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1781 (btrfs_file_extent_compression(eb, fi) ||
1782 btrfs_file_extent_encryption(eb, fi) ||
1783 btrfs_file_extent_other_encoding(eb, fi)))
1784 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a non-zero disk_bytenr count towards found_size. */
1785 if (disk_bytenr > 0)
1786 rec->found_size += num_bytes;
1788 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1790 rec->extent_end = key->offset + num_bytes;
1793 * The data reloc tree will copy full extents into its inode and then
1794 * copy the corresponding csums. Because the extent it copied could be
1795 * a preallocated extent that hasn't been written to yet there may be no
1796 * csums to copy, ergo we won't have csums for our file extent. This is
1797 * ok so just don't bother checking csums if the inode belongs to the
1800 if (disk_bytenr > 0 &&
1801 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* With compression set, the csum lookup length is disk_num_bytes. */
1803 if (btrfs_file_extent_compression(eb, fi))
1804 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1806 disk_bytenr += extent_offset;
1808 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/*
 * REG extents record csum presence and whether part of the range lacks
 * csums; the PREALLOC branch raises I_ERR_ODD_CSUM_ITEM.
 */
1811 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1813 rec->found_csum_item = 1;
1814 if (found < num_bytes)
1815 rec->some_csum_missing = 1;
1816 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1818 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of leaf @eb to the handler for its key type, keeping
 * active_node->current pointed at the inode record of the objectid being
 * processed.
 */
1824 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1825 struct walk_control *wc)
1827 struct btrfs_key key;
1831 struct cache_tree *inode_cache;
1832 struct shared_node *active_node;
/*
 * Special case: root-level leaf of a root whose root_item has zero refs.
 * NOTE(review): the action taken is not visible in this excerpt.
 */
1834 if (wc->root_level == wc->active_node &&
1835 btrfs_root_refs(&root->root_item) == 0)
1838 active_node = wc->nodes[wc->active_node];
1839 inode_cache = &active_node->inode_cache;
1840 nritems = btrfs_header_nritems(eb);
1841 for (i = 0; i < nritems; i++) {
1842 btrfs_item_key_to_cpu(eb, &key, i);
/* Free-space objects and orphan items are singled out before dispatch. */
1844 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1846 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Reached a higher objectid: finish the previous record, fetch the next. */
1849 if (active_node->current == NULL ||
1850 active_node->current->ino < key.objectid) {
1851 if (active_node->current) {
1852 active_node->current->checked = 1;
1853 maybe_free_inode_rec(inode_cache,
1854 active_node->current);
1856 active_node->current = get_inode_rec(inode_cache,
1858 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the key type. */
1861 case BTRFS_DIR_ITEM_KEY:
1862 case BTRFS_DIR_INDEX_KEY:
1863 ret = process_dir_item(eb, i, &key, active_node);
1865 case BTRFS_INODE_REF_KEY:
1866 ret = process_inode_ref(eb, i, &key, active_node);
1868 case BTRFS_INODE_EXTREF_KEY:
1869 ret = process_inode_extref(eb, i, &key, active_node);
1871 case BTRFS_INODE_ITEM_KEY:
1872 ret = process_inode_item(eb, i, &key, active_node);
1874 case BTRFS_EXTENT_DATA_KEY:
1875 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level bookkeeping for the tree walk, indexed by tree level: the
 * bytenr of the last block looked up at that level, its extent reference
 * count, and whether that block needs to be checked in the current root.
 */
1886 u64 bytenr[BTRFS_MAX_LEVEL];
1887 u64 refs[BTRFS_MAX_LEVEL];
1888 int need_check[BTRFS_MAX_LEVEL];
/*
 * Forward declarations: process_one_leaf_v2() below calls both of these
 * before their definitions appear.
 */
1891 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1892 struct node_refs *nrefs, u64 level);
1893 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1894 unsigned int ext_ref);
1897 * Returns >0 Found error, not fatal, should continue
1898 * Returns <0 Fatal error, must exit the whole check
1899 * Returns 0 No errors found
/*
 * Run check_inode_item() on the inodes found in the leaf at path->nodes[0],
 * collecting its result bits in 'err'.  When the check moves the path to a
 * different leaf, the per-level reference cache in @nrefs is refreshed for
 * every level whose block changed.
 */
1901 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1902 struct node_refs *nrefs, int *level, int ext_ref)
1904 struct extent_buffer *cur = path->nodes[0];
1905 struct btrfs_key key;
1909 int root_level = btrfs_header_level(root->node);
1911 int ret = 0; /* Final return value */
1912 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on. */
1914 cur_bytenr = cur->start;
1916 /* skip to first inode item or the first inode number change */
1917 nritems = btrfs_header_nritems(cur);
1918 for (i = 0; i < nritems; i++) {
1919 btrfs_item_key_to_cpu(cur, &key, i);
1921 first_ino = key.objectid;
1922 if (key.type == BTRFS_INODE_ITEM_KEY ||
1923 (first_ino && first_ino != key.objectid))
1927 path->slots[0] = nritems;
1933 err |= check_inode_item(root, path, ext_ref);
1935 if (err & LAST_ITEM)
1938 /* still have inode items in this leaf */
1939 if (cur->start == cur_bytenr)
1943 * we have switched to another leaf, above nodes may
1944 * have changed, here walk down the path, if a node
1945 * or leaf is shared, check whether we can skip this
1948 for (i = root_level; i >= 0; i--) {
1949 if (path->nodes[i]->start == nrefs->bytenr[i])
1952 ret = update_nodes_refs(root,
1953 path->nodes[i]->start,
1958 if (!nrefs->need_check[i]) {
/* Release the path buffers below *level. */
1964 for (i = 0; i < *level; i++) {
1965 free_extent_buffer(path->nodes[i]);
1966 path->nodes[i] = NULL;
/*
 * Issue readahead for the children of @node, starting at @slot, using the
 * block pointer and generation stored in each node slot.
 * NOTE(review): how 'level' is used is not visible in this excerpt.
 */
1975 static void reada_walk_down(struct btrfs_root *root,
1976 struct extent_buffer *node, int slot)
1978 struct btrfs_fs_info *fs_info = root->fs_info;
1985 level = btrfs_header_level(node);
1989 nritems = btrfs_header_nritems(node);
1990 for (i = slot; i < nritems; i++) {
1991 bytenr = btrfs_node_blockptr(node, i);
1992 ptr_gen = btrfs_node_ptr_generation(node, i);
1993 readahead_tree_block(fs_info, bytenr, fs_info->nodesize,
1999 * Check the child node/leaf against the following conditions:
2000 * 1. the first item key of the node/leaf should be the same as the one
2002 * 2. the block pointer in the parent node should match the child node/leaf.
2003 * 3. the generation in the parent node and in the child's header should be consistent.
2005 * Otherwise the child node/leaf pointed to by the key in the parent is not valid.
2007 * We would like to check the leaf owner too, but since subvolumes may share
2008 * leaves the owner check is not reliable, so the key check should be
2009 * sufficient for that case.
/*
 * Compare the tree block @child against the pointer stored in slot @slot of
 * @parent: first key, block address and generation.  Each mismatch is
 * reported on stderr.
 */
2011 static int check_child_node(struct extent_buffer *parent, int slot,
2012 struct extent_buffer *child)
2014 struct btrfs_key parent_key;
2015 struct btrfs_key child_key;
/* The first key comes from an item for a leaf, from a key pointer for a node. */
2018 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2019 if (btrfs_header_level(child) == 0)
2020 btrfs_item_key_to_cpu(child, &child_key, 0);
2022 btrfs_node_key_to_cpu(child, &child_key, 0);
2024 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2027 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2028 parent_key.objectid, parent_key.type, parent_key.offset,
2029 child_key.objectid, child_key.type, child_key.offset);
/* The child's own bytenr must be the one the parent points at. */
2031 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2033 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2034 btrfs_node_blockptr(parent, slot),
2035 btrfs_header_bytenr(child));
/*
 * The generation recorded in the parent must match the child's header.
 * NOTE(review): the two messages above print the parent's value as
 * "wanted" and the child's as "have"; here the arguments are in the
 * opposite order, which looks swapped -- confirm.
 */
2037 if (btrfs_node_ptr_generation(parent, slot) !=
2038 btrfs_header_generation(child)) {
2040 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2041 btrfs_header_generation(child),
2042 btrfs_node_ptr_generation(parent, slot));
2048 * For a tree node or leaf that is shared, we don't need to iterate over it
2049 * in every fs or file tree check. Here we find all of its root ids, and only
2050 * check it in the fs or file tree which has the smallest root id.
/*
 * Decide whether @root is the root under which a block referenced by all
 * of @roots should be checked.
 */
2052 static int need_check(struct btrfs_root *root, struct ulist *roots)
2054 struct rb_node *node;
2055 struct ulist_node *u;
/* The block is referenced by a single root only. */
2057 if (roots->nnodes == 1)
/* Take the first entry of the ulist in rb-tree order. */
2060 node = rb_first(&roots->root);
2061 u = rb_entry(node, struct ulist_node, rb_node);
2063 * current root id is not smallest, we skip it and let it be checked
2064 * in the fs or file tree who hash the smallest root id.
2066 if (root->objectid != u->val)
2073 * For a tree node or leaf, we record its reference count, so that if we
2074 * process this node or leaf again later we don't need to compute it again.
/*
 * Refresh the cached information for @level in @nrefs when @bytenr differs
 * from the block recorded there: look up the extent reference count, store
 * it together with @bytenr, and work out need_check[] for the level.
 */
2076 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2077 struct node_refs *nrefs, u64 level)
2081 struct ulist *roots;
2083 if (nrefs->bytenr[level] != bytenr) {
2084 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2085 level, 1, &refs, NULL);
2089 nrefs->bytenr[level] = bytenr;
2090 nrefs->refs[level] = refs;
/* Find all roots referencing the block and let need_check() decide. */
2092 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2097 check = need_check(root, roots);
2099 nrefs->need_check[level] = check;
/*
 * Otherwise the block is always checked.
 * NOTE(review): the selecting condition is not visible in this excerpt.
 */
2101 nrefs->need_check[level] = 1;
/*
 * Walk down from path->nodes[*level] towards the leaves, processing each
 * leaf reached with process_one_leaf().
 *
 * Reference counts are cached per level in @nrefs, so a block's extent info
 * is looked up only when its bytenr differs from the cached one.  Child
 * blocks are read (with readahead of the following slots), validated
 * against the parent slot and by btrfs_check_leaf() or btrfs_check_node(),
 * then installed in @path one level down.
 */
2108 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2109 struct walk_control *wc, int *level,
2110 struct node_refs *nrefs)
2112 enum btrfs_tree_block_status status;
2115 struct btrfs_fs_info *fs_info = root->fs_info;
2116 struct extent_buffer *next;
2117 struct extent_buffer *cur;
2121 WARN_ON(*level < 0);
2122 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached refs for the starting block when the bytenr matches. */
2124 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2125 refs = nrefs->refs[*level];
2128 ret = btrfs_lookup_extent_info(NULL, root,
2129 path->nodes[*level]->start,
2130 *level, 1, &refs, NULL);
2135 nrefs->bytenr[*level] = path->nodes[*level]->start;
2136 nrefs->refs[*level] = refs;
2140 ret = enter_shared_node(root, path->nodes[*level]->start,
2148 while (*level >= 0) {
2149 WARN_ON(*level < 0);
2150 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2151 cur = path->nodes[*level];
2153 if (btrfs_header_level(cur) != *level)
2156 if (path->slots[*level] >= btrfs_header_nritems(cur))
2159 ret = process_one_leaf(root, cur, wc);
2164 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2165 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
/* Same caching for the child block at the current slot. */
2167 if (bytenr == nrefs->bytenr[*level - 1]) {
2168 refs = nrefs->refs[*level - 1];
2170 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2171 *level - 1, 1, &refs, NULL);
2175 nrefs->bytenr[*level - 1] = bytenr;
2176 nrefs->refs[*level - 1] = refs;
2181 ret = enter_shared_node(root, bytenr, refs,
2184 path->slots[*level]++;
/* Use the cached buffer if it is up to date for ptr_gen, else read it. */
2189 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2190 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2191 free_extent_buffer(next);
2192 reada_walk_down(root, cur, path->slots[*level]);
2193 next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2194 if (!extent_buffer_uptodate(next)) {
/* Read failed: record a corrupt extent for the fs_info. */
2195 struct btrfs_key node_key;
2197 btrfs_node_key_to_cpu(path->nodes[*level],
2199 path->slots[*level]);
2200 btrfs_add_corrupt_extent_record(root->fs_info,
2202 path->nodes[*level]->start,
2203 root->fs_info->nodesize,
/* The child must agree with what the parent slot says about it. */
2210 ret = check_child_node(cur, path->slots[*level], next);
2212 free_extent_buffer(next);
2217 if (btrfs_is_leaf(next))
2218 status = btrfs_check_leaf(root, NULL, next);
2220 status = btrfs_check_node(root, NULL, next);
2221 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2222 free_extent_buffer(next);
/* Descend: the child becomes the path node one level down. */
2227 *level = *level - 1;
2228 free_extent_buffer(path->nodes[*level]);
2229 path->nodes[*level] = next;
2230 path->slots[*level] = 0;
/* Mark the current level as fully consumed. */
2233 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Forward declaration; repeats the one made earlier in this file. */
2237 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2238 unsigned int ext_ref);
2241 * Returns >0 Found error, should continue
2242 * Returns <0 Fatal error, must exit the whole check
2243 * Returns 0 No errors found
/*
 * Walk down from path->nodes[*level]: leaves are validated with
 * btrfs_check_leaf() and handed to process_one_leaf_v2(), nodes are
 * validated with btrfs_check_node() before their children are followed.
 * The per-level cache in @nrefs is refreshed through update_nodes_refs(),
 * and a child whose need_check[] entry is clear is stepped over.
 */
2245 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2246 int *level, struct node_refs *nrefs, int ext_ref)
2248 enum btrfs_tree_block_status status;
2251 struct btrfs_fs_info *fs_info = root->fs_info;
2252 struct extent_buffer *next;
2253 struct extent_buffer *cur;
2256 WARN_ON(*level < 0);
2257 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2259 ret = update_nodes_refs(root, path->nodes[*level]->start,
2264 while (*level >= 0) {
2265 WARN_ON(*level < 0);
2266 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2267 cur = path->nodes[*level];
2269 if (btrfs_header_level(cur) != *level)
2272 if (path->slots[*level] >= btrfs_header_nritems(cur))
2274 /* Don't forget to check leaf/node validity */
2276 ret = btrfs_check_leaf(root, NULL, cur);
2277 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2281 ret = process_one_leaf_v2(root, path, nrefs,
2285 ret = btrfs_check_node(root, NULL, cur);
2286 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2291 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2292 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2294 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* need_check is clear for the child: move on to the next slot. */
2297 if (!nrefs->need_check[*level - 1]) {
2298 path->slots[*level]++;
/* Use the cached buffer if it is up to date for ptr_gen, else read it. */
2302 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2303 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2304 free_extent_buffer(next);
2305 reada_walk_down(root, cur, path->slots[*level]);
2306 next = read_tree_block(fs_info, bytenr, ptr_gen);
2307 if (!extent_buffer_uptodate(next)) {
/* Read failed: record a corrupt extent for the fs_info. */
2308 struct btrfs_key node_key;
2310 btrfs_node_key_to_cpu(path->nodes[*level],
2312 path->slots[*level]);
2313 btrfs_add_corrupt_extent_record(fs_info,
2315 path->nodes[*level]->start,
/* The child must agree with what the parent slot says about it. */
2323 ret = check_child_node(cur, path->slots[*level], next);
2327 if (btrfs_is_leaf(next))
2328 status = btrfs_check_leaf(root, NULL, next);
2330 status = btrfs_check_node(root, NULL, next);
2331 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2332 free_extent_buffer(next);
/* Descend: the child becomes the path node one level down. */
2337 *level = *level - 1;
2338 free_extent_buffer(path->nodes[*level]);
2339 path->nodes[*level] = next;
2340 path->slots[*level] = 0;
/*
 * Walk back up @path from *level, testing at each level whether the node
 * still has a slot after the current one.  The buffer at *level is released
 * and its path entry cleared, and leave_shared_node() is called when *level
 * is the active shared-node level.
 * NOTE(review): the slot advance and the update of *level are not visible
 * in this excerpt.
 */
2345 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2346 struct walk_control *wc, int *level)
2349 struct extent_buffer *leaf;
2351 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2352 leaf = path->nodes[i];
2353 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2358 free_extent_buffer(path->nodes[*level]);
2359 path->nodes[*level] = NULL;
2360 BUG_ON(*level > wc->active_node);
2361 if (*level == wc->active_node)
2362 leave_shared_node(root, wc, *level);
/*
 * Walk back up @path from *level, testing at each level whether the node
 * still has a slot after the current one; the buffer at *level is released
 * and its path entry cleared.  No shared-node handling is involved here.
 * NOTE(review): the slot advance and the update of *level are not visible
 * in this excerpt.
 */
2369 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2373 struct extent_buffer *leaf;
2375 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2376 leaf = path->nodes[i];
2377 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2382 free_extent_buffer(path->nodes[*level]);
2383 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory.  The visible tests
 * pick out deviations from: an inode item present and no errors; nlink 1
 * with found_link 0; a non-empty backref list whose first entry was found
 * as an inode ref named ".." with index 0 and has neither a dir index nor
 * a dir item.
 * NOTE(review): the action taken on a deviation and the return value are
 * not visible in this excerpt.
 */
2390 static int check_root_dir(struct inode_record *rec)
2392 struct inode_backref *backref;
2395 if (!rec->found_inode_item || rec->errors)
2397 if (rec->nlink != 1 || rec->found_link != 0)
2399 if (list_empty(&rec->backrefs))
2401 backref = to_inode_backref(rec->backrefs.next);
2402 if (!backref->found_inode_ref)
2404 if (backref->index != 0 || backref->namelen != 2 ||
2405 memcmp(backref->name, "..", 2))
2407 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size of directory inode rec->ino to rec->found_size and clear
 * I_ERR_DIR_ISIZE_WRONG.
 * The search key uses offset (u64)-1, and the item whose key is read back
 * from path->slots[0] is verified to belong to rec->ino before it is
 * modified.
 * NOTE(review): the slot adjustment between the search and the key read is
 * not visible in this excerpt.
 */
2414 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2415 struct btrfs_root *root, struct btrfs_path *path,
2416 struct inode_record *rec)
2418 struct btrfs_inode_item *ei;
2419 struct btrfs_key key;
2422 key.objectid = rec->ino;
2423 key.type = BTRFS_INODE_ITEM_KEY;
2424 key.offset = (u64)-1;
2426 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2430 if (!path->slots[0]) {
2437 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2438 if (key.objectid != rec->ino) {
2443 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2444 struct btrfs_inode_item);
2445 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2446 btrfs_mark_buffer_dirty(path->nodes[0]);
2447 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2448 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2449 root->root_key.objectid);
2451 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino via btrfs_add_orphan_item(), release the
 * path and clear I_ERR_NO_ORPHAN_ITEM.
 * NOTE(review): whether the flag is cleared only on success is not visible
 * in this excerpt.
 */
2455 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2456 struct btrfs_root *root,
2457 struct btrfs_path *path,
2458 struct inode_record *rec)
2462 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2463 btrfs_release_path(path);
2465 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Set the nbytes field of the inode item of rec->ino to rec->found_size,
 * mark the leaf dirty and clear I_ERR_FILE_NBYTES_WRONG.
 */
2469 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2470 struct btrfs_root *root,
2471 struct btrfs_path *path,
2472 struct inode_record *rec)
2474 struct btrfs_inode_item *ei;
2475 struct btrfs_key key;
2478 key.objectid = rec->ino;
2479 key.type = BTRFS_INODE_ITEM_KEY;
2482 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2489 /* Since ret == 0, no need to check anything */
2490 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2491 struct btrfs_inode_item);
2492 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2493 btrfs_mark_buffer_dirty(path->nodes[0]);
2494 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2495 printf("reset nbytes for ino %llu root %llu\n",
2496 rec->ino, root->root_key.objectid);
2498 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that @backref says is missing: key
 * (backref->dir, BTRFS_DIR_INDEX_KEY, backref->index), pointing at inode
 * rec->ino with the name from @backref, in its own transaction.
 * Afterwards the in-memory state is updated: the backref is marked as
 * having a dir index, and the directory record's found_size and
 * I_ERR_DIR_ISIZE_WRONG flag are brought up to date.
 */
2502 static int add_missing_dir_index(struct btrfs_root *root,
2503 struct cache_tree *inode_cache,
2504 struct inode_record *rec,
2505 struct inode_backref *backref)
2507 struct btrfs_path path;
2508 struct btrfs_trans_handle *trans;
2509 struct btrfs_dir_item *dir_item;
2510 struct extent_buffer *leaf;
2511 struct btrfs_key key;
2512 struct btrfs_disk_key disk_key;
2513 struct inode_record *dir_rec;
2514 unsigned long name_ptr;
/* Item payload: dir item header followed by the name. */
2515 u32 data_size = sizeof(*dir_item) + backref->namelen;
2518 trans = btrfs_start_transaction(root, 1);
2520 return PTR_ERR(trans);
2522 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2523 (unsigned long long)rec->ino);
2525 btrfs_init_path(&path);
2526 key.objectid = backref->dir;
2527 key.type = BTRFS_DIR_INDEX_KEY;
2528 key.offset = backref->index;
2529 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2532 leaf = path.nodes[0];
2533 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key of the new entry: the inode item of rec->ino. */
2535 disk_key.objectid = cpu_to_le64(rec->ino);
2536 disk_key.type = BTRFS_INODE_ITEM_KEY;
2537 disk_key.offset = 0;
2539 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2540 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2541 btrfs_set_dir_data_len(leaf, dir_item, 0);
2542 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2543 name_ptr = (unsigned long)(dir_item + 1);
2544 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2545 btrfs_mark_buffer_dirty(leaf);
2546 btrfs_release_path(&path);
/* NOTE(review): the return value of the commit is not used here. */
2547 btrfs_commit_transaction(trans, root);
2549 backref->found_dir_index = 1;
2550 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2551 BUG_ON(IS_ERR(dir_rec));
/* The new index entry adds its name length to the directory's found size. */
2554 dir_rec->found_size += backref->namelen;
2555 if (dir_rec->found_size == dir_rec->isize &&
2556 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2557 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2558 if (dir_rec->found_size != dir_rec->isize)
2559 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref (directory, name and
 * index) in its own transaction.
 */
2564 static int delete_dir_index(struct btrfs_root *root,
2565 struct inode_backref *backref)
2567 struct btrfs_trans_handle *trans;
2568 struct btrfs_dir_item *di;
2569 struct btrfs_path path;
2572 trans = btrfs_start_transaction(root, 1);
2574 return PTR_ERR(trans);
2576 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2577 (unsigned long long)backref->dir,
2578 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2579 (unsigned long long)root->objectid);
2581 btrfs_init_path(&path);
2582 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2583 backref->name, backref->namelen,
2584 backref->index, -1);
/*
 * Early exit path: release the path and commit.
 * NOTE(review): the condition leading here is not visible in this excerpt.
 */
2587 btrfs_release_path(&path);
2588 btrfs_commit_transaction(trans, root);
/*
 * Two removal forms are visible: delete the whole item, or delete only
 * this name from it.
 */
2595 ret = btrfs_del_item(trans, root, &path);
2597 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2599 btrfs_release_path(&path);
2600 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for rec->ino from what was observed during
 * the check: generation from the new transaction, nbytes from found_size,
 * a directory (S_IFDIR | 0755, size found_size) when a dir item was seen,
 * otherwise a regular file (S_IFREG | 0755, size extent_end).  atime, ctime
 * and mtime are set to the current time, otime to zero.
 */
2604 static int create_inode_item(struct btrfs_root *root,
2605 struct inode_record *rec,
2608 struct btrfs_trans_handle *trans;
2609 struct btrfs_inode_item inode_item;
2610 time_t now = time(NULL);
2613 trans = btrfs_start_transaction(root, 1);
2614 if (IS_ERR(trans)) {
2615 ret = PTR_ERR(trans);
2619 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2620 "be incomplete, please check permissions and content after "
2621 "the fsck completes.\n", (unsigned long long)root->objectid,
2622 (unsigned long long)rec->ino);
2624 memset(&inode_item, 0, sizeof(inode_item));
2625 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/*
 * nlink is either 1 or rec->found_link.
 * NOTE(review): the selecting condition is not visible in this excerpt.
 */
2627 btrfs_set_stack_inode_nlink(&inode_item, 1);
2629 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2630 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2631 if (rec->found_dir_item) {
2632 if (rec->found_file_extent)
2633 fprintf(stderr, "root %llu inode %llu has both a dir "
2634 "item and extents, unsure if it is a dir or a "
2635 "regular file so setting it as a directory\n",
2636 (unsigned long long)root->objectid,
2637 (unsigned long long)rec->ino);
2638 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2639 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): this test is always true when the branch above is not taken. */
2640 } else if (!rec->found_dir_item) {
2641 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2642 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2644 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2645 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2646 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2647 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2648 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2649 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2650 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2651 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2653 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2655 btrfs_commit_transaction(trans, root);
/*
 * Try to repair the inconsistent backrefs of @rec.  Depending on which of
 * dir index, dir item and inode ref were found for a backref, the visible
 * cases delete a bad dir index, add a missing dir index, insert a missing
 * dir index/item pair, or recreate a missing inode item.
 * Returns ret when it is non-zero, otherwise 'repaired'.
 */
2659 static int repair_inode_backrefs(struct btrfs_root *root,
2660 struct inode_record *rec,
2661 struct cache_tree *inode_cache,
2664 struct inode_backref *tmp, *backref;
2665 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2669 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* The root directory itself lacks an inode item: recreate it. */
2670 if (!delete && rec->ino == root_dirid) {
2671 if (!rec->found_inode_item) {
2672 ret = create_inode_item(root, rec, 1);
2679 /* Index 0 for the root dir is special, don't mess with it */
2680 if (rec->ino == root_dirid && backref->index == 0)
/* Dir index without an inode ref, or with an index mismatch: delete it. */
2684 ((backref->found_dir_index && !backref->found_inode_ref) ||
2685 (backref->found_dir_index && backref->found_inode_ref &&
2686 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2687 ret = delete_dir_index(root, backref);
2691 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it. */
2696 if (!delete && !backref->found_dir_index &&
2697 backref->found_dir_item && backref->found_inode_ref) {
2698 ret = add_missing_dir_index(root, inode_cache, rec,
/* Backref is complete and error free: drop it from the list. */
2703 if (backref->found_dir_item &&
2704 backref->found_dir_index) {
2705 if (!backref->errors &&
2706 backref->found_inode_ref) {
2707 list_del(&backref->list);
/* Only the inode ref exists: insert the dir index/item pair. */
2714 if (!delete && (!backref->found_dir_index &&
2715 !backref->found_dir_item &&
2716 backref->found_inode_ref)) {
2717 struct btrfs_trans_handle *trans;
2718 struct btrfs_key location;
2720 ret = check_dir_conflict(root, backref->name,
2726 * let nlink fixing routine to handle it,
2727 * which can do it better.
2732 location.objectid = rec->ino;
2733 location.type = BTRFS_INODE_ITEM_KEY;
2734 location.offset = 0;
2736 trans = btrfs_start_transaction(root, 1);
2737 if (IS_ERR(trans)) {
2738 ret = PTR_ERR(trans);
2741 fprintf(stderr, "adding missing dir index/item pair "
2743 (unsigned long long)rec->ino);
2744 ret = btrfs_insert_dir_item(trans, root, backref->name,
2746 backref->dir, &location,
2747 imode_to_type(rec->imode),
2750 btrfs_commit_transaction(trans, root);
/* All three references exist and agree, but the inode item is missing. */
2754 if (!delete && (backref->found_inode_ref &&
2755 backref->found_dir_index &&
2756 backref->found_dir_item &&
2757 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2758 !rec->found_inode_item)) {
2759 ret = create_inode_item(root, rec, 0);
2766 return ret ? ret : repaired;
2770 * To determine the file type for nlink/inode_item repair
2772 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2773 * Return -ENOENT if file type is not found.
/*
 * Look up the BTRFS_FT_* type of @rec and store it in *@type.
 *
 * The mode of the inode item is preferred when the inode item was found;
 * otherwise the filetype of the first backref that has a dir index or a
 * dir item is used.
 */
2775 static int find_file_type(struct inode_record *rec, u8 *type)
2777 struct inode_backref *backref;
2779 /* For inode item recovered case */
2780 if (rec->found_inode_item) {
2781 *type = imode_to_type(rec->imode);
2785 list_for_each_entry(backref, &rec->backrefs, list) {
2786 if (backref->found_dir_index || backref->found_dir_item) {
2787 *type = backref->filetype;
2795 * To determine the file name for nlink repair
2797 * Return 0 if file name is found, set name and namelen.
2798 * Return -ENOENT if file name is not found.
/*
 * Copy the name of the first backref of @rec that has a dir index, a dir
 * item or an inode ref into @name and store its length in *@namelen.
 *
 * Exactly backref->namelen bytes are copied with memcpy(); this copy does
 * not append a terminating NUL, so @name must be able to hold that many
 * bytes.
 */
2800 static int find_file_name(struct inode_record *rec,
2801 char *name, int *namelen)
2803 struct inode_backref *backref;
2805 list_for_each_entry(backref, &rec->backrefs, list) {
2806 if (backref->found_dir_index || backref->found_dir_item ||
2807 backref->found_inode_ref) {
2808 memcpy(name, backref->name, backref->namelen);
2809 *namelen = backref->namelen;
2816 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of inode @rec from its backrefs.
 *
 * Steps visible here:
 *  1. rec->found_link is reset to 0,
 *  2. every backref is unlinked with btrfs_unlink(),
 *  3. backrefs that lack any of dir index / dir item / inode ref are dropped
 *     from rec->backrefs,
 *  4. the nlink field of the inode item is set to 0,
 *  5. the remaining backrefs are added again with btrfs_add_link().
 *
 * @path is released before returning.
 * NOTE(review): the error checks after each call are not visible here.
 */
2817 static int reset_nlink(struct btrfs_trans_handle *trans,
2818 struct btrfs_root *root,
2819 struct btrfs_path *path,
2820 struct inode_record *rec)
2822 struct inode_backref *backref;
2823 struct inode_backref *tmp;
2824 struct btrfs_key key;
2825 struct btrfs_inode_item *inode_item;
2828 /* We don't believe this either, reset it and iterate backref */
2829 rec->found_link = 0;
2831 /* Remove all backref including the valid ones */
2832 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2833 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2834 backref->index, backref->name,
2835 backref->namelen, 0);
2839 /* remove invalid backref, so it won't be added back */
2840 if (!(backref->found_dir_index &&
2841 backref->found_dir_item &&
2842 backref->found_inode_ref)) {
2843 list_del(&backref->list);
2850 /* Set nlink to 0 */
2851 key.objectid = rec->ino;
2852 key.type = BTRFS_INODE_ITEM_KEY;
2854 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2861 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2862 struct btrfs_inode_item);
2863 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2864 btrfs_mark_buffer_dirty(path->nodes[0]);
2865 btrfs_release_path(path);
2868 * Add back valid inode_ref/dir_item/dir_index,
2869 * add_link() will handle the nlink inc, so new nlink must be correct
2871 list_for_each_entry(backref, &rec->backrefs, list) {
2872 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2873 backref->name, backref->namelen,
2874 backref->filetype, &backref->index, 1);
2879 btrfs_release_path(path);
/*
 * Find the highest inode objectid currently present in @root and store it
 * in *@highest_ino.
 *
 * The tree is searched for an INODE_ITEM key with objectid
 * BTRFS_LAST_FREE_OBJECTID; the key in the slot just before the returned
 * position supplies the objectid.  @path is initialised on entry and
 * released before returning.
 *
 * NOTE(review): the condition guarding the "slots[0] - 1" read and the
 * result of the BTRFS_LAST_FREE_OBJECTID overflow check are not visible
 * here -- confirm slots[0] cannot be 0 at that point.
 */
2883 static int get_highest_inode(struct btrfs_trans_handle *trans,
2884 struct btrfs_root *root,
2885 struct btrfs_path *path,
2888 struct btrfs_key key, found_key;
2891 btrfs_init_path(path);
2892 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2894 key.type = BTRFS_INODE_ITEM_KEY;
2895 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2897 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2898 path->slots[0] - 1);
2899 *highest_ino = found_key.objectid;
2902 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2904 btrfs_release_path(path);
/*
 * Fix the link count of inode @rec.
 *
 * The file name and type are collected first, with the inode number (as a
 * decimal string) and BTRFS_FT_REG_FILE as fallbacks.  reset_nlink() then
 * rebuilds the links from the valid backrefs.  When no link is left
 * (rec->found_link == 0) a "lost+found" directory is created under
 * BTRFS_FIRST_FREE_OBJECTID and the inode is linked into it; while the name
 * collides (-EEXIST) a ".INO" suffix is appended and the link is retried.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared and @path is released on the way out.
 */
2908 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2909 struct btrfs_root *root,
2910 struct btrfs_path *path,
2911 struct inode_record *rec)
2913 char *dir_name = "lost+found";
2914 char namebuf[BTRFS_NAME_LEN] = {0};
2919 int name_recovered = 0;
2920 int type_recovered = 0;
2924 * Get file name and type first before these invalid inode ref
2925 * are deleted by remove_all_invalid_backref()
/* the find_*() helpers return 0 on success, hence the negation */
2927 name_recovered = !find_file_name(rec, namebuf, &namelen);
2928 type_recovered = !find_file_type(rec, &type);
2930 if (!name_recovered) {
2931 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2932 rec->ino, rec->ino);
2933 namelen = count_digits(rec->ino);
2934 sprintf(namebuf, "%llu", rec->ino);
2937 if (!type_recovered) {
2938 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2940 type = BTRFS_FT_REG_FILE;
2944 ret = reset_nlink(trans, root, path, rec);
2947 "Failed to reset nlink for inode %llu: %s\n",
2948 rec->ino, strerror(-ret));
2952 if (rec->found_link == 0) {
2953 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2957 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2958 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2961 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2962 dir_name, strerror(-ret));
2965 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2966 namebuf, namelen, type, NULL, 1);
2968 * Add ".INO" suffix several times to handle case where
2969 * "FILENAME.INO" is already taken by another file.
2971 while (ret == -EEXIST) {
2973 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2975 if (namelen + count_digits(rec->ino) + 1 >
2980 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2982 namelen += count_digits(rec->ino) + 1;
2983 ret = btrfs_add_link(trans, root, rec->ino,
2984 lost_found_ino, namebuf,
2985 namelen, type, NULL, 1);
2989 "Failed to link the inode %llu to %s dir: %s\n",
2990 rec->ino, dir_name, strerror(-ret));
2994 * Just increase the found_link, don't actually add the
2995 * backref. This will make things easier and this inode
2996 * record will be freed after the repair is done.
2997 * So fsck will not report problem about this inode.
3000 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3001 namelen, namebuf, dir_name);
3003 printf("Fixed the nlink of inode %llu\n", rec->ino);
3006 * Clear the flag anyway, or we will loop forever for the same inode
3007 * as it will not be removed from the bad inode list and the dead loop
3010 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3011 btrfs_release_path(path);
3016 * Check if there is any normal(reg or prealloc) file extent for given
3018 * This is used to determine the file type when neither its dir_index/item or
3019 * inode_item exists.
3021 * This will *NOT* report errors: if any error happens, the inode is simply
3022 * treated as having no normal file extent.
/*
 * Scan the EXTENT_DATA items of inode @ino in @root looking for a file
 * extent whose type is not BTRFS_FILE_EXTENT_INLINE.
 *
 * The search is read-only (no transaction handle, no COW) and uses a path
 * local to this function, which is released before returning.
 * NOTE(review): the value returned for "found" versus "not found" is not
 * visible here; the caller treats a non-zero result as "found".
 */
3024 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3026 struct btrfs_path path;
3027 struct btrfs_key key;
3028 struct btrfs_key found_key;
3029 struct btrfs_file_extent_item *fi;
3033 btrfs_init_path(&path);
3035 key.type = BTRFS_EXTENT_DATA_KEY;
3038 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* the search landed past the last item of the leaf: move to the next leaf */
3043 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3044 ret = btrfs_next_leaf(root, &path);
3051 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3053 if (found_key.objectid != ino ||
3054 found_key.type != BTRFS_EXTENT_DATA_KEY)
3056 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3057 struct btrfs_file_extent_item);
3058 type = btrfs_file_extent_type(path.nodes[0], fi);
3059 if (type != BTRFS_FILE_EXTENT_INLINE) {
3065 btrfs_release_path(&path);
3069 static u32 btrfs_type_to_imode(u8 type)
3071 static u32 imode_by_btrfs_type[] = {
3072 [BTRFS_FT_REG_FILE] = S_IFREG,
3073 [BTRFS_FT_DIR] = S_IFDIR,
3074 [BTRFS_FT_CHRDEV] = S_IFCHR,
3075 [BTRFS_FT_BLKDEV] = S_IFBLK,
3076 [BTRFS_FT_FIFO] = S_IFIFO,
3077 [BTRFS_FT_SOCK] = S_IFSOCK,
3078 [BTRFS_FT_SYMLINK] = S_IFLNK,
3081 return imode_by_btrfs_type[(type)];
/*
 * Recreate the missing inode item of @rec.
 *
 * The file type is taken from find_file_type() when available.  Otherwise
 * it is guessed: a regular (non-inline) file extent or an orphan extent
 * means a regular file, a found dir item means a directory, and a regular
 * file is the final fallback.  The new inode is created with
 * btrfs_new_inode() using "mode" combined with the type bits.
 *
 * On the visible path the record is updated (imode, found_dir_item),
 * I_ERR_NO_INODE_ITEM is cleared and I_ERR_LINK_COUNT_WRONG is set so that
 * the nlink repair runs afterwards.
 * NOTE(review): the initial value of "mode" is not visible here.
 */
3084 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3085 struct btrfs_root *root,
3086 struct btrfs_path *path,
3087 struct inode_record *rec)
3091 int type_recovered = 0;
3094 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() returns 0 on success, hence the negation */
3096 type_recovered = !find_file_type(rec, &filetype);
3099 * Try to determine inode type if type not found.
3101 * For found regular file extent, it must be FILE.
3102 * For found dir_item/index, it must be DIR.
3104 * For undetermined one, use FILE as fallback.
3107 * 1. If found backref(inode_index/item is already handled) to it,
3109 * Need new inode-inode ref structure to allow search for that.
3111 if (!type_recovered) {
3112 if (rec->found_file_extent &&
3113 find_normal_file_extent(root, rec->ino)) {
3115 filetype = BTRFS_FT_REG_FILE;
3116 } else if (rec->found_dir_item) {
3118 filetype = BTRFS_FT_DIR;
3119 } else if (!list_empty(&rec->orphan_extents)) {
3121 filetype = BTRFS_FT_REG_FILE;
3123 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3126 filetype = BTRFS_FT_REG_FILE;
3130 ret = btrfs_new_inode(trans, root, rec->ino,
3131 mode | btrfs_type_to_imode(filetype));
3136 * Here inode rebuild is done, we only rebuild the inode item,
3137 * don't repair the nlink(like move to lost+found).
3138 * That is the job of nlink repair.
3140 * We just fill the record and return
3142 rec->found_dir_item = 1;
3143 rec->imode = mode | btrfs_type_to_imode(filetype);
3145 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3146 /* Ensure the inode_nlinks repair function will be called */
3147 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach or drop the orphan data extents recorded for inode @rec.
 *
 * For each orphan extent, btrfs_get_extent() is used to look for a
 * conflicting file extent in the range [offset, offset + disk_len).  On a
 * conflict the extent is released with btrfs_free_extent(); otherwise a
 * file extent item is inserted with disk_len used for both lengths, then
 * rec->found_size and the hole tree are updated.  Each orphan is removed
 * from the list as it is handled, and I_ERR_FILE_EXTENT_ORPHAN is cleared.
 * NOTE(review): which return value of btrfs_get_extent() means "conflict"
 * is not visible here.
 */
3152 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3153 struct btrfs_root *root,
3154 struct btrfs_path *path,
3155 struct inode_record *rec)
3157 struct orphan_data_extent *orphan;
3158 struct orphan_data_extent *tmp;
3161 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3163 * Check for conflicting file extents
3165 * Here we don't know whether the extents is compressed or not,
3166 * so we can only assume it not compressed nor data offset,
3167 * and use its disk_len as extent length.
3169 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3170 orphan->offset, orphan->disk_len, 0);
3171 btrfs_release_path(path);
3176 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3177 orphan->disk_bytenr, orphan->disk_len);
3178 ret = btrfs_free_extent(trans,
3179 root->fs_info->extent_root,
3180 orphan->disk_bytenr, orphan->disk_len,
3181 0, root->objectid, orphan->objectid,
3186 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3187 orphan->offset, orphan->disk_bytenr,
3188 orphan->disk_len, orphan->disk_len);
3192 /* Update file size info */
3193 rec->found_size += orphan->disk_len;
3194 if (rec->found_size == rec->nbytes)
3195 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3197 /* Update the file extent hole info too */
3198 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3202 if (RB_EMPTY_ROOT(&rec->holes))
3203 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3205 list_del(&orphan->list);
3208 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps ("holes") recorded in rec->holes for inode @rec.
 *
 * Each hole taken from the rb-tree is punched with btrfs_punch_hole() and
 * then removed from the tree; I_ERR_FILE_EXTENT_DISCOUNT is cleared once
 * the tree is empty.  A separate call punches a hole from offset 0 up to
 * isize rounded up to the sector size for a file that lost all its file
 * extents.
 * NOTE(review): the condition selecting that special case is not visible
 * here.
 */
3213 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3214 struct btrfs_root *root,
3215 struct btrfs_path *path,
3216 struct inode_record *rec)
3218 struct rb_node *node;
3219 struct file_extent_hole *hole;
3223 node = rb_first(&rec->holes);
3227 hole = rb_entry(node, struct file_extent_hole, node);
3228 ret = btrfs_punch_hole(trans, root, rec->ino,
3229 hole->start, hole->len);
3232 ret = del_file_extent_hole(&rec->holes, hole->start,
3236 if (RB_EMPTY_ROOT(&rec->holes))
3237 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* restart from the first node: the previous one was just deleted */
3238 node = rb_first(&rec->holes);
3240 /* special case for a file losing all its file extent */
3242 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3243 round_up(rec->isize,
3244 root->fs_info->sectorsize));
3248 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3249 rec->ino, root->objectid);
/*
 * Run every repair routine whose error bit is set in rec->errors, all
 * inside one transaction on @root.
 *
 * Nothing is attempted unless at least one of the repairable error bits
 * tested below is set.  The repairs run in a fixed order (inode item,
 * orphan extents, discount extents, dir isize, orphan item, nlinks, nbytes)
 * and each later step is skipped once an earlier one has set ret.
 *
 * NOTE(review): the transaction is committed even when a repair failed and
 * the result of btrfs_commit_transaction() is not checked.
 */
3254 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3256 struct btrfs_trans_handle *trans;
3257 struct btrfs_path path;
3260 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3261 I_ERR_NO_ORPHAN_ITEM |
3262 I_ERR_LINK_COUNT_WRONG |
3263 I_ERR_NO_INODE_ITEM |
3264 I_ERR_FILE_EXTENT_ORPHAN |
3265 I_ERR_FILE_EXTENT_DISCOUNT|
3266 I_ERR_FILE_NBYTES_WRONG)))
3270 * For nlink repair, it may create a dir and add link, so
3271 * 2 for parent(256)'s dir_index and dir_item
3272 * 2 for lost+found dir's inode_item and inode_ref
3273 * 1 for the new inode_ref of the file
3274 * 2 for lost+found dir's dir_index and dir_item for the file
3276 trans = btrfs_start_transaction(root, 7);
3278 return PTR_ERR(trans);
3280 btrfs_init_path(&path);
3281 if (rec->errors & I_ERR_NO_INODE_ITEM)
3282 ret = repair_inode_no_item(trans, root, &path, rec);
3283 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3284 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3285 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3286 ret = repair_inode_discount_extent(trans, root, &path, rec);
3287 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3288 ret = repair_inode_isize(trans, root, &path, rec);
3289 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3290 ret = repair_inode_orphan_item(trans, root, &path, rec);
3291 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3292 ret = repair_inode_nlinks(trans, root, &path, rec);
3293 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3294 ret = repair_inode_nbytes(trans, root, &path, rec);
3295 btrfs_commit_transaction(trans, root);
3296 btrfs_release_path(&path);
/*
 * Final pass over the inode records collected for @root in @inode_cache.
 *
 * Visible phases:
 *  1. when repair is enabled, walk the cache and call
 *     repair_inode_backrefs() on records that still have backrefs (the
 *     comment below explains why this runs in stages),
 *  2. check the record of the root directory, recreating the root dir with
 *     btrfs_make_root_dir() inside a transaction on the repair path,
 *  3. drain the cache: resolve I_ERR_NO_ORPHAN_ITEM through
 *     check_orphan_item(), flag a missing inode item or a wrong link count,
 *     call try_repair_inode(), and print the record plus every unresolved
 *     backref for anything that could not be fixed.
 *
 * Every record removed from the cache is released with free_inode_rec().
 * Returns -1 when "error" is positive, 0 otherwise.
 * NOTE(review): the stage loop control and the updates of "error"/"err"
 * are not visible here.
 */
3300 static int check_inode_recs(struct btrfs_root *root,
3301 struct cache_tree *inode_cache)
3303 struct cache_extent *cache;
3304 struct ptr_node *node;
3305 struct inode_record *rec;
3306 struct inode_backref *backref;
3311 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3313 if (btrfs_root_refs(&root->root_item) == 0) {
3314 if (!cache_tree_empty(inode_cache))
3315 fprintf(stderr, "warning line %d\n", __LINE__);
3320 * We need to repair backrefs first because we could change some of the
3321 * errors in the inode recs.
3323 * We also need to go through and delete invalid backrefs first and then
3324 * add the correct ones second. We do this because we may get EEXIST
3325 * when adding back the correct index because we hadn't yet deleted the
3328 * For example, if we were missing a dir index then the directories
3329 * isize would be wrong, so if we fixed the isize to what we thought it
3330 * would be and then fixed the backref we'd still have a invalid fs, so
3331 * we need to add back the dir index and then check to see if the isize
3336 if (stage == 3 && !err)
3339 cache = search_cache_extent(inode_cache, 0);
3340 while (repair && cache) {
3341 node = container_of(cache, struct ptr_node, cache);
3343 cache = next_cache_extent(cache);
3345 /* Need to free everything up and rescan */
3347 remove_cache_extent(inode_cache, &node->cache);
3349 free_inode_rec(rec);
3353 if (list_empty(&rec->backrefs))
3356 ret = repair_inode_backrefs(root, rec, inode_cache,
3370 rec = get_inode_rec(inode_cache, root_dirid, 0);
3371 BUG_ON(IS_ERR(rec));
3373 ret = check_root_dir(rec);
3375 fprintf(stderr, "root %llu root dir %llu error\n",
3376 (unsigned long long)root->root_key.objectid,
3377 (unsigned long long)root_dirid);
3378 print_inode_error(root, rec);
3383 struct btrfs_trans_handle *trans;
3385 trans = btrfs_start_transaction(root, 1);
3386 if (IS_ERR(trans)) {
3387 err = PTR_ERR(trans);
3392 "root %llu missing its root dir, recreating\n",
3393 (unsigned long long)root->objectid);
3395 ret = btrfs_make_root_dir(trans, root, root_dirid);
3398 btrfs_commit_transaction(trans, root);
3402 fprintf(stderr, "root %llu root dir %llu not found\n",
3403 (unsigned long long)root->root_key.objectid,
3404 (unsigned long long)root_dirid);
3408 cache = search_cache_extent(inode_cache, 0);
3411 node = container_of(cache, struct ptr_node, cache);
3413 remove_cache_extent(inode_cache, &node->cache);
/* the root dir and the orphan objectid record are not checked further */
3415 if (rec->ino == root_dirid ||
3416 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3417 free_inode_rec(rec);
3421 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3422 ret = check_orphan_item(root, rec->ino);
3424 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3425 if (can_free_inode_rec(rec)) {
3426 free_inode_rec(rec);
3431 if (!rec->found_inode_item)
3432 rec->errors |= I_ERR_NO_INODE_ITEM;
3433 if (rec->found_link != rec->nlink)
3434 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3436 ret = try_repair_inode(root, rec);
3437 if (ret == 0 && can_free_inode_rec(rec)) {
3438 free_inode_rec(rec);
3444 if (!(repair && ret == 0))
3446 print_inode_error(root, rec);
/* derive the "missing" error bits of each backref before printing it */
3447 list_for_each_entry(backref, &rec->backrefs, list) {
3448 if (!backref->found_dir_item)
3449 backref->errors |= REF_ERR_NO_DIR_ITEM;
3450 if (!backref->found_dir_index)
3451 backref->errors |= REF_ERR_NO_DIR_INDEX;
3452 if (!backref->found_inode_ref)
3453 backref->errors |= REF_ERR_NO_INODE_REF;
3454 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3455 " namelen %u name %s filetype %d errors %x",
3456 (unsigned long long)backref->dir,
3457 (unsigned long long)backref->index,
3458 backref->namelen, backref->name,
3459 backref->filetype, backref->errors);
3460 print_ref_error(backref->errors);
3462 free_inode_rec(rec);
3464 return (error > 0) ? -1 : 0;
/*
 * Return the root_record for @objectid from @root_cache, creating it when
 * it does not exist yet.
 *
 * A new record is zero-allocated, gets an empty backref list and is
 * inserted into the cache as a one-unit extent starting at @objectid.
 *
 * Returns the record, ERR_PTR(-ENOMEM) when the allocation fails or
 * ERR_PTR(-EEXIST) when the insertion fails.
 * NOTE(review): whether the new record is freed when the insertion fails is
 * not visible here.
 */
3467 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3470 struct cache_extent *cache;
3471 struct root_record *rec = NULL;
3474 cache = lookup_cache_extent(root_cache, objectid, 1);
3476 rec = container_of(cache, struct root_record, cache);
3478 rec = calloc(1, sizeof(*rec));
3480 return ERR_PTR(-ENOMEM);
3481 rec->objectid = objectid;
3482 INIT_LIST_HEAD(&rec->backrefs);
3483 rec->cache.start = objectid;
3484 rec->cache.size = 1;
3486 ret = insert_cache_extent(root_cache, &rec->cache);
3488 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec that matches (@ref_root, @dir, @name), creating
 * it when none exists.
 *
 * Existing entries are compared on ref_root, dir, namelen and the name
 * bytes; @index is not part of the comparison.  A new entry is
 * zero-allocated with room for the name plus a terminating NUL, filled in
 * and appended to rec->backrefs.
 * NOTE(review): the handling of a failed calloc() is not visible here.
 */
3493 static struct root_backref *get_root_backref(struct root_record *rec,
3494 u64 ref_root, u64 dir, u64 index,
3495 const char *name, int namelen)
3497 struct root_backref *backref;
3499 list_for_each_entry(backref, &rec->backrefs, list) {
3500 if (backref->ref_root != ref_root || backref->dir != dir ||
3501 backref->namelen != namelen)
3503 if (memcmp(name, backref->name, namelen))
/* the extra byte holds the NUL terminator written below */
3508 backref = calloc(1, sizeof(*backref) + namelen + 1);
3511 backref->ref_root = ref_root;
3513 backref->index = index;
3514 backref->namelen = namelen;
3515 memcpy(backref->name, name, namelen);
3516 backref->name[namelen] = '\0';
3517 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for one root_record stored in a cache tree: detach every
 * backref from rec->backrefs one by one.
 * NOTE(review): the calls that release each backref and the record itself
 * are not visible here.
 */
3521 static void free_root_record(struct cache_extent *cache)
3523 struct root_record *rec;
3524 struct root_backref *backref;
3526 rec = container_of(cache, struct root_record, cache);
3527 while (!list_empty(&rec->backrefs)) {
3528 backref = to_root_backref(rec->backrefs.next);
3529 list_del(&backref->list);
/* expands to the tree-wide free helper for root_recs, built on free_root_record() */
3536 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type references root @root_id from
 * (@ref_root, @dir, @name).
 *
 * The matching root_record and root_backref are looked up or created, then
 * @errors is merged into the backref.  For every item type except
 * BTRFS_DIR_ITEM_KEY the index is cross-checked: when an index-carrying
 * item was already seen and the index differs, REF_ERR_INDEX_UNMATCH is
 * set.  The found_* flag for @item_type is set, and duplicate ROOT_REF /
 * ROOT_BACKREF items set the corresponding REF_ERR_DUP_* bit.  The backref
 * becomes reachable once both a forward ref and a dir item were seen.
 * NOTE(review): the found_ref accounting lines are not visible here.
 */
3538 static int add_root_backref(struct cache_tree *root_cache,
3539 u64 root_id, u64 ref_root, u64 dir, u64 index,
3540 const char *name, int namelen,
3541 int item_type, int errors)
3543 struct root_record *rec;
3544 struct root_backref *backref;
3546 rec = get_root_rec(root_cache, root_id);
3547 BUG_ON(IS_ERR(rec));
3548 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3551 backref->errors |= errors;
3553 if (item_type != BTRFS_DIR_ITEM_KEY) {
3554 if (backref->found_dir_index || backref->found_back_ref ||
3555 backref->found_forward_ref) {
3556 if (backref->index != index)
3557 backref->errors |= REF_ERR_INDEX_UNMATCH;
3559 backref->index = index;
3563 if (item_type == BTRFS_DIR_ITEM_KEY) {
3564 if (backref->found_forward_ref)
3566 backref->found_dir_item = 1;
3567 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3568 backref->found_dir_index = 1;
3569 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3570 if (backref->found_forward_ref)
3571 backref->errors |= REF_ERR_DUP_ROOT_REF;
3572 else if (backref->found_dir_item)
3574 backref->found_forward_ref = 1;
3575 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3576 if (backref->found_back_ref)
3577 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3578 backref->found_back_ref = 1;
3583 if (backref->found_forward_ref && backref->found_dir_item)
3584 backref->reachable = 1;
/*
 * Drain the per-tree cache @src_cache and fold what it holds about child
 * roots into the global root cache @dst_cache.
 *
 * For the tree reloc root the source records are simply freed.  Otherwise
 * each inode record is removed from @src_cache, tested with
 * is_child_root() and, for each of its backrefs, reported through
 * add_root_backref() as a DIR_ITEM and/or DIR_INDEX reference from this
 * root.  A backref with found_inode_ref set is treated as a bug (BUG_ON).
 * Every record is freed afterwards.
 * NOTE(review): the handling of the is_child_root() result is not visible
 * here.
 */
3588 static int merge_root_recs(struct btrfs_root *root,
3589 struct cache_tree *src_cache,
3590 struct cache_tree *dst_cache)
3592 struct cache_extent *cache;
3593 struct ptr_node *node;
3594 struct inode_record *rec;
3595 struct inode_backref *backref;
3598 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3599 free_inode_recs_tree(src_cache);
3604 cache = search_cache_extent(src_cache, 0);
3607 node = container_of(cache, struct ptr_node, cache);
3609 remove_cache_extent(src_cache, &node->cache);
3612 ret = is_child_root(root, root->objectid, rec->ino);
3618 list_for_each_entry(backref, &rec->backrefs, list) {
3619 BUG_ON(backref->found_inode_ref);
3620 if (backref->found_dir_item)
3621 add_root_backref(dst_cache, rec->ino,
3622 root->root_key.objectid, backref->dir,
3623 backref->index, backref->name,
3624 backref->namelen, BTRFS_DIR_ITEM_KEY,
3626 if (backref->found_dir_index)
3627 add_root_backref(dst_cache, rec->ino,
3628 root->root_key.objectid, backref->dir,
3629 backref->index, backref->name,
3630 backref->namelen, BTRFS_DIR_INDEX_KEY,
3634 free_inode_rec(rec);
/*
 * Verify that every fs tree recorded in @root_cache is properly referenced.
 *
 * First pass: starting from the BTRFS_FS_TREE_OBJECTID record, backrefs
 * whose referencing root is not itself referenced are marked unreachable.
 * Second pass: roots in the free objectid range with no reference are
 * checked for an orphan item and otherwise reported as "not referenced";
 * the missing-item error bits of every backref are derived, and roots with
 * problems are printed together with their unresolved backrefs.
 *
 * Returns 1 when "errors" is positive, 0 otherwise.
 * NOTE(review): the loop control of the first pass and the lines that
 * update found_ref and "errors" are not visible here.
 */
3641 static int check_root_refs(struct btrfs_root *root,
3642 struct cache_tree *root_cache)
3644 struct root_record *rec;
3645 struct root_record *ref_root;
3646 struct root_backref *backref;
3647 struct cache_extent *cache;
3653 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3654 BUG_ON(IS_ERR(rec));
3657 /* fixme: this can not detect circular references */
3660 cache = search_cache_extent(root_cache, 0);
3664 rec = container_of(cache, struct root_record, cache);
3665 cache = next_cache_extent(cache);
3667 if (rec->found_ref == 0)
3670 list_for_each_entry(backref, &rec->backrefs, list) {
3671 if (!backref->reachable)
3674 ref_root = get_root_rec(root_cache,
3676 BUG_ON(IS_ERR(ref_root));
3677 if (ref_root->found_ref > 0)
3680 backref->reachable = 0;
3682 if (rec->found_ref == 0)
3688 cache = search_cache_extent(root_cache, 0);
3692 rec = container_of(cache, struct root_record, cache);
3693 cache = next_cache_extent(cache);
3695 if (rec->found_ref == 0 &&
3696 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3697 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3698 ret = check_orphan_item(root->fs_info->tree_root,
3704 * If we don't have a root item then we likely just have
3705 * a dir item in a snapshot for this root but no actual
3706 * ref key or anything so it's meaningless.
3708 if (!rec->found_root_item)
3711 fprintf(stderr, "fs tree %llu not referenced\n",
3712 (unsigned long long)rec->objectid);
3716 if (rec->found_ref > 0 && !rec->found_root_item)
3718 list_for_each_entry(backref, &rec->backrefs, list) {
3719 if (!backref->found_dir_item)
3720 backref->errors |= REF_ERR_NO_DIR_ITEM;
3721 if (!backref->found_dir_index)
3722 backref->errors |= REF_ERR_NO_DIR_INDEX;
3723 if (!backref->found_back_ref)
3724 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3725 if (!backref->found_forward_ref)
3726 backref->errors |= REF_ERR_NO_ROOT_REF;
3727 if (backref->reachable && backref->errors)
3734 fprintf(stderr, "fs tree %llu refs %u %s\n",
3735 (unsigned long long)rec->objectid, rec->found_ref,
3736 rec->found_root_item ? "" : "not found");
3738 list_for_each_entry(backref, &rec->backrefs, list) {
3739 if (!backref->reachable)
3741 if (!backref->errors && rec->found_root_item)
3743 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3744 " index %llu namelen %u name %s errors %x\n",
3745 (unsigned long long)backref->ref_root,
3746 (unsigned long long)backref->dir,
3747 (unsigned long long)backref->index,
3748 backref->namelen, backref->name,
3750 print_ref_error(backref->errors);
3753 return errors > 0 ? 1 : 0;
/*
 * Decode the ROOT_REF / ROOT_BACKREF item at @slot of leaf @eb and record
 * it in @root_cache.
 *
 * The dirid, sequence (index) and name are read from the item.  A name
 * longer than BTRFS_NAME_LEN is clamped to BTRFS_NAME_LEN and flagged with
 * REF_ERR_NAME_TOO_LONG.  For a ROOT_REF the key offset is the referenced
 * root and the key objectid the referencing root; for the other key type
 * the two are swapped.
 */
3756 static int process_root_ref(struct extent_buffer *eb, int slot,
3757 struct btrfs_key *key,
3758 struct cache_tree *root_cache)
3764 struct btrfs_root_ref *ref;
3765 char namebuf[BTRFS_NAME_LEN];
3768 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3770 dirid = btrfs_root_ref_dirid(eb, ref);
3771 index = btrfs_root_ref_sequence(eb, ref);
3772 name_len = btrfs_root_ref_name_len(eb, ref);
3774 if (name_len <= BTRFS_NAME_LEN) {
3778 len = BTRFS_NAME_LEN;
3779 error = REF_ERR_NAME_TOO_LONG;
/* the name bytes are stored right after the btrfs_root_ref header */
3781 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3783 if (key->type == BTRFS_ROOT_REF_KEY) {
3784 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3785 index, namebuf, len, key->type, error);
3787 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3788 index, namebuf, len, key->type, error);
/*
 * Destructor for one btrfs_corrupt_block stored in a cache tree: recover
 * the containing structure from its embedded cache_extent.
 * NOTE(review): the release call itself is not visible here.
 */
3793 static void free_corrupt_block(struct cache_extent *cache)
3795 struct btrfs_corrupt_block *corrupt;
3797 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* expands to the tree-wide free helper for corrupt_blocks, built on free_corrupt_block() */
3801 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3804 * Repair the btree of the given root.
3806 * The fix is to remove the node key in corrupt_blocks cache_tree.
3807 * and rebalance the tree.
3808 * After the fix, the btree should be writeable.
/*
 * Remove every corrupted tree block listed in @corrupt_blocks from the
 * btree of @root, inside a single transaction.
 *
 * Nothing is done when the cache tree is empty.  First loop: for each
 * corrupt block the parent slot is located with btrfs_search_slot() at the
 * level of the block (ins_len 0, so no balancing happens), the node pointer
 * is removed with btrfs_del_ptr() and the extent it pointed to is freed.
 * Second loop: the same keys are searched again with ins_len -1 so that the
 * search itself rebalances the tree.  The transaction is then committed and
 * the path released.
 * NOTE(review): the error exits of both loops are not visible here.
 */
3810 static int repair_btree(struct btrfs_root *root,
3811 struct cache_tree *corrupt_blocks)
3813 struct btrfs_trans_handle *trans;
3814 struct btrfs_path path;
3815 struct btrfs_corrupt_block *corrupt;
3816 struct cache_extent *cache;
3817 struct btrfs_key key;
3822 if (cache_tree_empty(corrupt_blocks))
3825 trans = btrfs_start_transaction(root, 1);
3826 if (IS_ERR(trans)) {
3827 ret = PTR_ERR(trans);
3828 fprintf(stderr, "Error starting transaction: %s\n",
3832 btrfs_init_path(&path);
3833 cache = first_cache_extent(corrupt_blocks);
3835 corrupt = container_of(cache, struct btrfs_corrupt_block,
3837 level = corrupt->level;
3838 path.lowest_level = level;
3839 key.objectid = corrupt->key.objectid;
3840 key.type = corrupt->key.type;
3841 key.offset = corrupt->key.offset;
3844 * Here we don't want to do any tree balance, since it may
3845 * cause a balance with corrupted brother leaf/node,
3846 * so ins_len set to 0 here.
3847 * Balance will be done after all corrupt node/leaf is deleted.
3849 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3852 offset = btrfs_node_blockptr(path.nodes[level],
3855 /* Remove the ptr */
3856 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3860 * Remove the corresponding extent
3861 * return value is not concerned.
3863 btrfs_release_path(&path);
3864 ret = btrfs_free_extent(trans, root, offset,
3865 root->fs_info->nodesize, 0,
3866 root->root_key.objectid, level - 1, 0);
3867 cache = next_cache_extent(cache);
3870 /* Balance the btree using btrfs_search_slot() */
3871 cache = first_cache_extent(corrupt_blocks);
3873 corrupt = container_of(cache, struct btrfs_corrupt_block,
3875 memcpy(&key, &corrupt->key, sizeof(key));
3876 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3879 /* return will always >0 since it won't find the item */
3881 btrfs_release_path(&path);
3882 cache = next_cache_extent(cache);
3885 btrfs_commit_transaction(trans, root);
3886 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree @root.
 *
 * Visible outline:
 *  - install a local corrupt-block cache in fs_info for the duration of the
 *    check,
 *  - note in @root_cache that the root item exists when its refs are > 0
 *    (skipped for the tree reloc root),
 *  - move the orphan data extents of the root onto their inode records,
 *  - validate the root block itself with btrfs_check_leaf() or
 *    btrfs_check_node(),
 *  - walk the tree with walk_down_tree() / walk_up_tree(), starting at the
 *    root node or, for a partially dropped root, at drop_progress,
 *  - print any corrupted tree blocks and try repair_btree() on them,
 *  - merge the child-root records into @root_cache and run
 *    check_inode_recs() on the collected inode records,
 *  - free the local corrupt-block cache and the orphan extent list.
 * NOTE(review): how the individual results are combined into the return
 * value is not visible here.
 */
3890 static int check_fs_root(struct btrfs_root *root,
3891 struct cache_tree *root_cache,
3892 struct walk_control *wc)
3898 struct btrfs_path path;
3899 struct shared_node root_node;
3900 struct root_record *rec;
3901 struct btrfs_root_item *root_item = &root->root_item;
3902 struct cache_tree corrupt_blocks;
3903 struct orphan_data_extent *orphan;
3904 struct orphan_data_extent *tmp;
3905 enum btrfs_tree_block_status status;
3906 struct node_refs nrefs;
3909 * Reuse the corrupt_block cache tree to record corrupted tree block
3911 * Unlike the usage in extent tree check, here we do it in a per
3912 * fs/subvol tree base.
3914 cache_tree_init(&corrupt_blocks);
3915 root->fs_info->corrupt_blocks = &corrupt_blocks;
3917 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3918 rec = get_root_rec(root_cache, root->root_key.objectid);
3919 BUG_ON(IS_ERR(rec));
3920 if (btrfs_root_refs(root_item) > 0)
3921 rec->found_root_item = 1;
3924 btrfs_init_path(&path);
3925 memset(&root_node, 0, sizeof(root_node));
3926 cache_tree_init(&root_node.root_cache);
3927 cache_tree_init(&root_node.inode_cache);
3928 memset(&nrefs, 0, sizeof(nrefs));
3930 /* Move the orphan extent record to corresponding inode_record */
3931 list_for_each_entry_safe(orphan, tmp,
3932 &root->orphan_data_extents, list) {
3933 struct inode_record *inode;
3935 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3937 BUG_ON(IS_ERR(inode));
3938 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3939 list_move(&orphan->list, &inode->orphan_extents);
/* the walk starts at the level of the root block with root_node active */
3942 level = btrfs_header_level(root->node);
3943 memset(wc->nodes, 0, sizeof(wc->nodes));
3944 wc->nodes[level] = &root_node;
3945 wc->active_node = level;
3946 wc->root_level = level;
3948 /* We may not have checked the root block, lets do that now */
3949 if (btrfs_is_leaf(root->node))
3950 status = btrfs_check_leaf(root, NULL, root->node);
3952 status = btrfs_check_node(root, NULL, root->node);
3953 if (status != BTRFS_TREE_BLOCK_CLEAN)
3956 if (btrfs_root_refs(root_item) > 0 ||
3957 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3958 path.nodes[level] = root->node;
3959 extent_buffer_get(root->node);
3960 path.slots[level] = 0;
3962 struct btrfs_key key;
3963 struct btrfs_disk_key found_key;
3965 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3966 level = root_item->drop_level;
3967 path.lowest_level = level;
3968 if (level > btrfs_header_level(root->node) ||
3969 level >= BTRFS_MAX_LEVEL) {
3970 error("ignoring invalid drop level: %u", level);
3973 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3976 btrfs_node_key(path.nodes[level], &found_key,
3978 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3979 sizeof(found_key)));
3983 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3989 wret = walk_up_tree(root, &path, wc, &level);
3996 btrfs_release_path(&path);
3998 if (!cache_tree_empty(&corrupt_blocks)) {
3999 struct cache_extent *cache;
4000 struct btrfs_corrupt_block *corrupt;
4002 printf("The following tree block(s) is corrupted in tree %llu:\n",
4003 root->root_key.objectid);
4004 cache = first_cache_extent(&corrupt_blocks);
4006 corrupt = container_of(cache,
4007 struct btrfs_corrupt_block,
4009 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4010 cache->start, corrupt->level,
4011 corrupt->key.objectid, corrupt->key.type,
4012 corrupt->key.offset);
4013 cache = next_cache_extent(cache);
4016 printf("Try to repair the btree for root %llu\n",
4017 root->root_key.objectid);
4018 ret = repair_btree(root, &corrupt_blocks);
4020 fprintf(stderr, "Failed to repair btree: %s\n",
4023 printf("Btree for root %llu is fixed\n",
4024 root->root_key.objectid);
4028 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4032 if (root_node.current) {
4033 root_node.current->checked = 1;
4034 maybe_free_inode_rec(&root_node.inode_cache,
4038 err = check_inode_recs(root, &root_node.inode_cache);
/* undo the fs_info hook installed at the top before the local tree goes away */
4042 free_corrupt_blocks_tree(&corrupt_blocks);
4043 root->fs_info->corrupt_blocks = NULL;
4044 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Tell whether @objectid names a tree that check_fs_roots() should walk.
 *
 * The tree reloc and data reloc objectids are special-cased before falling
 * back to is_fstree().
 * NOTE(review): the value returned for the two reloc objectids is not
 * visible here; check_fs_roots() does expect the tree reloc root to pass
 * this test.
 */
4048 static int fs_root_objectid(u64 objectid)
4050 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4051 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4053 return is_fstree(objectid);
/*
 * Iterate over the tree root and check every fs/subvolume tree found.
 *
 * ROOT_ITEM keys accepted by fs_root_objectid() are read and passed to
 * check_fs_root(); the tree reloc root is read uncached and freed again
 * after the check.  ROOT_REF and ROOT_BACKREF keys are fed to
 * process_root_ref().  When the tree root node changes during the scan, or
 * check_fs_root() returns -EAGAIN, the collected root records are dropped
 * and the path released.
 * NOTE(review): the jump back to the start of the scan after that is not
 * visible here.
 *
 * Progress reporting is started and stopped around the scan when enabled.
 */
4056 static int check_fs_roots(struct btrfs_root *root,
4057 struct cache_tree *root_cache)
4059 struct btrfs_path path;
4060 struct btrfs_key key;
4061 struct walk_control wc;
4062 struct extent_buffer *leaf, *tree_node;
4063 struct btrfs_root *tmp_root;
4064 struct btrfs_root *tree_root = root->fs_info->tree_root;
4068 if (ctx.progress_enabled) {
4069 ctx.tp = TASK_FS_ROOTS;
4070 task_start(ctx.info);
4074 * Just in case we made any changes to the extent tree that weren't
4075 * reflected into the free space cache yet.
4078 reset_cached_block_groups(root->fs_info);
4079 memset(&wc, 0, sizeof(wc));
4080 cache_tree_init(&wc.shared);
4081 btrfs_init_path(&path);
4086 key.type = BTRFS_ROOT_ITEM_KEY;
4087 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* remember the tree root node so that a later change can be detected */
4092 tree_node = tree_root->node;
4094 if (tree_node != tree_root->node) {
4095 free_root_recs_tree(root_cache);
4096 btrfs_release_path(&path);
4099 leaf = path.nodes[0];
4100 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4101 ret = btrfs_next_leaf(tree_root, &path);
4107 leaf = path.nodes[0];
4109 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4110 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4111 fs_root_objectid(key.objectid)) {
4112 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4113 tmp_root = btrfs_read_fs_root_no_cache(
4114 root->fs_info, &key);
4116 key.offset = (u64)-1;
4117 tmp_root = btrfs_read_fs_root(
4118 root->fs_info, &key);
4120 if (IS_ERR(tmp_root)) {
4124 ret = check_fs_root(tmp_root, root_cache, &wc);
4125 if (ret == -EAGAIN) {
4126 free_root_recs_tree(root_cache);
4127 btrfs_release_path(&path);
4132 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4133 btrfs_free_fs_root(tmp_root);
4134 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4135 key.type == BTRFS_ROOT_BACKREF_KEY) {
4136 process_root_ref(leaf, path.slots[0], &key,
4143 btrfs_release_path(&path);
4145 free_extent_cache_tree(&wc.shared);
4146 if (!cache_tree_empty(&wc.shared))
4147 fprintf(stderr, "warning line %d\n", __LINE__);
4149 task_stop(ctx.info);
4155 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4156 * INODE_REF/INODE_EXTREF match.
4158 * @root: the root of the fs/file tree
4159 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4160 * @key: the key of the DIR_ITEM/DIR_INDEX
4161 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4162 * distinguish the root dir from a normal dir/file
4163 * @name: the name in the INODE_REF/INODE_EXTREF
4164 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4165 * @mode: the st_mode of INODE_ITEM
4167 * Return 0 if no error occurred.
4168 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4169 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4171 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4172 * do not match for normal dir/file.
/*
 * Look up the DIR_ITEM/DIR_INDEX at @key and compare its entries with the
 * name/inode/type described by an INODE_REF/INODE_EXTREF (@ref_key, @name,
 * @namelen, @mode).
 *
 * The result is one of 0, ROOT_DIR_ERROR, DIR_ITEM_MISSING or
 * DIR_ITEM_MISMATCH; problems are reported with the root and both keys.
 */
4174 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4175 struct btrfs_key *key, u64 index, char *name,
4176 u32 namelen, u32 mode)
4178 struct btrfs_path path;
4179 struct extent_buffer *node;
4180 struct btrfs_dir_item *di;
4181 struct btrfs_key location;
4182 char namebuf[BTRFS_NAME_LEN] = {0};
/* Read-only lookup (no transaction, no COW) of the directory item. */
4192 btrfs_init_path(&path);
4193 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4195 ret = DIR_ITEM_MISSING;
4199 /* Process root dir and goto out */
4202 ret = ROOT_DIR_ERROR;
4204 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4206 ref_key->type == BTRFS_INODE_REF_KEY ?
4208 ref_key->objectid, ref_key->offset,
4209 key->type == BTRFS_DIR_ITEM_KEY ?
4210 "DIR_ITEM" : "DIR_INDEX");
4218 /* Process normal file/dir */
4220 ret = DIR_ITEM_MISSING;
4222 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4224 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4225 ref_key->objectid, ref_key->offset,
4226 key->type == BTRFS_DIR_ITEM_KEY ?
4227 "DIR_ITEM" : "DIR_INDEX",
4228 key->objectid, key->offset, namelen, name,
4229 imode_to_type(mode));
4233 /* Check whether inode_id/filetype/name match */
4234 node = path.nodes[0];
4235 slot = path.slots[0];
4236 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4237 total = btrfs_item_size_nr(node, slot);
/*
 * One item may hold several entries back to back; each entry occupies
 * sizeof(*di) + name_len + data_len bytes (see the pointer advance at the
 * bottom of the loop). ret is reset to DIR_ITEM_MISMATCH for each entry.
 */
4238 while (cur < total) {
4239 ret = DIR_ITEM_MISMATCH;
4240 name_len = btrfs_dir_name_len(node, di);
4241 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at INODE_ITEM (ref_key->objectid, 0). */
4243 btrfs_dir_item_key_to_cpu(node, di, &location);
4244 if (location.objectid != ref_key->objectid ||
4245 location.type != BTRFS_INODE_ITEM_KEY ||
4246 location.offset != 0)
/* The entry's file type must agree with the type derived from @mode. */
4249 filetype = btrfs_dir_type(node, di);
4250 if (imode_to_type(mode) != filetype)
/*
 * A name that runs past the end of the item or exceeds BTRFS_NAME_LEN is
 * only warned about; the length actually read is trimmed to what is left
 * in the item.
 */
4253 if (cur + sizeof(*di) + name_len > total ||
4254 name_len > BTRFS_NAME_LEN) {
4255 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4257 key->type == BTRFS_DIR_ITEM_KEY ?
4258 "DIR_ITEM" : "DIR_INDEX",
4259 key->objectid, key->offset, name_len);
4261 if (cur + sizeof(*di) > total)
4263 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow the fixed-size header directly, hence (di + 1). */
4269 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4270 if (len != namelen || strncmp(namebuf, name, len))
/* Step to the next entry packed in the same item. */
4276 len = sizeof(*di) + name_len + data_len;
4277 di = (struct btrfs_dir_item *)((char *)di + len);
4280 if (ret == DIR_ITEM_MISMATCH)
4282 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4284 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4285 ref_key->objectid, ref_key->offset,
4286 key->type == BTRFS_DIR_ITEM_KEY ?
4287 "DIR_ITEM" : "DIR_INDEX",
4288 key->objectid, key->offset, namelen, name,
4289 imode_to_type(mode));
4291 btrfs_release_path(&path);
4296 * Traverse the given INODE_REF and call find_dir_item() to find related
4297 * DIR_ITEM/DIR_INDEX.
4299 * @root: the root of the fs/file tree
4300 * @ref_key: the key of the INODE_REF
4301 * @refs: the count of INODE_REF
4302 * @mode: the st_mode of INODE_ITEM
4304 * Return 0 if no error occurred.
/*
 * Walk every entry of the INODE_REF item at (@node, @slot) and verify, via
 * find_dir_item(), that a matching DIR_INDEX and DIR_ITEM exist in the
 * parent directory (ref_key->offset).
 */
4306 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4307 struct extent_buffer *node, int slot, u64 *refs,
4310 struct btrfs_key key;
4311 struct btrfs_inode_ref *ref;
4312 char namebuf[BTRFS_NAME_LEN] = {0};
4320 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4321 total = btrfs_item_size_nr(node, slot);
4324 /* Update inode ref count */
4327 index = btrfs_inode_ref_index(node, ref);
4328 name_len = btrfs_inode_ref_name_len(node, ref);
/*
 * A name that overruns the item or exceeds BTRFS_NAME_LEN is warned about
 * and the length read is clamped to what remains in the item, capped at
 * BTRFS_NAME_LEN.
 */
4329 if (cur + sizeof(*ref) + name_len > total ||
4330 name_len > BTRFS_NAME_LEN) {
4331 warning("root %llu INODE_REF[%llu %llu] name too long",
4332 root->objectid, ref_key->objectid, ref_key->offset);
4334 if (total < cur + sizeof(*ref))
4336 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
/* The name is stored right after the fixed header, hence (ref + 1). */
4341 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4343 /* Check root dir ref name */
/*
 * NOTE(review): the comparison is bounded by name_len (the raw on-disk
 * value), not by len (the number of bytes copied into namebuf) - confirm
 * this is intended.
 */
4344 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4345 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4346 root->objectid, ref_key->objectid, ref_key->offset,
4348 err |= ROOT_DIR_ERROR;
4351 /* Find related DIR_INDEX */
4352 key.objectid = ref_key->offset;
4353 key.type = BTRFS_DIR_INDEX_KEY;
4355 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4358 /* Find related dir_item */
/* DIR_ITEM keys are addressed by the hash of the name. */
4359 key.objectid = ref_key->offset;
4360 key.type = BTRFS_DIR_ITEM_KEY;
4361 key.offset = btrfs_name_hash(namebuf, len);
4362 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance to the next ref packed in the same item. */
4365 len = sizeof(*ref) + name_len;
4366 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4376 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4377 * DIR_ITEM/DIR_INDEX.
4379 * @root: the root of the fs/file tree
4380 * @ref_key: the key of the INODE_EXTREF
4381 * @refs: the count of INODE_EXTREF
4382 * @mode: the st_mode of INODE_ITEM
4384 * Return 0 if no error occurred.
/*
 * Walk every entry of the INODE_EXTREF item at (@node, @slot) and verify,
 * via find_dir_item(), that a matching DIR_INDEX and DIR_ITEM exist in the
 * parent directory recorded inside each extref.
 */
4386 static int check_inode_extref(struct btrfs_root *root,
4387 struct btrfs_key *ref_key,
4388 struct extent_buffer *node, int slot, u64 *refs,
4391 struct btrfs_key key;
4392 struct btrfs_inode_extref *extref;
4393 char namebuf[BTRFS_NAME_LEN] = {0};
4403 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4404 total = btrfs_item_size_nr(node, slot);
4407 /* update inode ref count */
4409 name_len = btrfs_inode_extref_name_len(node, extref);
4410 index = btrfs_inode_extref_index(node, extref);
4411 parent = btrfs_inode_extref_parent(node, extref);
/*
 * Names longer than BTRFS_NAME_LEN are warned about and read truncated to
 * BTRFS_NAME_LEN.
 * NOTE(review): unlike check_inode_ref(), no check against the item size
 * is visible here - confirm whether one is needed.
 */
4412 if (name_len <= BTRFS_NAME_LEN) {
4415 len = BTRFS_NAME_LEN;
4416 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4417 root->objectid, ref_key->objectid, ref_key->offset);
/* The name is stored right after the fixed header, hence (extref + 1). */
4419 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4421 /* Check root dir ref name */
/*
 * NOTE(review): the comparison is bounded by name_len (the raw on-disk
 * value), not by len (the number of bytes copied into namebuf) - confirm
 * this is intended.
 */
4422 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4423 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4424 root->objectid, ref_key->objectid, ref_key->offset,
4426 err |= ROOT_DIR_ERROR;
4429 /* find related dir_index */
/* For extrefs the parent directory comes from the entry, not the key. */
4430 key.objectid = parent;
4431 key.type = BTRFS_DIR_INDEX_KEY;
4433 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4436 /* find related dir_item */
4437 key.objectid = parent;
4438 key.type = BTRFS_DIR_ITEM_KEY;
4439 key.offset = btrfs_name_hash(namebuf, len);
4440 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Advance to the next extref packed in the same item. */
4443 len = sizeof(*extref) + name_len;
4444 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4454 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4455 * DIR_ITEM/DIR_INDEX match.
4457 * @root: the root of the fs/file tree
4458 * @key: the key of the INODE_REF/INODE_EXTREF
4459 * @name: the name in the INODE_REF/INODE_EXTREF
4460 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4461 * @index: the index in the INODE_REF/INODE_EXTREF; for DIR_ITEM pass (u64)-1 so that the index check is skipped
4463 * @ext_ref: the EXTENDED_IREF feature
4465 * Return 0 if no error occurred.
4466 * Return >0 for error bitmap
/*
 * Search the INODE_REF at @key, and then the INODE_EXTREF derived from it,
 * for an entry whose name (and index, unless @index is (u64)-1) matches.
 *
 * Note that @key is modified in place: before the second lookup its type is
 * switched to BTRFS_INODE_EXTREF_KEY and its offset is replaced by the
 * extref hash of (parent dir, name).
 */
4468 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4469 char *name, int namelen, u64 index,
4470 unsigned int ext_ref)
4472 struct btrfs_path path;
4473 struct btrfs_inode_ref *ref;
4474 struct btrfs_inode_extref *extref;
4475 struct extent_buffer *node;
4476 char ref_namebuf[BTRFS_NAME_LEN] = {0};
/* First pass: plain INODE_REF item. */
4487 btrfs_init_path(&path);
4488 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4490 ret = INODE_REF_MISSING;
4494 node = path.nodes[0];
4495 slot = path.slots[0];
4497 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4498 total = btrfs_item_size_nr(node, slot);
4500 /* Iterate all entry of INODE_REF */
4501 while (cur < total) {
4502 ret = INODE_REF_MISSING;
4504 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4505 ref_index = btrfs_inode_ref_index(node, ref);
/* An @index of (u64)-1 disables the index comparison. */
4506 if (index != (u64)-1 && index != ref_index)
/*
 * A name that overruns the item or exceeds BTRFS_NAME_LEN is warned about
 * and the length read is clamped to what remains in the item.
 */
4509 if (cur + sizeof(*ref) + ref_namelen > total ||
4510 ref_namelen > BTRFS_NAME_LEN) {
4511 warning("root %llu INODE %s[%llu %llu] name too long",
4513 key->type == BTRFS_INODE_REF_KEY ?
4515 key->objectid, key->offset);
4517 if (cur + sizeof(*ref) > total)
4519 len = min_t(u32, total - cur - sizeof(*ref),
4525 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4528 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Advance to the next ref packed in the same item. */
4534 len = sizeof(*ref) + ref_namelen;
4535 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4540 /* Skip if not support EXTENDED_IREF feature */
/* Second pass: INODE_EXTREF, keyed by the hash of (parent dir, name). */
4544 btrfs_release_path(&path);
4545 btrfs_init_path(&path);
4547 dir_id = key->offset;
4548 key->type = BTRFS_INODE_EXTREF_KEY;
4549 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4551 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4553 ret = INODE_REF_MISSING;
4557 node = path.nodes[0];
4558 slot = path.slots[0];
4560 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4562 total = btrfs_item_size_nr(node, slot);
4564 /* Iterate all entry of INODE_EXTREF */
4565 while (cur < total) {
4566 ret = INODE_REF_MISSING;
4568 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4569 ref_index = btrfs_inode_extref_index(node, extref);
4570 parent = btrfs_inode_extref_parent(node, extref);
4571 if (index != (u64)-1 && index != ref_index)
/* The extref must also belong to the directory being looked for. */
4574 if (parent != dir_id)
4577 if (ref_namelen <= BTRFS_NAME_LEN) {
4580 len = BTRFS_NAME_LEN;
4581 warning("root %llu INODE %s[%llu %llu] name too long",
4583 key->type == BTRFS_INODE_REF_KEY ?
4585 key->objectid, key->offset);
4587 read_extent_buffer(node, ref_namebuf,
4588 (unsigned long)(extref + 1), len);
4590 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Advance to the next extref packed in the same item. */
4597 len = sizeof(*extref) + ref_namelen;
4598 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4603 btrfs_release_path(&path);
4608 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4609 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4611 * @root: the root of the fs/file tree
4612 * @key: the key of the DIR_ITEM/DIR_INDEX
4613 * @size: accumulates the name length of every entry; the caller compares it with the st_size of the dir INODE_ITEM
4614 * @ext_ref: the EXTENDED_IREF feature
4616 * Return 0 if no error occurred.
/*
 * Check every entry of the DIR_ITEM/DIR_INDEX at (@node, @slot): name
 * length and hash, the INODE_ITEM it points to, and the back reference
 * (INODE_REF/INODE_EXTREF) looked up through find_inode_ref().
 *
 * Each entry's name_len is added to *@size.
 */
4618 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4619 struct extent_buffer *node, int slot, u64 *size,
4620 unsigned int ext_ref)
4622 struct btrfs_dir_item *di;
4623 struct btrfs_inode_item *ii;
4624 struct btrfs_path path;
4625 struct btrfs_key location;
4626 char namebuf[BTRFS_NAME_LEN] = {0};
4639 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4640 * ignore index check.
4642 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4644 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4645 total = btrfs_item_size_nr(node, slot);
/* Entries are packed back to back inside the item. */
4647 while (cur < total) {
4648 data_len = btrfs_dir_data_len(node, di);
4650 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4651 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4652 "DIR_ITEM" : "DIR_INDEX",
4653 key->objectid, key->offset, data_len);
4655 name_len = btrfs_dir_name_len(node, di);
/*
 * A name that overruns the item or exceeds BTRFS_NAME_LEN is warned about
 * and the length read is clamped to what remains in the item.
 */
4656 if (cur + sizeof(*di) + name_len > total ||
4657 name_len > BTRFS_NAME_LEN) {
4658 warning("root %llu %s[%llu %llu] name too long",
4660 key->type == BTRFS_DIR_ITEM_KEY ?
4661 "DIR_ITEM" : "DIR_INDEX",
4662 key->objectid, key->offset);
4664 if (cur + sizeof(*di) > total)
4666 len = min_t(u32, total - cur - sizeof(*di),
/* The on-disk name_len (not the clamped len) is what gets accumulated. */
4671 (*size) += name_len;
4673 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4674 filetype = btrfs_dir_type(node, di);
/* For DIR_ITEM the key offset must equal the hash of the name. */
4676 if (key->type == BTRFS_DIR_ITEM_KEY &&
4677 key->offset != btrfs_name_hash(namebuf, len)) {
4679 error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
4680 root->objectid, key->objectid, key->offset,
4681 namebuf, len, filetype, key->offset,
4682 btrfs_name_hash(namebuf, len));
4685 btrfs_init_path(&path);
4686 btrfs_dir_item_key_to_cpu(node, di, &location);
4688 /* Ignore related ROOT_ITEM check */
4689 if (location.type == BTRFS_ROOT_ITEM_KEY)
4692 /* Check relative INODE_ITEM(existence/filetype) */
4693 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4695 err |= INODE_ITEM_MISSING;
4696 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4697 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4698 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4699 key->offset, location.objectid, name_len,
4704 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4705 struct btrfs_inode_item);
4706 mode = btrfs_inode_mode(path.nodes[0], ii);
4708 if (imode_to_type(mode) != filetype) {
4709 err |= INODE_ITEM_MISMATCH;
4710 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4711 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4712 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4713 key->offset, name_len, namebuf, filetype);
4716 /* Check relative INODE_REF/INODE_EXTREF */
/*
 * Reuse 'location' as the INODE_REF key: the objectid stays the inode
 * number, the offset becomes the parent directory (key->objectid).
 */
4717 location.type = BTRFS_INODE_REF_KEY;
4718 location.offset = key->objectid;
4719 ret = find_inode_ref(root, &location, namebuf, len,
4722 if (ret & INODE_REF_MISSING)
4723 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4724 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4725 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4726 key->offset, name_len, namebuf, filetype);
/* Advance to the next entry packed in the same item. */
4729 btrfs_release_path(&path);
4730 len = sizeof(*di) + name_len + data_len;
4731 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item with bytes left after its first entry is an error. */
4734 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4735 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4736 root->objectid, key->objectid, key->offset);
4745 * Check file extent datasum/hole, update the size of the file extents,
4746 * check and update the last offset of the file extent.
4748 * @root: the root of fs/file tree.
4749 * @fkey: the key of the file extent.
4750 * @nodatasum: INODE_NODATASUM feature.
4751 * @size: the sum of all EXTENT_DATA items size for this inode.
4752 * @end: the offset of the last extent.
4754 * Return 0 if no error occurred.
/*
 * Check one EXTENT_DATA item at (@node, @slot): inline extent sanity, extent
 * type, checksum coverage and continuity with the previous extent.
 *
 * *@size and *@end are advanced by the extent's byte count as the item is
 * processed; problems are OR-ed into an error bitmap (FILE_EXTENT_ERROR,
 * ODD_CSUM_ITEM, CSUM_ITEM_MISSING).
 */
4756 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4757 struct extent_buffer *node, int slot,
4758 unsigned int nodatasum, u64 *size, u64 *end)
4760 struct btrfs_file_extent_item *fi;
4763 u64 extent_num_bytes;
4765 u64 csum_found; /* In byte size, sectorsize aligned */
4766 u64 search_start; /* Logical range start we search for csum */
4767 u64 search_len; /* Logical range len we search for csum */
4768 unsigned int extent_type;
4769 unsigned int is_hole;
4774 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4776 /* Check inline extent */
4777 extent_type = btrfs_file_extent_type(node, fi);
4778 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4779 struct btrfs_item *e = btrfs_item_nr(slot);
4780 u32 item_inline_len;
4782 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4783 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4784 compressed = btrfs_file_extent_compression(node, fi);
4785 if (extent_num_bytes == 0) {
4787 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4788 root->objectid, fkey->objectid, fkey->offset);
4789 err |= FILE_EXTENT_ERROR;
/*
 * Only for uncompressed inline data must the recorded length equal the
 * payload length stored in the item.
 */
4791 if (!compressed && extent_num_bytes != item_inline_len) {
4793 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4794 root->objectid, fkey->objectid, fkey->offset,
4795 extent_num_bytes, item_inline_len);
4796 err |= FILE_EXTENT_ERROR;
4798 *end += extent_num_bytes;
4799 *size += extent_num_bytes;
4803 /* Check extent type */
4804 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4805 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4806 err |= FILE_EXTENT_ERROR;
4807 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4808 root->objectid, fkey->objectid, fkey->offset);
4812 /* Check REG_EXTENT/PREALLOC_EXTENT */
4813 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4814 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4815 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4816 extent_offset = btrfs_file_extent_offset(node, fi);
4817 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent with neither a disk address nor a disk length. */
4818 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4821 * Check EXTENT_DATA csum
4823 * For plain (uncompressed) extent, we should only check the range
4824 * we're referring to, as it's possible that part of prealloc extent
4825 * has been written, and has csum:
4827 * |<--- Original large preallocated extent A ---->|
4828 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4831 * For compressed extent, we should check the whole range.
/*
 * Two candidate csum ranges: the slice this item refers to
 * (disk_bytenr + extent_offset, extent_num_bytes) and the whole on-disk
 * extent (disk_bytenr, disk_num_bytes).
 */
4834 search_start = disk_bytenr + extent_offset;
4835 search_len = extent_num_bytes;
4837 search_start = disk_bytenr;
4838 search_len = disk_num_bytes;
4840 ret = count_csum_range(root, search_start, search_len, &csum_found);
/*
 * Three mutually exclusive complaints: csums on a NODATASUM inode, a
 * regular non-hole extent whose csums are missing or short, and csums on
 * a preallocated extent.
 */
4841 if (csum_found > 0 && nodatasum) {
4842 err |= ODD_CSUM_ITEM;
4843 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4844 root->objectid, fkey->objectid, fkey->offset);
4845 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4846 !is_hole && (ret < 0 || csum_found < search_len)) {
4847 err |= CSUM_ITEM_MISSING;
4848 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4849 root->objectid, fkey->objectid, fkey->offset,
4850 csum_found, search_len);
4851 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4852 err |= ODD_CSUM_ITEM;
4853 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4854 root->objectid, fkey->objectid, fkey->offset, csum_found);
4857 /* Check EXTENT_DATA hole */
/*
 * Unless the global no_holes flag is set, this extent must start exactly
 * where the previous one ended (*end).
 */
4858 if (!no_holes && *end != fkey->offset) {
4859 err |= FILE_EXTENT_ERROR;
4860 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4861 root->objectid, fkey->objectid, fkey->offset);
4864 *end += extent_num_bytes;
4866 *size += extent_num_bytes;
4872 * Check INODE_ITEM and related ITEMs (the same inode number)
4873 * 1. check link count
4874 * 2. check inode ref/extref
4875 * 3. check dir item/index
4877 * @ext_ref: the EXTENDED_IREF feature
4879 * Return 0 if no error occurred.
4880 * Return >0 for error or hit the traversal is done(by error bitmap)
/*
 * Check the INODE_ITEM that @path points at together with the following
 * items that share its objectid (refs, extrefs, dir items/indexes, file
 * extents), then cross-check the inode's nlink, size and nbytes against
 * what was collected.
 *
 * @path is advanced with btrfs_next_item() as the items are consumed.
 */
4882 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4883 unsigned int ext_ref)
4885 struct extent_buffer *node;
4886 struct btrfs_inode_item *ii;
4887 struct btrfs_key key;
4896 u64 extent_size = 0;
4898 unsigned int nodatasum;
4903 node = path->nodes[0];
4904 slot = path->slots[0];
4906 btrfs_item_key_to_cpu(node, &key, slot);
4907 inode_id = key.objectid;
/* The orphan objectid is not a real inode: just step past it. */
4909 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4910 ret = btrfs_next_item(root, path);
/* Cache the INODE_ITEM fields that are verified at the end. */
4916 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4917 isize = btrfs_inode_size(node, ii);
4918 nbytes = btrfs_inode_nbytes(node, ii);
4919 mode = btrfs_inode_mode(node, ii);
4920 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4921 nlink = btrfs_inode_nlink(node, ii);
4922 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4925 ret = btrfs_next_item(root, path);
4927 /* out will fill 'err' using current statistics */
4929 } else if (ret > 0) {
4934 node = path->nodes[0];
4935 slot = path->slots[0];
4936 btrfs_item_key_to_cpu(node, &key, slot);
/* Items belonging to this inode all share inode_id as their objectid. */
4937 if (key.objectid != inode_id)
/* Dispatch on the item type; each checker updates its own accumulator. */
4941 case BTRFS_INODE_REF_KEY:
4942 ret = check_inode_ref(root, &key, node, slot, &refs,
4946 case BTRFS_INODE_EXTREF_KEY:
4947 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4948 warning("root %llu EXTREF[%llu %llu] isn't supported",
4949 root->objectid, key.objectid,
4951 ret = check_inode_extref(root, &key, node, slot, &refs,
4955 case BTRFS_DIR_ITEM_KEY:
4956 case BTRFS_DIR_INDEX_KEY:
4958 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4959 root->objectid, inode_id,
4960 imode_to_type(mode), key.objectid,
4963 ret = check_dir_item(root, &key, node, slot, &size,
4967 case BTRFS_EXTENT_DATA_KEY:
4969 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4970 root->objectid, inode_id, key.objectid,
4973 ret = check_file_extent(root, &key, node, slot,
4974 nodatasum, &extent_size,
4978 case BTRFS_XATTR_ITEM_KEY:
4981 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4982 key.objectid, key.type, key.offset);
4987 /* verify INODE_ITEM nlink/isize/nbytes */
4990 err |= LINK_COUNT_ERROR;
4991 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4992 root->objectid, inode_id, nlink);
4996 * Just a warning, as dir inode nbytes is just an
4997 * instructive value.
4999 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5000 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5001 root->objectid, inode_id,
5002 root->fs_info->nodesize);
/* 'size' is the value accumulated by check_dir_item(). */
5005 if (isize != size) {
5007 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
5008 root->objectid, inode_id, isize, size);
/* 'refs' is the count accumulated by the ref/extref checkers. */
5011 if (nlink != refs) {
5012 err |= LINK_COUNT_ERROR;
5013 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5014 root->objectid, inode_id, nlink, refs);
5015 } else if (!nlink) {
5019 if (!nbytes && !no_holes && extent_end < isize) {
5020 err |= NBYTES_ERROR;
5021 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5022 root->objectid, inode_id, isize);
/* 'extent_size' is the total accumulated by check_file_extent(). */
5025 if (nbytes != extent_size) {
5026 err |= NBYTES_ERROR;
5027 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5028 root->objectid, inode_id, nbytes, extent_size);
/*
 * Look up the INODE_ITEM of BTRFS_FIRST_FREE_OBJECTID in @root and run
 * check_inode_item() on it; a missing item is reported as
 * INODE_ITEM_MISSING.
 *
 * @ext_ref is passed through to check_inode_item().
 */
5035 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5037 struct btrfs_path path;
5038 struct btrfs_key key;
5042 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5043 key.type = BTRFS_INODE_ITEM_KEY;
5046 /* For root being dropped, we don't need to check first inode */
/* "Being dropped" is detected via zero root refs plus drop_progress. */
5047 if (btrfs_root_refs(&root->root_item) == 0 &&
5048 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5052 btrfs_init_path(&path);
5054 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5059 err |= INODE_ITEM_MISSING;
5060 error("first inode item of root %llu is missing",
/* The item-level error bitmap is merged into this function's result. */
5064 err |= check_inode_item(root, &path, ext_ref);
5069 btrfs_release_path(&path);
5074 * Iterate all item on the tree and call check_inode_item() to check.
5076 * @root: the root of the tree to be checked.
5077 * @ext_ref: the EXTENDED_IREF feature
5079 * Return 0 if no error found.
5080 * Return <0 for error.
/*
 * Check one fs/subvolume tree: first the first inode item through
 * check_fs_first_inode(), then the whole tree by alternating
 * walk_down_tree_v2() and walk_up_tree_v2().
 */
5082 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5084 struct btrfs_path path;
5085 struct node_refs nrefs;
5086 struct btrfs_root_item *root_item = &root->root_item;
5092 * We need to manually check the first inode item(256)
5093 * As the following traversal function will only start from
5094 * the first inode item in the leaf, if inode item(256) is missing
5095 * we will just skip it forever.
5097 ret = check_fs_first_inode(root, ext_ref);
5101 memset(&nrefs, 0, sizeof(nrefs));
5102 level = btrfs_header_level(root->node);
5103 btrfs_init_path(&path);
/*
 * Starting point of the walk: the root node itself when the root still has
 * references or no drop progress is recorded, otherwise the position saved
 * in drop_progress at drop_level.
 */
5105 if (btrfs_root_refs(root_item) > 0 ||
5106 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5107 path.nodes[level] = root->node;
5108 path.slots[level] = 0;
/* Take a reference on the root node now that the path points at it. */
5109 extent_buffer_get(root->node);
5111 struct btrfs_key key;
5113 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5114 level = root_item->drop_level;
5115 path.lowest_level = level;
5116 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5123 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5126 /* if ret is negative, walk shall stop */
5132 ret = walk_up_tree_v2(root, &path, &level);
5134 /* Normal exit, reset ret to err */
5141 btrfs_release_path(&path);
5146 * Find the relative ref for root_ref and root_backref.
5148 * @root: the root of the root tree.
5149 * @ref_key: the key of the root ref.
5151 * Return 0 if no error occurred.
/*
 * For the ROOT_REF or ROOT_BACKREF at (@node, @slot), look up its
 * counterpart of the opposite type and compare dirid, sequence and name.
 *
 * A missing counterpart yields ROOT_REF_MISSING, a differing one
 * ROOT_REF_MISMATCH.
 */
5153 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5154 struct extent_buffer *node, int slot)
5156 struct btrfs_path path;
5157 struct btrfs_key key;
5158 struct btrfs_root_ref *ref;
5159 struct btrfs_root_ref *backref;
5160 char ref_name[BTRFS_NAME_LEN] = {0};
5161 char backref_name[BTRFS_NAME_LEN] = {0};
5167 u32 backref_namelen;
5172 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5173 ref_dirid = btrfs_root_ref_dirid(node, ref);
5174 ref_seq = btrfs_root_ref_sequence(node, ref);
5175 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Over-long names are warned about and read truncated to BTRFS_NAME_LEN. */
5177 if (ref_namelen <= BTRFS_NAME_LEN) {
5180 len = BTRFS_NAME_LEN;
5181 warning("%s[%llu %llu] ref_name too long",
5182 ref_key->type == BTRFS_ROOT_REF_KEY ?
5183 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5186 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5188 /* Find relative root_ref */
/*
 * The counterpart swaps objectid and offset; the sum-minus-type expression
 * turns ROOT_REF into ROOT_BACKREF and vice versa.
 */
5189 key.objectid = ref_key->offset;
5190 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5191 key.offset = ref_key->objectid;
5193 btrfs_init_path(&path);
5194 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5196 err |= ROOT_REF_MISSING;
5197 error("%s[%llu %llu] couldn't find relative ref",
5198 ref_key->type == BTRFS_ROOT_REF_KEY ?
5199 "ROOT_REF" : "ROOT_BACKREF",
5200 ref_key->objectid, ref_key->offset);
5204 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5205 struct btrfs_root_ref);
5206 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5207 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5208 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5210 if (backref_namelen <= BTRFS_NAME_LEN) {
5211 len = backref_namelen;
5213 len = BTRFS_NAME_LEN;
5214 warning("%s[%llu %llu] ref_name too long",
5215 key.type == BTRFS_ROOT_REF_KEY ?
5216 "ROOT_REF" : "ROOT_BACKREF",
5217 key.objectid, key.offset);
5219 read_extent_buffer(path.nodes[0], backref_name,
5220 (unsigned long)(backref + 1), len);
/* Name lengths are compared as stored; bytes only up to the clamped len. */
5222 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5223 ref_namelen != backref_namelen ||
5224 strncmp(ref_name, backref_name, len)) {
5225 err |= ROOT_REF_MISMATCH;
5226 error("%s[%llu %llu] mismatch relative ref",
5227 ref_key->type == BTRFS_ROOT_REF_KEY ?
5228 "ROOT_REF" : "ROOT_BACKREF",
5229 ref_key->objectid, ref_key->offset);
5232 btrfs_release_path(&path);
5237 * Check all fs/file tree in low_memory mode.
5239 * 1. for fs tree root item, call check_fs_root_v2()
5240 * 2. for fs tree root ref/backref, call check_root_ref()
5242 * Return 0 if no error occurred.
/*
 * Iterate the tree of tree roots starting at BTRFS_FS_TREE_OBJECTID:
 * ROOT_ITEM keys accepted by fs_root_objectid() are checked with
 * check_fs_root_v2(), ROOT_REF/ROOT_BACKREF keys with check_root_ref().
 */
5244 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5246 struct btrfs_root *tree_root = fs_info->tree_root;
5247 struct btrfs_root *cur_root = NULL;
5248 struct btrfs_path path;
5249 struct btrfs_key key;
5250 struct extent_buffer *node;
5251 unsigned int ext_ref;
/* Whether the filesystem has the EXTENDED_IREF incompat feature set. */
5256 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5258 btrfs_init_path(&path);
5259 key.objectid = BTRFS_FS_TREE_OBJECTID;
5261 key.type = BTRFS_ROOT_ITEM_KEY;
5263 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5267 } else if (ret > 0) {
5273 node = path.nodes[0];
5274 slot = path.slots[0];
5275 btrfs_item_key_to_cpu(node, &key, slot);
/* Objectids above BTRFS_LAST_FREE_OBJECTID are outside the scanned range. */
5276 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5278 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5279 fs_root_objectid(key.objectid)) {
/*
 * The tree-reloc root is read without the root cache and freed again after
 * the check; other roots are looked up with key.offset set to (u64)-1.
 */
5280 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5281 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5284 key.offset = (u64)-1;
5285 cur_root = btrfs_read_fs_root(fs_info, &key);
5288 if (IS_ERR(cur_root)) {
5289 error("Fail to read fs/subvol tree: %lld",
5295 ret = check_fs_root_v2(cur_root, ext_ref);
5298 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5299 btrfs_free_fs_root(cur_root);
5300 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5301 key.type == BTRFS_ROOT_BACKREF_KEY) {
5302 ret = check_root_ref(tree_root, &key, node, slot);
5306 ret = btrfs_next_item(tree_root, &path);
5316 btrfs_release_path(&path);
/*
 * Walk the backref list of @rec and compare each backref, and the total
 * number of references found, with what the extent record expects.
 *
 * Diagnostics go to stderr.
 * NOTE(review): presumably only when @print_errs is set, and the result is
 * presumably non-zero when any inconsistency is found - confirm both.
 */
5320 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5322 struct list_head *cur = rec->backrefs.next;
5323 struct extent_backref *back;
5324 struct tree_backref *tback;
5325 struct data_backref *dback;
5329 while(cur != &rec->backrefs) {
5330 back = to_extent_backref(cur);
/* Case 1: the backref has no counterpart in the extent tree. */
5332 if (!back->found_extent_tree) {
5336 if (back->is_data) {
5337 dback = to_data_backref(back);
5338 fprintf(stderr, "Backref %llu %s %llu"
5339 " owner %llu offset %llu num_refs %lu"
5340 " not found in extent tree\n",
5341 (unsigned long long)rec->start,
5342 back->full_backref ?
5344 back->full_backref ?
5345 (unsigned long long)dback->parent:
5346 (unsigned long long)dback->root,
5347 (unsigned long long)dback->owner,
5348 (unsigned long long)dback->offset,
5349 (unsigned long)dback->num_refs);
5351 tback = to_tree_backref(back);
5352 fprintf(stderr, "Backref %llu parent %llu"
5353 " root %llu not found in extent tree\n",
5354 (unsigned long long)rec->start,
5355 (unsigned long long)tback->parent,
5356 (unsigned long long)tback->root);
/* Case 2: a tree backref without found_ref set. */
5359 if (!back->is_data && !back->found_ref) {
5363 tback = to_tree_backref(back);
5364 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5365 (unsigned long long)rec->start,
5366 back->full_backref ? "parent" : "root",
5367 back->full_backref ?
5368 (unsigned long long)tback->parent :
5369 (unsigned long long)tback->root, back);
/* Case 3: data backref checks - ref count, disk bytenr and byte length. */
5371 if (back->is_data) {
5372 dback = to_data_backref(back);
5373 if (dback->found_ref != dback->num_refs) {
5377 fprintf(stderr, "Incorrect local backref count"
5378 " on %llu %s %llu owner %llu"
5379 " offset %llu found %u wanted %u back %p\n",
5380 (unsigned long long)rec->start,
5381 back->full_backref ?
5383 back->full_backref ?
5384 (unsigned long long)dback->parent:
5385 (unsigned long long)dback->root,
5386 (unsigned long long)dback->owner,
5387 (unsigned long long)dback->offset,
5388 dback->found_ref, dback->num_refs, back);
5390 if (dback->disk_bytenr != rec->start) {
5394 fprintf(stderr, "Backref disk bytenr does not"
5395 " match extent record, bytenr=%llu, "
5396 "ref bytenr=%llu\n",
5397 (unsigned long long)rec->start,
5398 (unsigned long long)dback->disk_bytenr);
5401 if (dback->bytes != rec->nr) {
5405 fprintf(stderr, "Backref bytes do not match "
5406 "extent backref, bytenr=%llu, ref "
5407 "bytes=%llu, backref bytes=%llu\n",
5408 (unsigned long long)rec->start,
5409 (unsigned long long)rec->nr,
5410 (unsigned long long)dback->bytes);
/* Accumulate the per-backref contribution to the global count. */
5413 if (!back->is_data) {
5416 dback = to_data_backref(back);
5417 found += dback->found_ref;
/* Finally the accumulated count must equal the record's refs. */
5420 if (found != rec->refs) {
5424 fprintf(stderr, "Incorrect global backref count "
5425 "on %llu found %llu wanted %llu\n",
5426 (unsigned long long)rec->start,
5427 (unsigned long long)found,
5428 (unsigned long long)rec->refs);
/*
 * Drain the backref list of an extent record.
 *
 * The loop runs until rec->backrefs is empty; each pass takes the current
 * head of the list and maps it to its extent_backref.
 * NOTE(review): the unlink and release of each node, and the return value,
 * are on lines not visible in this listing - confirm against the full file.
 */
5434 static int free_all_extent_backrefs(struct extent_record *rec)
5436 struct extent_backref *back;
5437 struct list_head *cur;
5438 while (!list_empty(&rec->backrefs)) {
/* Always operate on the first remaining entry. */
5439 cur = rec->backrefs.next;
5440 back = to_extent_backref(cur);
/*
 * Tear down an extent record cache.
 *
 * Takes the first entry of extent_cache, maps it back to the containing
 * extent_record, removes it from the tree and releases its backrefs via
 * free_all_extent_backrefs().
 * NOTE(review): the enclosing loop and the release of rec itself are
 * presumably on lines missing from this listing - confirm.
 */
5447 static void free_extent_record_cache(struct cache_tree *extent_cache)
5449 struct cache_extent *cache;
5450 struct extent_record *rec;
5453 cache = first_cache_extent(extent_cache);
5456 rec = container_of(cache, struct extent_record, cache);
/* Unlink from the tree before tearing down the backref list. */
5457 remove_cache_extent(extent_cache, cache);
5458 free_all_extent_backrefs(rec);
/*
 * Drop an extent record from extent_cache once nothing about it still needs
 * attention.
 *
 * The record is removed only when every one of these holds:
 *  - content_checked and owner_ref_checked are set,
 *  - extent_item_refs equals refs and refs is greater than zero,
 *  - num_duplicates is zero,
 *  - all_backpointers_checked(rec, 0) returns 0,
 *  - none of bad_full_backref, crossing_stripes, wrong_chunk_type is set.
 *
 * On removal the record is taken out of the cache tree, its backrefs are
 * released and it is unlinked from whatever list rec->list was on.
 * NOTE(review): the release of rec and the return value are not visible in
 * this listing.
 */
5463 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5464 struct extent_record *rec)
5466 if (rec->content_checked && rec->owner_ref_checked &&
5467 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5468 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5469 !rec->bad_full_backref && !rec->crossing_stripes &&
5470 !rec->wrong_chunk_type) {
5471 remove_cache_extent(extent_cache, &rec->cache);
5472 free_all_extent_backrefs(rec);
5473 list_del_init(&rec->list);
/*
 * Try to confirm that the tree block in buf belongs to the tree named by its
 * header owner.
 *
 * Step 1: walk rec->backrefs and compare btrfs_header_owner(buf) with the
 * root of each tree backref (entries are filtered on found_ref and
 * full_backref first).
 * Step 2: read the owner root with btrfs_read_fs_root(), search it for the
 * first key of buf with the path stopping one level above buf, and check
 * whether the parent slot points at buf->start.
 *
 * Returns 0 when found is set and 1 otherwise (final return statement).
 * NOTE(review): the actions taken on the filtered branches and the other
 * return paths are on lines not visible in this listing.
 */
5479 static int check_owner_ref(struct btrfs_root *root,
5480 struct extent_record *rec,
5481 struct extent_buffer *buf)
5483 struct extent_backref *node;
5484 struct tree_backref *back;
5485 struct btrfs_root *ref_root;
5486 struct btrfs_key key;
5487 struct btrfs_path path;
5488 struct extent_buffer *parent;
5493 list_for_each_entry(node, &rec->backrefs, list) {
5496 if (!node->found_ref)
5498 if (node->full_backref)
5500 back = to_tree_backref(node);
5501 if (btrfs_header_owner(buf) == back->root)
/* Reaching the tree search for a root block is treated as fatal. */
5504 BUG_ON(rec->is_root);
5506 /* try to find the block by search corresponding fs tree */
5507 key.objectid = btrfs_header_owner(buf);
5508 key.type = BTRFS_ROOT_ITEM_KEY;
5509 key.offset = (u64)-1;
5511 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5512 if (IS_ERR(ref_root))
5515 level = btrfs_header_level(buf);
/* key is reused: from here on it holds the first key stored in buf. */
5517 btrfs_item_key_to_cpu(buf, &key, 0);
5519 btrfs_node_key_to_cpu(buf, &key, 0);
5521 btrfs_init_path(&path);
/* Search with lowest_level one above buf, then inspect that parent node. */
5522 path.lowest_level = level + 1;
5523 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5527 parent = path.nodes[level + 1];
5528 if (parent && buf->start == btrfs_node_blockptr(parent,
5529 path.slots[level + 1]))
5532 btrfs_release_path(&path);
5533 return found ? 0 : 1;
/*
 * Inspect the backrefs of rec to decide whether the record belongs to the
 * extent tree.
 *
 * Each backref is converted to a tree backref; full backrefs are tested
 * separately, and the root of the remaining ones is compared against
 * BTRFS_EXTENT_TREE_OBJECTID.
 * NOTE(review): the list advance, the handling of data backrefs and all
 * return statements are on lines not visible in this listing.
 */
5536 static int is_extent_tree_record(struct extent_record *rec)
5538 struct list_head *cur = rec->backrefs.next;
5539 struct extent_backref *node;
5540 struct tree_backref *back;
5543 while(cur != &rec->backrefs) {
5544 node = to_extent_backref(cur);
5548 back = to_tree_backref(node);
5549 if (node->full_backref)
5551 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register an unreadable block as a corrupt extent record.
 *
 * Looks up the cached extent covering start/len, checks the owning record
 * with is_extent_tree_record(), converts the parent key of the record to CPU
 * byte order and passes it with start and len to
 * btrfs_add_corrupt_extent_record(); that call supplies the return value on
 * this path.
 * NOTE(review): the early exits for a missing cache entry and for a record
 * that fails the is_extent_tree_record() test are not visible here.
 */
5558 static int record_bad_block_io(struct btrfs_fs_info *info,
5559 struct cache_tree *extent_cache,
5562 struct extent_record *rec;
5563 struct cache_extent *cache;
5564 struct btrfs_key key;
5566 cache = lookup_cache_extent(extent_cache, start, len);
5570 rec = container_of(cache, struct extent_record, cache);
5571 if (!is_extent_tree_record(rec))
5574 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5575 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at slot and slot + 1 of a tree block.
 *
 * Node (header level non-zero): the two btrfs_key_ptr structures are read
 * out and written back in exchanged positions; btrfs_fixup_low_keys() is then
 * called with the first key of the node for level + 1.
 * Leaf: the data of both items is copied into temporary buffers, written
 * back, the item offsets and sizes are exchanged and the keys are rewritten
 * through btrfs_set_item_key_unsafe() with path->slots[0] pointed at each
 * slot in turn.
 *
 * NOTE(review): the condition guarding the fixup call, the allocation
 * failure handling, the release of the temporary buffers and the return
 * value are on lines not visible in this listing.
 */
5578 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5579 struct extent_buffer *buf, int slot)
5581 if (btrfs_header_level(buf)) {
5582 struct btrfs_key_ptr ptr1, ptr2;
5584 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5585 sizeof(struct btrfs_key_ptr));
5586 read_extent_buffer(buf, &ptr2,
5587 btrfs_node_key_ptr_offset(slot + 1),
5588 sizeof(struct btrfs_key_ptr));
/* Write each pointer back into the position of the other one. */
5589 write_extent_buffer(buf, &ptr1,
5590 btrfs_node_key_ptr_offset(slot + 1),
5591 sizeof(struct btrfs_key_ptr));
5592 write_extent_buffer(buf, &ptr2,
5593 btrfs_node_key_ptr_offset(slot),
5594 sizeof(struct btrfs_key_ptr));
5596 struct btrfs_disk_key key;
5597 btrfs_node_key(buf, &key, 0);
5598 btrfs_fixup_low_keys(root, path, &key,
5599 btrfs_header_level(buf) + 1);
5602 struct btrfs_item *item1, *item2;
5603 struct btrfs_key k1, k2;
5604 char *item1_data, *item2_data;
5605 u32 item1_offset, item2_offset, item1_size, item2_size;
5607 item1 = btrfs_item_nr(slot);
5608 item2 = btrfs_item_nr(slot + 1);
5609 btrfs_item_key_to_cpu(buf, &k1, slot);
5610 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5611 item1_offset = btrfs_item_offset(buf, item1);
5612 item2_offset = btrfs_item_offset(buf, item2);
5613 item1_size = btrfs_item_size(buf, item1);
5614 item2_size = btrfs_item_size(buf, item2);
5616 item1_data = malloc(item1_size);
5619 item2_data = malloc(item2_size);
5625 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5626 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but is written with length
 * item2_size (and item2_data with item1_size). When the two sizes differ the
 * larger length reads past the smaller buffer - verify this is intended.
 */
5628 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5629 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5633 btrfs_set_item_offset(buf, item1, item2_offset);
5634 btrfs_set_item_offset(buf, item2, item1_offset);
5635 btrfs_set_item_size(buf, item1, item2_size);
5636 btrfs_set_item_size(buf, item2, item1_size);
/* The key setter works on path->slots[0], so aim it at each slot in turn. */
5638 path->slots[0] = slot;
5639 btrfs_set_item_key_unsafe(root, path, &k2);
5640 path->slots[0] = slot + 1;
5641 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering inside the block at path->lowest_level.
 *
 * Adjacent keys (slot i and i + 1) are loaded with the node or the item
 * accessor, compared with btrfs_comp_cpu_keys(), and swap_values() is called
 * for slot i; the buffer is marked dirty afterwards.
 * NOTE(review): the branch selecting node versus item accessors, the action
 * taken when the comparison is negative, the handling of ret and the return
 * statement are on lines not visible in this listing.
 * NOTE(review): if btrfs_header_nritems() returns an unsigned type, an empty
 * block would make the loop bound nritems - 1 wrap around - confirm.
 */
5646 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5648 struct extent_buffer *buf;
5649 struct btrfs_key k1, k2;
5651 int level = path->lowest_level;
5654 buf = path->nodes[level];
5655 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5657 btrfs_node_key_to_cpu(buf, &k1, i);
5658 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5660 btrfs_item_key_to_cpu(buf, &k1, i);
5661 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* A negative comparison means k1 sorts before k2. */
5663 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5665 ret = swap_values(root, path, buf, i);
5668 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at slot from a leaf.
 *
 * The key type at slot is tested against a fixed set (DIR_INDEX,
 * EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF, EXTENT_DATA_REF). The deletion
 * is announced on stdout, the item headers after slot are moved down by one
 * entry, nritems is decremented and the buffer is marked dirty.
 * btrfs_fixup_low_keys() is called with the new first key for level 1.
 * NOTE(review): only the item header array is moved here; no item data is
 * touched on the visible lines. The result for a key type outside the set,
 * the condition guarding the fixup call and the return value are not
 * visible in this listing.
 */
5674 static int delete_bogus_item(struct btrfs_root *root,
5675 struct btrfs_path *path,
5676 struct extent_buffer *buf, int slot)
5678 struct btrfs_key key;
5679 int nritems = btrfs_header_nritems(buf);
5681 btrfs_item_key_to_cpu(buf, &key, slot);
5683 /* These are all the keys we can deal with missing. */
5684 if (key.type != BTRFS_DIR_INDEX_KEY &&
5685 key.type != BTRFS_EXTENT_ITEM_KEY &&
5686 key.type != BTRFS_METADATA_ITEM_KEY &&
5687 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5688 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5691 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5692 (unsigned long long)key.objectid, key.type,
5693 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap: shift the headers of all later items down by one slot. */
5694 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5695 btrfs_item_nr_offset(slot + 1),
5696 sizeof(struct btrfs_item) *
5697 (nritems - slot - 1));
5698 btrfs_set_header_nritems(buf, nritems - 1);
5700 struct btrfs_disk_key disk_key;
5702 btrfs_item_key(buf, &disk_key, 0);
5703 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5705 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * The expected layout, as encoded by the comparisons below: item 0 ends at
 * BTRFS_LEAF_DATA_SIZE(root) and every later item ends where the previous
 * item begins. An item ending beyond its limit is handed to
 * delete_bogus_item(); an item ending short of its limit is moved up by the
 * size of the gap, its offset is rewritten and the buffer is marked dirty.
 * NOTE(review): the handling of ret after delete_bogus_item(), the loop
 * restart or exit statements and the final return are on lines not visible
 * in this listing.
 */
5709 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5711 struct extent_buffer *buf;
5715 /* We should only get this for leaves */
5716 BUG_ON(path->lowest_level);
5717 buf = path->nodes[0];
5719 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5720 unsigned int shift = 0, offset;
/* First item: its end is compared with the end of the leaf data area. */
5722 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5723 BTRFS_LEAF_DATA_SIZE(root)) {
5724 if (btrfs_item_end_nr(buf, i) >
5725 BTRFS_LEAF_DATA_SIZE(root)) {
5726 ret = delete_bogus_item(root, path, buf, i);
5729 fprintf(stderr, "item is off the end of the "
5730 "leaf, can't fix\n");
5734 shift = BTRFS_LEAF_DATA_SIZE(root) -
5735 btrfs_item_end_nr(buf, i);
/* Later items: the end is compared with the start of the previous item. */
5736 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5737 btrfs_item_offset_nr(buf, i - 1)) {
5738 if (btrfs_item_end_nr(buf, i) >
5739 btrfs_item_offset_nr(buf, i - 1)) {
5740 ret = delete_bogus_item(root, path, buf, i);
5743 fprintf(stderr, "items overlap, can't fix\n");
5747 shift = btrfs_item_offset_nr(buf, i - 1) -
5748 btrfs_item_end_nr(buf, i);
/* Move the item data up by shift bytes, then record the new offset. */
5753 printf("Shifting item nr %d by %u bytes in block %llu\n",
5754 i, shift, (unsigned long long)buf->start);
5755 offset = btrfs_item_offset_nr(buf, i);
5756 memmove_extent_buffer(buf,
5757 btrfs_leaf_data(buf) + offset + shift,
5758 btrfs_leaf_data(buf) + offset,
5759 btrfs_item_size_nr(buf, i));
5760 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5762 btrfs_mark_buffer_dirty(buf);
5766 * We may have moved things, in which case we want to exit so we don't
5767 * write those changes out. Once we have proper abort functionality in
5768 * progs this can be changed to something nicer.
5775 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5776 * then just return -EIO.
/*
 * Attempt an in-place repair of a tree block that failed its checks.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are tested for. All roots referencing buf->start are collected with
 * btrfs_find_all_roots(); for each of them the root is read, a transaction
 * is started, the tree is searched for the first key of buf with
 * path.lowest_level set to the level of buf and skip_check_block set, and
 * then fix_key_order() or fix_item_offset() is run depending on status.
 * Each visible exit from the loop body commits the transaction.
 * NOTE(review): the error checks after btrfs_read_fs_root() and
 * btrfs_search_slot(), the release of the roots list and the return
 * statement are on lines not visible in this listing.
 */
5778 static int try_to_fix_bad_block(struct btrfs_root *root,
5779 struct extent_buffer *buf,
5780 enum btrfs_tree_block_status status)
5782 struct btrfs_trans_handle *trans;
5783 struct ulist *roots;
5784 struct ulist_node *node;
5785 struct btrfs_root *search_root;
5786 struct btrfs_path path;
5787 struct ulist_iterator iter;
5788 struct btrfs_key root_key, key;
5791 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5792 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5795 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5799 btrfs_init_path(&path);
5800 ULIST_ITER_INIT(&iter);
5801 while ((node = ulist_next(roots, &iter))) {
/* node->val is used as the objectid of the root to load. */
5802 root_key.objectid = node->val;
5803 root_key.type = BTRFS_ROOT_ITEM_KEY;
5804 root_key.offset = (u64)-1;
5806 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5813 trans = btrfs_start_transaction(search_root, 0);
5814 if (IS_ERR(trans)) {
5815 ret = PTR_ERR(trans);
5819 path.lowest_level = btrfs_header_level(buf);
5820 path.skip_check_block = 1;
5821 if (path.lowest_level)
5822 btrfs_node_key_to_cpu(buf, &key, 0);
5824 btrfs_item_key_to_cpu(buf, &key, 0);
/* Search inside the transaction; the last argument requests COW - TODO confirm. */
5825 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5828 btrfs_commit_transaction(trans, search_root);
5831 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5832 ret = fix_key_order(search_root, &path);
5833 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5834 ret = fix_item_offset(search_root, &path);
5836 btrfs_commit_transaction(trans, search_root);
5839 btrfs_release_path(&path);
5840 btrfs_commit_transaction(trans, search_root);
5843 btrfs_release_path(&path);
/*
 * Validate one tree block and update its extent record.
 *
 * The record covering buf is looked up in extent_cache and stamped with the
 * header generation, the objectid of the first key (when the block has
 * items) and the level. The block is then checked with btrfs_check_leaf() or
 * btrfs_check_node() against the parent key stored in the record. A block
 * that is not clean is passed to try_to_fix_bad_block(); if it is still not
 * clean a message is printed to stderr.
 * After the checks content_checked is set, owner_ref_checked is set directly
 * when flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF and otherwise depends on
 * check_owner_ref(), and the record is offered to maybe_free_extent_rec().
 * NOTE(review): the condition around the repair attempt, the return codes
 * and the branch on the check_owner_ref() result are on lines not visible in
 * this listing.
 */
5847 static int check_block(struct btrfs_root *root,
5848 struct cache_tree *extent_cache,
5849 struct extent_buffer *buf, u64 flags)
5851 struct extent_record *rec;
5852 struct cache_extent *cache;
5853 struct btrfs_key key;
5854 enum btrfs_tree_block_status status;
5858 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5861 rec = container_of(cache, struct extent_record, cache);
5862 rec->generation = btrfs_header_generation(buf);
5864 level = btrfs_header_level(buf);
/* Only a block with at least one item has a first key to record. */
5865 if (btrfs_header_nritems(buf) > 0) {
5868 btrfs_item_key_to_cpu(buf, &key, 0);
5870 btrfs_node_key_to_cpu(buf, &key, 0);
5872 rec->info_objectid = key.objectid;
5874 rec->info_level = level;
5876 if (btrfs_is_leaf(buf))
5877 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5879 status = btrfs_check_node(root, &rec->parent_key, buf);
5881 if (status != BTRFS_TREE_BLOCK_CLEAN) {
/* The int result of the repair helper is stored in the enum status variable. */
5883 status = try_to_fix_bad_block(root, buf, status);
5884 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5886 fprintf(stderr, "bad block %llu\n",
5887 (unsigned long long)buf->start);
5890 * Signal to callers we need to start the scan over
5891 * again since we'll have cowed blocks.
5896 rec->content_checked = 1;
5897 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5898 rec->owner_ref_checked = 1;
5900 ret = check_owner_ref(root, rec, buf);
5902 rec->owner_ref_checked = 1;
5906 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the backrefs of rec for a tree backref matching parent or root.
 *
 * Two matching modes are visible: entries with full_backref set are matched
 * on back->parent == parent, entries without it on back->root == root.
 * NOTE(review): the condition selecting between the two modes, the skipping
 * of data backrefs, the list advance and the return statements are on lines
 * not visible in this listing.
 */
5910 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5911 u64 parent, u64 root)
5913 struct list_head *cur = rec->backrefs.next;
5914 struct extent_backref *node;
5915 struct tree_backref *back;
5917 while(cur != &rec->backrefs) {
5918 node = to_extent_backref(cur);
5922 back = to_tree_backref(node);
/* Parent mode: only full backrefs are candidates. */
5924 if (!node->full_backref)
5926 if (parent == back->parent)
/* Root mode: only non-full backrefs are candidates. */
5929 if (node->full_backref)
5931 if (back->root == root)
/*
 * Allocate a tree backref and append it to rec->backrefs.
 *
 * The embedded node is zeroed. One branch stores parent and marks the node
 * as a full backref; the other clears full_backref.
 * NOTE(review): the allocation failure check, the branch condition, the
 * assignment of root and the return statement are on lines not visible in
 * this listing. Only ref->node is cleared by the memset, not the whole
 * structure.
 */
5938 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5939 u64 parent, u64 root)
5941 struct tree_backref *ref = malloc(sizeof(*ref));
5945 memset(&ref->node, 0, sizeof(ref->node));
5947 ref->parent = parent;
5948 ref->node.full_backref = 1;
5951 ref->node.full_backref = 0;
5953 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search the backrefs of rec for a data backref matching the arguments.
 *
 * Full backrefs are matched on back->parent == parent. Other entries are
 * matched on root, owner and offset; for those, when found_ref is passed and
 * the candidate already has found_ref set, a difference in bytes or
 * disk_bytenr is tested for as well.
 * NOTE(review): the parameter between offset and disk_bytenr (used in the
 * body as found_ref), the branch condition between the two modes, the list
 * advance and the return statements are on lines not visible in this
 * listing.
 */
5958 static struct data_backref *find_data_backref(struct extent_record *rec,
5959 u64 parent, u64 root,
5960 u64 owner, u64 offset,
5962 u64 disk_bytenr, u64 bytes)
5964 struct list_head *cur = rec->backrefs.next;
5965 struct extent_backref *node;
5966 struct data_backref *back;
5968 while(cur != &rec->backrefs) {
5969 node = to_extent_backref(cur);
5973 back = to_data_backref(node);
/* Parent mode: only full backrefs are candidates. */
5975 if (!node->full_backref)
5977 if (parent == back->parent)
/* Keyed mode: only non-full backrefs are candidates. */
5980 if (node->full_backref)
5982 if (back->root == root && back->owner == owner &&
5983 back->offset == offset) {
/* An already found ref must also agree on size and disk location. */
5984 if (found_ref && node->found_ref &&
5985 (back->bytes != bytes ||
5986 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref and append it to rec->backrefs.
 *
 * The embedded node is zeroed and flagged is_data. One branch stores parent
 * and marks the node as a full backref; the other stores offset and clears
 * full_backref. bytes is set from max_size, and rec->max_size is raised to
 * max_size when that is larger.
 * NOTE(review): the max_size parameter declaration, the allocation failure
 * check, the branch condition, the remaining field assignments and the
 * return statement are on lines not visible in this listing.
 */
5995 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5996 u64 parent, u64 root,
5997 u64 owner, u64 offset,
6000 struct data_backref *ref = malloc(sizeof(*ref));
6004 memset(&ref->node, 0, sizeof(ref->node));
6005 ref->node.is_data = 1;
6008 ref->parent = parent;
6011 ref->node.full_backref = 1;
6015 ref->offset = offset;
6016 ref->node.full_backref = 0;
6018 ref->bytes = max_size;
6021 list_add_tail(&ref->node.list, &rec->backrefs);
/* Keep the largest size seen for this extent on the record. */
6022 if (max_size > rec->max_size)
6023 rec->max_size = max_size;
6027 /* Check if the type of extent matches with its chunk */
/*
 * Flag rec->wrong_chunk_type when the extent sits in a block group whose
 * flags do not fit the kind of extent.
 *
 * The block group is looked up through global_info by rec->start.
 *  - Data extent (rec->metadata clear): the group must carry
 *    BTRFS_BLOCK_GROUP_DATA.
 *  - Metadata extent: the group must carry SYSTEM or METADATA. When the
 *    record has backrefs, the first one decides the exact expectation: a
 *    data backref is always wrong, a tree backref rooted in the chunk tree
 *    expects SYSTEM, and any other root expects METADATA.
 * NOTE(review): the early exits (missing block group, after each verdict)
 * are on lines not visible in this listing.
 */
6028 static void check_extent_type(struct extent_record *rec)
6030 struct btrfs_block_group_cache *bg_cache;
6032 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6036 /* data extent, check chunk directly*/
6037 if (!rec->metadata) {
6038 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6039 rec->wrong_chunk_type = 1;
6043 /* metadata extent, check the obvious case first */
6044 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6045 BTRFS_BLOCK_GROUP_METADATA))) {
6046 rec->wrong_chunk_type = 1;
6051 * Check SYSTEM extent, as it's also marked as metadata, we can only
6052 * make sure it's a SYSTEM extent by its backref
6054 if (!list_empty(&rec->backrefs)) {
6055 struct extent_backref *node;
6056 struct tree_backref *tback;
6059 node = to_extent_backref(rec->backrefs.next);
6060 if (node->is_data) {
6061 /* tree block shouldn't have data backref */
6062 rec->wrong_chunk_type = 1;
6065 tback = container_of(node, struct tree_backref, node);
6067 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6068 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6070 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6071 if (!(bg_cache->flags & bg_type))
6072 rec->wrong_chunk_type = 1;
6077 * Allocate a new extent record, fill default values from @tmpl and insert into
6078 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6079 * the cache, otherwise it fails.
/*
 * Create an extent record from the template and insert it into extent_cache
 * without looking for an existing entry first.
 *
 * tmpl->max_size must be non-zero (enforced with BUG_ON). The new record
 * copies start, max_size, found_rec, content_checked, owner_ref_checked,
 * metadata, is_root, refs, extent_item_refs, parent_generation and
 * parent_key from the template; nr becomes the larger of tmpl->nr and
 * tmpl->max_size. The flag fields are reset and the three list heads are
 * initialised. After insertion the global bytes_used counter grows by
 * rec->nr, the crossing_stripes flag is computed over nodesize bytes and
 * check_extent_type() is run.
 * NOTE(review): the cache entry size is taken from tmpl->nr while rec->nr
 * may be larger - confirm this difference is intended.
 * NOTE(review): the allocation and insertion failure handling, the condition
 * guarding the crossing_stripes computation and the return statement are on
 * lines not visible in this listing.
 */
6081 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6082 struct extent_record *tmpl)
6084 struct extent_record *rec;
6087 BUG_ON(tmpl->max_size == 0);
6088 rec = malloc(sizeof(*rec));
6091 rec->start = tmpl->start;
6092 rec->max_size = tmpl->max_size;
6093 rec->nr = max(tmpl->nr, tmpl->max_size);
6094 rec->found_rec = tmpl->found_rec;
6095 rec->content_checked = tmpl->content_checked;
6096 rec->owner_ref_checked = tmpl->owner_ref_checked;
6097 rec->num_duplicates = 0;
6098 rec->metadata = tmpl->metadata;
6099 rec->flag_block_full_backref = FLAG_UNSET;
6100 rec->bad_full_backref = 0;
6101 rec->crossing_stripes = 0;
6102 rec->wrong_chunk_type = 0;
6103 rec->is_root = tmpl->is_root;
6104 rec->refs = tmpl->refs;
6105 rec->extent_item_refs = tmpl->extent_item_refs;
6106 rec->parent_generation = tmpl->parent_generation;
6107 INIT_LIST_HEAD(&rec->backrefs);
6108 INIT_LIST_HEAD(&rec->dups);
6109 INIT_LIST_HEAD(&rec->list);
6110 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6111 rec->cache.start = tmpl->start;
6112 rec->cache.size = tmpl->nr;
6113 ret = insert_cache_extent(extent_cache, &rec->cache);
/* Global accounting of bytes covered by extent records. */
6118 bytes_used += rec->nr;
6121 rec->crossing_stripes = check_crossing_stripes(global_info,
6122 rec->start, global_info->nodesize);
6123 check_extent_type(rec);
6128 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6130 * - refs - if found, increase refs
6131 * - is_root - if found, set
6132 * - content_checked - if found, set
6133 * - owner_ref_checked - if found, set
6135 * If not found, create a new one, initialize and insert.
/*
 * Merge the template into an existing extent record, or create a new record
 * when none covers the range.
 *
 * Existing record: nr may be refreshed from the template. When the template
 * comes from an extent item (found_rec) and either starts elsewhere or the
 * record already had an extent item, the record is queued on the global
 * duplicate_extents list and a new entry describing the template is appended
 * to rec->dups. Otherwise extent_item_refs is taken from the template (a
 * message is printed when the record already had a value), the checked
 * flags are or-ed in, parent_key is overwritten, parent_generation and
 * max_size are raised, and the record is re-evaluated with
 * check_crossing_stripes(), check_extent_type() and maybe_free_extent_rec().
 * No record: add_extent_rec_nolookup() does the work.
 * NOTE(review): several conditions, the else branches, the allocation check
 * for the duplicate entry and the return statements are on lines not visible
 * in this listing.
 */
6137 static int add_extent_rec(struct cache_tree *extent_cache,
6138 struct extent_record *tmpl)
6140 struct extent_record *rec;
6141 struct cache_extent *cache;
6145 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6147 rec = container_of(cache, struct extent_record, cache);
6151 rec->nr = max(tmpl->nr, tmpl->max_size);
6154 * We need to make sure to reset nr to whatever the extent
6155 * record says was the real size, this way we can compare it to
6158 if (tmpl->found_rec) {
/* A second extent item, or one starting elsewhere, is a duplicate. */
6159 if (tmpl->start != rec->start || rec->found_rec) {
6160 struct extent_record *tmp;
/* Queue the record on the global list only once. */
6163 if (list_empty(&rec->list))
6164 list_add_tail(&rec->list,
6165 &duplicate_extents);
6168 * We have to do this song and dance in case we
6169 * find an extent record that falls inside of
6170 * our current extent record but does not have
6171 * the same objectid.
6173 tmp = malloc(sizeof(*tmp));
6176 tmp->start = tmpl->start;
6177 tmp->max_size = tmpl->max_size;
6180 tmp->metadata = tmpl->metadata;
6181 tmp->extent_item_refs = tmpl->extent_item_refs;
6182 INIT_LIST_HEAD(&tmp->list);
6183 list_add_tail(&tmp->list, &rec->dups);
6184 rec->num_duplicates++;
/* For a duplicate the ref count of the original record is left alone. */
6191 if (tmpl->extent_item_refs && !dup) {
6192 if (rec->extent_item_refs) {
6193 fprintf(stderr, "block %llu rec "
6194 "extent_item_refs %llu, passed %llu\n",
6195 (unsigned long long)tmpl->start,
6196 (unsigned long long)
6197 rec->extent_item_refs,
6198 (unsigned long long)tmpl->extent_item_refs);
6200 rec->extent_item_refs = tmpl->extent_item_refs;
6204 if (tmpl->content_checked)
6205 rec->content_checked = 1;
6206 if (tmpl->owner_ref_checked)
6207 rec->owner_ref_checked = 1;
6208 memcpy(&rec->parent_key, &tmpl->parent_key,
6209 sizeof(tmpl->parent_key));
6210 if (tmpl->parent_generation)
6211 rec->parent_generation = tmpl->parent_generation;
6212 if (rec->max_size < tmpl->max_size)
6213 rec->max_size = tmpl->max_size;
6216 * A metadata extent can't cross stripe_len boundary, otherwise
6217 * kernel scrub won't be able to handle it.
6218 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6222 rec->crossing_stripes = check_crossing_stripes(
6223 global_info, rec->start,
6224 global_info->nodesize);
6225 check_extent_type(rec);
6226 maybe_free_extent_rec(extent_cache, rec);
6230 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref for the extent at bytenr.
 *
 * The extent record is looked up with a one byte range; when it is missing a
 * template starting at bytenr is inserted with add_extent_rec_nolookup() and
 * the lookup is repeated. The record must start exactly at bytenr. The
 * matching backref is searched with find_tree_backref() and allocated with
 * alloc_tree_backref() if needed. Depending on the origin of the reference,
 * either found_ref or found_extent_tree is set on the backref; a message is
 * printed to stderr when the flag was already set. Finally the record is
 * re-evaluated with check_extent_type() and maybe_free_extent_rec().
 * NOTE(review): the remaining template fields, the conditions around the
 * second lookup and the allocation, the branch on found_ref and all return
 * statements are on lines not visible in this listing.
 */
6235 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6236 u64 parent, u64 root, int found_ref)
6238 struct extent_record *rec;
6239 struct tree_backref *back;
6240 struct cache_extent *cache;
6243 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6245 struct extent_record tmpl;
6247 memset(&tmpl, 0, sizeof(tmpl));
6248 tmpl.start = bytenr;
6253 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6257 /* really a bug in cache_extent implement now */
6258 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6263 rec = container_of(cache, struct extent_record, cache);
6264 if (rec->start != bytenr) {
6266 * Several cause, from unaligned bytenr to over lapping extents
6271 back = find_tree_backref(rec, parent, root);
6273 back = alloc_tree_backref(rec, parent, root);
/* Same message text is used for both the found_ref and extent tree cases. */
6279 if (back->node.found_ref) {
6280 fprintf(stderr, "Extent back ref already exists "
6281 "for %llu parent %llu root %llu \n",
6282 (unsigned long long)bytenr,
6283 (unsigned long long)parent,
6284 (unsigned long long)root);
6286 back->node.found_ref = 1;
6288 if (back->node.found_extent_tree) {
6289 fprintf(stderr, "Extent back ref already exists "
6290 "for %llu parent %llu root %llu \n",
6291 (unsigned long long)bytenr,
6292 (unsigned long long)parent,
6293 (unsigned long long)root);
6295 back->node.found_extent_tree = 1;
6297 check_extent_type(rec);
6298 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at bytenr.
 *
 * The extent record is looked up with a one byte range; when it is missing a
 * template with start bytenr and the given max_size is inserted with
 * add_extent_rec_nolookup() and the lookup is repeated. rec->max_size is
 * raised to max_size when that is larger. The backref is searched with
 * find_data_backref() and allocated with alloc_data_backref() if needed.
 *
 * Two update paths are visible:
 *  - reference path: num_refs must be 1 (BUG_ON); an already found backref
 *    must have bytes equal to max_size (BUG_ON); found_ref is set and
 *    counted, bytes and disk_bytenr are stored, and the record is marked
 *    content_checked and owner_ref_checked.
 *  - extent tree path: a message is printed when found_extent_tree was
 *    already set; num_refs is stored and found_extent_tree is set.
 * The record is finally offered to maybe_free_extent_rec().
 * NOTE(review): the branch between the two paths, the error checks and the
 * return statements are on lines not visible in this listing.
 */
6302 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6303 u64 parent, u64 root, u64 owner, u64 offset,
6304 u32 num_refs, int found_ref, u64 max_size)
6306 struct extent_record *rec;
6307 struct data_backref *back;
6308 struct cache_extent *cache;
6311 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6313 struct extent_record tmpl;
6315 memset(&tmpl, 0, sizeof(tmpl));
6316 tmpl.start = bytenr;
6318 tmpl.max_size = max_size;
6320 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6324 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6329 rec = container_of(cache, struct extent_record, cache);
6330 if (rec->max_size < max_size)
6331 rec->max_size = max_size;
6334 * If found_ref is set then max_size is the real size and must match the
6335 * existing refs. So if we have already found a ref then we need to
6336 * make sure that this ref matches the existing one, otherwise we need
6337 * to add a new backref so we can notice that the backrefs don't match
6338 * and we need to figure out who is telling the truth. This is to
6339 * account for that awful fsync bug I introduced where we'd end up with
6340 * a btrfs_file_extent_item that would have its length include multiple
6341 * prealloc extents or point inside of a prealloc extent.
6343 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6346 back = alloc_data_backref(rec, parent, root, owner, offset,
6352 BUG_ON(num_refs != 1);
6353 if (back->node.found_ref)
6354 BUG_ON(back->bytes != max_size);
6355 back->node.found_ref = 1;
6356 back->found_ref += 1;
6357 back->bytes = max_size;
6358 back->disk_bytenr = bytenr;
6360 rec->content_checked = 1;
6361 rec->owner_ref_checked = 1;
6363 if (back->node.found_extent_tree) {
6364 fprintf(stderr, "Extent back ref already exists "
6365 "for %llu parent %llu root %llu "
6366 "owner %llu offset %llu num_refs %lu\n",
6367 (unsigned long long)bytenr,
6368 (unsigned long long)parent,
6369 (unsigned long long)root,
6370 (unsigned long long)owner,
6371 (unsigned long long)offset,
6372 (unsigned long)num_refs);
6374 back->num_refs = num_refs;
6375 back->node.found_extent_tree = 1;
6377 maybe_free_extent_rec(extent_cache, rec);
/*
 * Register the range bytenr/size in the seen tree and queue it in the
 * pending tree.
 *
 * NOTE(review): the test on ret between the two calls and the return
 * statement are on lines not visible in this listing. The result of the
 * second add_cache_extent() call is not checked on the visible line.
 */
6381 static int add_pending(struct cache_tree *pending,
6382 struct cache_tree *seen, u64 bytenr, u32 size)
6385 ret = add_cache_extent(seen, bytenr, size);
6388 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process and store them in bits[].
 *
 * Sources are consulted in this order on the visible lines: the reada tree
 * (first entry only), then the nodes tree starting from a position up to
 * 32768 below last (falling back to the start of the tree), then the
 * pending tree from its start. Consecutive cache entries are copied into
 * bits[] while entries remain and fewer than bits_nr were taken. When more
 * than 8 slots are still free afterwards, pending entries that follow the
 * end of bits[0] are appended as long as the gap to the previous end does
 * not exceed 32768.
 * NOTE(review): the last parameter, the null checks that pick between the
 * sources, the counter increments and every return statement are on lines
 * not visible in this listing.
 * NOTE(review): node_start is unsigned long while last is u64; where
 * unsigned long is 32 bits wide the value is truncated - confirm.
 */
6392 static int pick_next_pending(struct cache_tree *pending,
6393 struct cache_tree *reada,
6394 struct cache_tree *nodes,
6395 u64 last, struct block_info *bits, int bits_nr,
6398 unsigned long node_start = last;
6399 struct cache_extent *cache;
6402 cache = search_cache_extent(reada, 0);
6404 bits[0].start = cache->start;
6405 bits[0].size = cache->size;
/* Back up a little so nodes just before last are considered too. */
6410 if (node_start > 32768)
6411 node_start -= 32768;
6413 cache = search_cache_extent(nodes, node_start);
6415 cache = search_cache_extent(nodes, 0);
6418 cache = search_cache_extent(pending, 0);
6423 bits[ret].start = cache->start;
6424 bits[ret].size = cache->size;
6425 cache = next_cache_extent(cache);
6427 } while (cache && ret < bits_nr);
6433 bits[ret].start = cache->start;
6434 bits[ret].size = cache->size;
6435 cache = next_cache_extent(cache);
6437 } while (cache && ret < bits_nr);
/* Spare room left: append nearby pending extents. */
6439 if (bits_nr - ret > 8) {
6440 u64 lookup = bits[0].start + bits[0].size;
6441 struct cache_extent *next;
6442 next = search_cache_extent(pending, lookup);
6444 if (next->start - lookup > 32768)
6446 bits[ret].start = next->start;
6447 bits[ret].size = next->size;
6448 lookup = next->start + next->size;
6452 next = next_cache_extent(next);
/*
 * Per-entry destructor for the chunk cache tree: maps the cache entry back
 * to its chunk_record and unlinks the record from both of its lists.
 * NOTE(review): the release of the record itself is presumably on a line not
 * visible in this listing.
 */
6460 static void free_chunk_record(struct cache_extent *cache)
6462 struct chunk_record *rec;
6464 rec = container_of(cache, struct chunk_record, cache);
6465 list_del_init(&rec->list);
6466 list_del_init(&rec->dextents);
/*
 * Release every entry of the chunk cache tree by handing the tree to
 * cache_tree_free_extents() with free_chunk_record() as the callback.
 */
6470 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6472 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node destructor for the device rb-tree: maps the rb_node back to its
 * device_record.
 * NOTE(review): the release of the record is presumably on a line not
 * visible in this listing.
 */
6475 static void free_device_record(struct rb_node *node)
6477 struct device_record *rec;
6479 rec = container_of(node, struct device_record, node);
/*
 * Instantiate the FREE_RB_BASED_TREE macro for the name device_cache with
 * free_device_record as the callback.
 * NOTE(review): presumably this expands to the tree teardown function -
 * confirm against the macro definition.
 */
6483 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Add a block group record to the tree: the cache entry of the record is
 * inserted into tree->tree and the record is appended to the
 * tree->block_groups list.
 * NOTE(review): the check of ret after the insertion and the return
 * statement are on lines not visible in this listing.
 */
6485 int insert_block_group_record(struct block_group_tree *tree,
6486 struct block_group_record *bg_rec)
6490 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6494 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry destructor for the block group tree: maps the cache entry back
 * to its block_group_record and unlinks it from its list.
 * NOTE(review): the release of the record is presumably on a line not
 * visible in this listing.
 */
6498 static void free_block_group_record(struct cache_extent *cache)
6500 struct block_group_record *rec;
6502 rec = container_of(cache, struct block_group_record, cache);
6503 list_del_init(&rec->list);
/*
 * Release every entry of the block group tree by handing tree->tree to
 * cache_tree_free_extents() with free_block_group_record() as the callback.
 */
6507 void free_block_group_tree(struct block_group_tree *tree)
6509 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Add a device extent record to the tree.
 *
 * The cache entry is inserted with insert_cache_extent2() rather than the
 * plain variant, for the reason given in the comment below. The record is
 * then appended to both orphan lists of the tree (no_chunk_orphans and
 * no_device_orphans).
 * NOTE(review): the check of ret after the insertion and the return
 * statement are on lines not visible in this listing.
 */
6512 int insert_device_extent_record(struct device_extent_tree *tree,
6513 struct device_extent_record *de_rec)
6518 * Device extent is a bit different from the other extents, because
6519 * the extents which belong to the different devices may have the
6520 * same start and size, so we need use the special extent cache
6521 * search/insert functions.
6523 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6527 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6528 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry destructor for the device extent tree: maps the cache entry back
 * to its device_extent_record and unlinks it from the chunk and device lists
 * when it is still linked on them.
 * NOTE(review): the release of the record is presumably on a line not
 * visible in this listing.
 */
6532 static void free_device_extent_record(struct cache_extent *cache)
6534 struct device_extent_record *rec;
6536 rec = container_of(cache, struct device_extent_record, cache);
6537 if (!list_empty(&rec->chunk_list))
6538 list_del_init(&rec->chunk_list);
6539 if (!list_empty(&rec->device_list))
6540 list_del_init(&rec->device_list);
/*
 * Release every entry of the device extent tree by handing tree->tree to
 * cache_tree_free_extents() with free_device_extent_record() as the
 * callback.
 */
6544 void free_device_extent_tree(struct device_extent_tree *tree)
6546 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6549 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item into a backref on the extent record.
 *
 * The item key supplies the extent start (key.objectid) and the parent
 * (key.offset). A ref objectid below BTRFS_FIRST_FREE_OBJECTID is routed to
 * add_tree_backref(); anything else goes to add_data_backref() with root,
 * owner and offset zero, the ref count from the item, found_ref zero and
 * max_size zero.
 * NOTE(review): add_data_backref() copies max_size into the template it
 * passes to add_extent_rec_nolookup(), which has a BUG_ON for a zero
 * max_size - confirm that path cannot be reached with no cached record.
 * NOTE(review): the trailing arguments of the add_tree_backref() call and
 * the return statement are on lines not visible in this listing.
 */
6550 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6551 struct extent_buffer *leaf, int slot)
6553 struct btrfs_extent_ref_v0 *ref0;
6554 struct btrfs_key key;
6557 btrfs_item_key_to_cpu(leaf, &key, slot);
6558 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6559 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6560 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6563 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6564 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at slot in leaf.
 *
 * The record is allocated zeroed with room for num_stripes stripes
 * (btrfs_chunk_record_size()). The cache entry covers key->offset for the
 * chunk length. The key fields, the leaf generation and the chunk attributes
 * (owner, stripe_len, type, io_width, io_align, sector_size, num_stripes,
 * sub_stripes) are copied in, followed by devid, offset and device uuid of
 * every stripe.
 * NOTE(review): the slot parameter declaration, the reaction to an
 * allocation failure after the message, the length argument of the uuid copy
 * and the return statement are on lines not visible in this listing.
 */
6570 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6571 struct btrfs_key *key,
6574 struct btrfs_chunk *ptr;
6575 struct chunk_record *rec;
6578 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6579 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* Allocation size depends on the stripe count read from the item. */
6581 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6583 fprintf(stderr, "memory allocation failed\n");
6587 INIT_LIST_HEAD(&rec->list);
6588 INIT_LIST_HEAD(&rec->dextents);
6591 rec->cache.start = key->offset;
6592 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6594 rec->generation = btrfs_header_generation(leaf);
6596 rec->objectid = key->objectid;
6597 rec->type = key->type;
6598 rec->offset = key->offset;
6600 rec->length = rec->cache.size;
6601 rec->owner = btrfs_chunk_owner(leaf, ptr);
6602 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6603 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6604 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6605 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6606 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6607 rec->num_stripes = num_stripes;
6608 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6610 for (i = 0; i < rec->num_stripes; ++i) {
6611 rec->stripes[i].devid =
6612 btrfs_stripe_devid_nr(leaf, ptr, i);
6613 rec->stripes[i].offset =
6614 btrfs_stripe_offset_nr(leaf, ptr, i);
6615 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6616 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate a chunk item and add it to chunk_cache.
 *
 * The item is first passed to btrfs_check_chunk_valid(); an invalid chunk is
 * reported through error(). A chunk_record is then built with
 * btrfs_new_chunk_record() and inserted into chunk_cache; a message is
 * printed to stderr naming the chunk as already existing when the insertion
 * reports a conflict.
 * NOTE(review): the slot parameter declaration, the trailing arguments of
 * the validity check, the tests on ret, the cleanup of a rejected record
 * and the return statements are on lines not visible in this listing.
 */
6623 static int process_chunk_item(struct cache_tree *chunk_cache,
6624 struct btrfs_key *key, struct extent_buffer *eb,
6627 struct chunk_record *rec;
6628 struct btrfs_chunk *chunk;
6631 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6633 * Do extra check for this chunk item,
6635 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6636 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6637 * and owner<->key_type check.
6639 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6642 error("chunk(%llu, %llu) is not valid, ignore it",
6643 key->offset, btrfs_chunk_length(eb, chunk));
6646 rec = btrfs_new_chunk_record(eb, key, slot);
6647 ret = insert_cache_extent(chunk_cache, &rec->cache);
6649 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6650 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at slot in eb and insert it into
 * the dev_cache rb-tree, ordered by device_record_compare().
 *
 * The record carries the leaf generation, the key fields, and the device id,
 * total bytes and used bytes read from the item. A message is printed to
 * stderr naming the device as already existing when the insertion reports a
 * conflict.
 * NOTE(review): rec->devid is assigned twice, first from key->offset and
 * later from the item; the second value wins.
 * NOTE(review): the record comes from malloc, so fields not assigned on the
 * visible lines are uninitialised unless set on lines missing here. The
 * tests on rec and ret, the cleanup and the return statements are also not
 * visible in this listing.
 */
6657 static int process_device_item(struct rb_root *dev_cache,
6658 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6660 struct btrfs_dev_item *ptr;
6661 struct device_record *rec;
6664 ptr = btrfs_item_ptr(eb,
6665 slot, struct btrfs_dev_item);
6667 rec = malloc(sizeof(*rec));
6669 fprintf(stderr, "memory allocation failed\n");
6673 rec->devid = key->offset;
6674 rec->generation = btrfs_header_generation(eb);
6676 rec->objectid = key->objectid;
6677 rec->type = key->type;
6678 rec->offset = key->offset;
6680 rec->devid = btrfs_device_id(eb, ptr);
6681 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6682 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6684 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6686 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the block group item at slot in leaf.
 *
 * The record is allocated zeroed. The cache entry starts at key->objectid
 * with size key->offset; the key fields and the leaf generation are copied,
 * the flags are read from the item and the list head is initialised.
 * NOTE(review): the slot parameter declaration, the reaction to an
 * allocation failure after the message and the return statement are on
 * lines not visible in this listing.
 */
6693 struct block_group_record *
6694 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6697 struct btrfs_block_group_item *ptr;
6698 struct block_group_record *rec;
6700 rec = calloc(1, sizeof(*rec));
6702 fprintf(stderr, "memory allocation failed\n");
/* For a block group key, objectid is used as start and offset as length. */
6706 rec->cache.start = key->objectid;
6707 rec->cache.size = key->offset;
6709 rec->generation = btrfs_header_generation(leaf);
6711 rec->objectid = key->objectid;
6712 rec->type = key->type;
6713 rec->offset = key->offset;
6715 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6716 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6718 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block group record for the item at slot and insert it into
 * block_group_cache; a message is printed to stderr naming the block group
 * as already existing when the insertion reports a conflict.
 * NOTE(review): the test on ret, the cleanup of a rejected record and the
 * return statement are on lines not visible in this listing.
 */
6723 static int process_block_group_item(struct block_group_tree *block_group_cache,
6724 struct btrfs_key *key,
6725 struct extent_buffer *eb, int slot)
6727 struct block_group_record *rec;
6730 rec = btrfs_new_block_group_record(eb, key, slot);
6731 ret = insert_block_group_record(block_group_cache, rec);
6733 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6734 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the dev extent item at slot in leaf.
 *
 * The record is allocated zeroed. The cache entry is keyed by key->objectid
 * and starts at key->offset, with its size set to the extent length read
 * from the item. The key fields, the leaf generation and the chunk objectid
 * are copied and both list heads are initialised.
 * NOTE(review): the reaction to an allocation failure after the message,
 * the left-hand side of the chunk offset assignment and the return
 * statement are on lines not visible in this listing.
 */
6741 struct device_extent_record *
6742 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6743 struct btrfs_key *key, int slot)
6745 struct device_extent_record *rec;
6746 struct btrfs_dev_extent *ptr;
6748 rec = calloc(1, sizeof(*rec));
6750 fprintf(stderr, "memory allocation failed\n");
6754 rec->cache.objectid = key->objectid;
6755 rec->cache.start = key->offset;
6757 rec->generation = btrfs_header_generation(leaf);
6759 rec->objectid = key->objectid;
6760 rec->type = key->type;
6761 rec->offset = key->offset;
6763 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6764 rec->chunk_objecteid =
6765 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6767 btrfs_dev_extent_chunk_offset(leaf, ptr);
6768 rec->length = btrfs_dev_extent_length(leaf, ptr);
6769 rec->cache.size = rec->length;
6771 INIT_LIST_HEAD(&rec->chunk_list);
6772 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a record for the dev extent item at @slot of @eb and insert it into
 * @dev_extent_cache.
 *
 * NOTE(review): the condition guarding the message below and the return
 * statement are not visible in this excerpt; the message text suggests it is
 * printed when the insert reports an already existing record - confirm.
 */
6778 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6779 struct btrfs_key *key, struct extent_buffer *eb,
6782 struct device_extent_record *rec;
6785 rec = btrfs_new_device_extent_record(eb, key, slot);
6786 ret = insert_device_extent_record(dev_extent_cache, rec);
6789 "Device extent[%llu, %llu, %llu] existed.\n",
6790 rec->objectid, rec->offset, rec->length);
/*
 * Record the extent item (EXTENT_ITEM or METADATA_ITEM) at @slot of @eb in
 * @extent_cache, together with every inline backref stored in the item.
 *
 * @root:         only used to reach fs_info (nodesize, sectorsize)
 * @extent_cache: cache tree that receives the extent record and its backrefs
 * @eb, @slot:    location of the item
 *
 * The extent length is the nodesize for METADATA_ITEM keys and key.offset
 * otherwise. Extents with an unaligned start, metadata extents whose length
 * differs from the nodesize, and data extents whose length is not sector
 * aligned are reported through error().
 *
 * NOTE(review): the return statements after the error() calls and at the end
 * of the function are not visible in this excerpt.
 */
6797 static int process_extent_item(struct btrfs_root *root,
6798 struct cache_tree *extent_cache,
6799 struct extent_buffer *eb, int slot)
6801 struct btrfs_extent_item *ei;
6802 struct btrfs_extent_inline_ref *iref;
6803 struct btrfs_extent_data_ref *dref;
6804 struct btrfs_shared_data_ref *sref;
6805 struct btrfs_key key;
6806 struct extent_record tmpl;
6811 u32 item_size = btrfs_item_size_nr(eb, slot);
6817 btrfs_item_key_to_cpu(eb, &key, slot);
6819 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6821 num_bytes = root->fs_info->nodesize;
6823 num_bytes = key.offset;
6826 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6827 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6828 key.objectid, root->fs_info->sectorsize);
/* an item smaller than btrfs_extent_item is handled as the v0 layout */
6831 if (item_size < sizeof(*ei)) {
6832 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6833 struct btrfs_extent_item_v0 *ei0;
6834 BUG_ON(item_size != sizeof(*ei0));
6835 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6836 refs = btrfs_extent_refs_v0(eb, ei0);
6840 memset(&tmpl, 0, sizeof(tmpl));
6841 tmpl.start = key.objectid;
6842 tmpl.nr = num_bytes;
6843 tmpl.extent_item_refs = refs;
6844 tmpl.metadata = metadata;
6846 tmpl.max_size = num_bytes;
/* this path returns right after recording the extent; no inline refs are walked */
6848 return add_extent_rec(extent_cache, &tmpl);
6851 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6852 refs = btrfs_extent_refs(eb, ei);
6853 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6857 if (metadata && num_bytes != root->fs_info->nodesize) {
6858 error("ignore invalid metadata extent, length %llu does not equal to %u",
6859 num_bytes, root->fs_info->nodesize);
6862 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6863 error("ignore invalid data extent, length %llu is not aligned to %u",
6864 num_bytes, root->fs_info->sectorsize);
6868 memset(&tmpl, 0, sizeof(tmpl));
6869 tmpl.start = key.objectid;
6870 tmpl.nr = num_bytes;
6871 tmpl.extent_item_refs = refs;
6872 tmpl.metadata = metadata;
6874 tmpl.max_size = num_bytes;
6875 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item; a tree block stored under
 * an EXTENT_ITEM key additionally carries a btrfs_tree_block_info first.
 */
6877 ptr = (unsigned long)(ei + 1);
6878 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6879 key.type == BTRFS_EXTENT_ITEM_KEY)
6880 ptr += sizeof(struct btrfs_tree_block_info);
6882 end = (unsigned long)ei + item_size;
6884 iref = (struct btrfs_extent_inline_ref *)ptr;
6885 type = btrfs_extent_inline_ref_type(eb, iref);
6886 offset = btrfs_extent_inline_ref_offset(eb, iref);
6888 case BTRFS_TREE_BLOCK_REF_KEY:
6889 ret = add_tree_backref(extent_cache, key.objectid,
6893 "add_tree_backref failed (extent items tree block): %s",
6896 case BTRFS_SHARED_BLOCK_REF_KEY:
6897 ret = add_tree_backref(extent_cache, key.objectid,
6901 "add_tree_backref failed (extent items shared block): %s",
6904 case BTRFS_EXTENT_DATA_REF_KEY:
/* the data ref is overlaid on the inline ref's offset field */
6905 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6906 add_data_backref(extent_cache, key.objectid, 0,
6907 btrfs_extent_data_ref_root(eb, dref),
6908 btrfs_extent_data_ref_objectid(eb,
6910 btrfs_extent_data_ref_offset(eb, dref),
6911 btrfs_extent_data_ref_count(eb, dref),
6914 case BTRFS_SHARED_DATA_REF_KEY:
/* the shared data ref follows the inline ref header */
6915 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6916 add_data_backref(extent_cache, key.objectid, offset,
6918 btrfs_shared_data_ref_count(eb, sref),
6922 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6923 key.objectid, key.type, num_bytes);
/* step to the next inline ref; the entry size depends on the ref type */
6926 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the free space cache of @cache has an entry that matches the
 * range [@offset, @offset + @bytes) exactly, and unlink that entry.
 *
 * Before the lookup the range is trimmed around the superblock mirrors: for
 * each mirror, btrfs_rmap_block() supplies the logical addresses and stripe
 * length inside this block group, and any overlap with the range is cut off.
 * When a stripe sits in the middle of the range, the left part is checked by
 * a recursive call and the loop goes on with the right part.
 *
 * A missing entry, or one whose offset or size differs, is reported on
 * stderr.
 *
 * NOTE(review): return statements, loop headers and the cleanup of the
 * `logical` array are not visible in this excerpt.
 */
6933 static int check_cache_range(struct btrfs_root *root,
6934 struct btrfs_block_group_cache *cache,
6935 u64 offset, u64 bytes)
6937 struct btrfs_free_space *entry;
6943 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6944 bytenr = btrfs_sb_offset(i);
6945 ret = btrfs_rmap_block(root->fs_info,
6946 cache->key.objectid, bytenr, 0,
6947 &logical, &nr, &stripe_len);
/* stripe ends at or before the range starts */
6952 if (logical[nr] + stripe_len <= offset)
/* stripe starts at or after the range ends */
6954 if (offset + bytes <= logical[nr])
6956 if (logical[nr] == offset) {
6957 if (stripe_len >= bytes) {
6961 bytes -= stripe_len;
6962 offset += stripe_len;
6963 } else if (logical[nr] < offset) {
6964 if (logical[nr] + stripe_len >=
6969 bytes = (offset + bytes) -
6970 (logical[nr] + stripe_len);
6971 offset = logical[nr] + stripe_len;
6974 * Could be tricky, the super may land in the
6975 * middle of the area we're checking. First
6976 * check the easiest case, it's at the end.
6978 if (logical[nr] + stripe_len >=
6980 bytes = logical[nr] - offset;
6984 /* Check the left side */
6985 ret = check_cache_range(root, cache,
6987 logical[nr] - offset);
6993 /* Now we continue with the right side */
6994 bytes = (offset + bytes) -
6995 (logical[nr] + stripe_len);
6996 offset = logical[nr] + stripe_len;
7003 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
7005 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
7006 offset, offset+bytes);
7010 if (entry->offset != offset) {
7011 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7016 if (entry->bytes != bytes) {
7017 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7018 bytes, entry->bytes, offset);
/* matched entries are unlinked from the free space ctl */
7022 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space of block group @cache against the extent
 * tree.
 *
 * The extent tree is walked from max(block group start,
 * BTRFS_SUPER_INFO_OFFSET). `last` tracks the end of the previous extent;
 * every gap between `last` and the start of the next EXTENT_ITEM or
 * METADATA_ITEM, and the gap between the last extent and the end of the
 * block group, is handed to check_cache_range(). After the path is released,
 * a non-empty free_space_offset tree is reported on stderr.
 *
 * NOTE(review): return statements and loop control lines are not visible in
 * this excerpt.
 */
7027 static int verify_space_cache(struct btrfs_root *root,
7028 struct btrfs_block_group_cache *cache)
7030 struct btrfs_path path;
7031 struct extent_buffer *leaf;
7032 struct btrfs_key key;
/* from here on `root` refers to the extent tree */
7036 root = root->fs_info->extent_root;
7038 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7040 btrfs_init_path(&path);
7041 key.objectid = last;
7043 key.type = BTRFS_EXTENT_ITEM_KEY;
7044 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7049 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7050 ret = btrfs_next_leaf(root, &path);
7058 leaf = path.nodes[0];
7059 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* the item lies at or beyond the end of this block group */
7060 if (key.objectid >= cache->key.offset + cache->key.objectid)
7062 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7063 key.type != BTRFS_METADATA_ITEM_KEY) {
/* extent starts exactly at `last`: no gap, just advance past it */
7068 if (last == key.objectid) {
7069 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7070 last = key.objectid + key.offset;
7072 last = key.objectid + root->fs_info->nodesize;
/* the gap [last, key.objectid) is checked against the cache */
7077 ret = check_cache_range(root, cache, last,
7078 key.objectid - last);
7081 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7082 last = key.objectid + key.offset;
7084 last = key.objectid + root->fs_info->nodesize;
/* trailing gap between the last extent and the end of the block group */
7088 if (last < cache->key.objectid + cache->key.offset)
7089 ret = check_cache_range(root, cache, last,
7090 cache->key.objectid +
7091 cache->key.offset - last);
7094 btrfs_release_path(&path);
7097 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7098 fprintf(stderr, "There are still entries left in the space "
/*
 * Load and verify the free space information of the block groups, starting
 * the lookup right after the primary superblock.
 *
 * A message is printed when the superblock's cache generation is neither
 * -1ULL nor equal to the superblock generation. For each block group the
 * free space ctl is initialised (or, if it already exists, its cached free
 * space is removed), the free space is loaded either from the free space
 * tree (when the FREE_SPACE_TREE compat_ro bit is set) or from the free
 * space cache, and the result is checked with verify_space_cache().
 *
 * Returns -EINVAL if `error` is non-zero at the end, 0 otherwise.
 *
 * NOTE(review): the loop header, several conditions and the places where
 * `error` is updated are not visible in this excerpt.
 */
7106 static int check_space_cache(struct btrfs_root *root)
7108 struct btrfs_block_group_cache *cache;
7109 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7113 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7114 btrfs_super_generation(root->fs_info->super_copy) !=
7115 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7116 printf("cache and super generation don't match, space cache "
7117 "will be invalidated\n");
7121 if (ctx.progress_enabled) {
7122 ctx.tp = TASK_FREE_SPACE;
7123 task_start(ctx.info);
7127 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* the next lookup starts at the end of this block group */
7131 start = cache->key.objectid + cache->key.offset;
7132 if (!cache->free_space_ctl) {
7133 if (btrfs_init_free_space_ctl(cache,
7134 root->fs_info->sectorsize)) {
7139 btrfs_remove_free_space_cache(cache);
7142 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7143 ret = exclude_super_stripes(root, cache);
7145 fprintf(stderr, "could not exclude super stripes: %s\n",
7150 ret = load_free_space_tree(root->fs_info, cache);
/* excluded extents are released right after the load call */
7151 free_excluded_extents(root, cache);
7153 fprintf(stderr, "could not load free space tree: %s\n",
7160 ret = load_free_space_cache(root->fs_info, cache);
7165 ret = verify_space_cache(root, cache);
7167 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7168 cache->key.objectid);
7173 task_stop(ctx.info);
7175 return error ? -EINVAL : 0;
/*
 * Read the data range [@bytenr, @bytenr + @num_bytes) and compare the
 * checksum of every sectorsize-sized piece with the stored checksum.
 *
 * @leaf_offset: offset inside @eb of the first stored checksum; the checksum
 *               for the piece at byte offset `tmp` is read from
 *               leaf_offset + tmp / sectorsize * csum_size
 * @eb:          extent buffer holding the stored checksums
 *
 * On a mismatch a message naming the mirror is printed and
 * btrfs_num_copies() is consulted to see whether another mirror can be
 * tried.
 *
 * NOTE(review): the return statements, the handling of a @num_bytes that is
 * not a multiple of the sectorsize, the retry jump and the release of `data`
 * are not visible in this excerpt.
 */
7178 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7179 u64 num_bytes, unsigned long leaf_offset,
7180 struct extent_buffer *eb) {
7182 struct btrfs_fs_info *fs_info = root->fs_info;
7184 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7186 unsigned long csum_offset;
7190 u64 data_checked = 0;
7196 if (num_bytes % fs_info->sectorsize)
/* one buffer for the whole range; reads land at data + offset */
7199 data = malloc(num_bytes);
7203 while (offset < num_bytes) {
7206 read_len = num_bytes - offset;
7207 /* read as much space once a time */
7208 ret = read_extent_data(fs_info, data + offset,
7209 bytenr + offset, &read_len, mirror);
7213 /* verify every 4k data's checksum */
7214 while (data_checked < read_len) {
7216 tmp = offset + data_checked;
7218 csum = btrfs_csum_data((char *)data + tmp,
7219 csum, fs_info->sectorsize);
7220 btrfs_csum_final(csum, (u8 *)&csum);
7222 csum_offset = leaf_offset +
7223 tmp / fs_info->sectorsize * csum_size;
7224 read_extent_buffer(eb, (char *)&csum_expected,
7225 csum_offset, csum_size);
7226 /* try another mirror */
7227 if (csum != csum_expected) {
7228 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7229 mirror, bytenr + tmp,
7230 csum, csum_expected);
7231 num_copies = btrfs_num_copies(root->fs_info,
7233 if (mirror < num_copies - 1) {
7238 data_checked += fs_info->sectorsize;
/*
 * Check that the range [@bytenr, @bytenr + @num_bytes) is covered by extent
 * items in the extent tree.
 *
 * The search starts at (bytenr, EXTENT_ITEM, -1) and steps backwards to an
 * item whose type is not above EXTENT_ITEM. Each EXTENT_ITEM found while
 * walking forward then trims the part of the range it covers. When an extent
 * lies in the middle of the range, the right-hand remainder is verified by a
 * recursive call and the walk goes on with the left-hand part. If bytes are
 * left uncovered and no error occurred, the uncovered range is reported on
 * stderr.
 *
 * NOTE(review): the remaining parameter line, return statements and loop
 * control lines are not visible in this excerpt.
 */
7247 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7250 struct btrfs_path path;
7251 struct extent_buffer *leaf;
7252 struct btrfs_key key;
7255 btrfs_init_path(&path);
7256 key.objectid = bytenr;
7257 key.type = BTRFS_EXTENT_ITEM_KEY;
7258 key.offset = (u64)-1;
7261 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7264 fprintf(stderr, "Error looking up extent record %d\n", ret);
7265 btrfs_release_path(&path);
7268 if (path.slots[0] > 0) {
7271 ret = btrfs_prev_leaf(root, &path);
7274 } else if (ret > 0) {
7281 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7284 * Block group items come before extent items if they have the same
7285 * bytenr, so walk back one more just in case. Dear future traveller,
7286 * first congrats on mastering time travel. Now if it's not too much
7287 * trouble could you go back to 2006 and tell Chris to make the
7288 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7289 * EXTENT_ITEM_KEY please?
7291 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7292 if (path.slots[0] > 0) {
7295 ret = btrfs_prev_leaf(root, &path);
7298 } else if (ret > 0) {
7303 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7307 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7308 ret = btrfs_next_leaf(root, &path);
7310 fprintf(stderr, "Error going to next leaf "
7312 btrfs_release_path(&path);
7318 leaf = path.nodes[0];
7319 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7320 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* extent ends before the range starts */
7324 if (key.objectid + key.offset < bytenr) {
/* extent starts after the range ends */
7328 if (key.objectid > bytenr + num_bytes)
7331 if (key.objectid == bytenr) {
7332 if (key.offset >= num_bytes) {
7336 num_bytes -= key.offset;
7337 bytenr += key.offset;
7338 } else if (key.objectid < bytenr) {
7339 if (key.objectid + key.offset >= bytenr + num_bytes) {
7343 num_bytes = (bytenr + num_bytes) -
7344 (key.objectid + key.offset);
7345 bytenr = key.objectid + key.offset;
7347 if (key.objectid + key.offset < bytenr + num_bytes) {
7348 u64 new_start = key.objectid + key.offset;
7349 u64 new_bytes = bytenr + num_bytes - new_start;
7352 * Weird case, the extent is in the middle of
7353 * our range, we'll have to search one side
7354 * and then the other. Not sure if this happens
7355 * in real life, but no harm in coding it up
7356 * anyway just in case.
7358 btrfs_release_path(&path);
7359 ret = check_extent_exists(root, new_start,
7362 fprintf(stderr, "Right section didn't "
7366 num_bytes = key.objectid - bytenr;
7369 num_bytes = key.objectid - bytenr;
7376 if (num_bytes && !ret) {
7377 fprintf(stderr, "There are no extents for csum range "
7378 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7382 btrfs_release_path(&path);
/*
 * Walk the checksum tree and verify that checksummed ranges are backed by
 * extent records.
 *
 * For every EXTENT_CSUM item the covered data length is
 * (item size / csum size) * sectorsize. Consecutive items are merged into
 * one range (`offset`, `num_bytes`); when an item does not start where the
 * current range ends, the range is passed to check_extent_exists() and a new
 * range begins at that item. The data checksums themselves are verified
 * through check_extent_csums() unless the file-level check_data_csum flag is
 * zero.
 *
 * NOTE(review): return statements, loop control and the skip_csum_check
 * label are not visible in this excerpt.
 */
7386 static int check_csums(struct btrfs_root *root)
7388 struct btrfs_path path;
7389 struct extent_buffer *leaf;
7390 struct btrfs_key key;
7391 u64 offset = 0, num_bytes = 0;
7392 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7396 unsigned long leaf_offset;
/* from here on `root` refers to the checksum tree */
7398 root = root->fs_info->csum_root;
7399 if (!extent_buffer_uptodate(root->node)) {
7400 fprintf(stderr, "No valid csum tree found\n");
7404 btrfs_init_path(&path);
7405 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7406 key.type = BTRFS_EXTENT_CSUM_KEY;
7408 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7410 fprintf(stderr, "Error searching csum tree %d\n", ret);
7411 btrfs_release_path(&path);
7415 if (ret > 0 && path.slots[0])
7420 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7421 ret = btrfs_next_leaf(root, &path);
7423 fprintf(stderr, "Error going to next leaf "
7430 leaf = path.nodes[0];
7432 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7433 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* number of data bytes covered by the checksums stored in this item */
7438 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7439 csum_size) * root->fs_info->sectorsize;
7440 if (!check_data_csum)
7441 goto skip_csum_check;
7442 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7443 ret = check_extent_csums(root, key.offset, data_len,
7449 offset = key.offset;
/* discontinuity: verify the range collected so far, then restart here */
7450 } else if (key.offset != offset + num_bytes) {
7451 ret = check_extent_exists(root, offset, num_bytes);
7453 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7454 "there is no extent record\n",
7455 offset, offset+num_bytes);
7458 offset = key.offset;
7461 num_bytes += data_len;
7465 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field, in the order objectid, type,
 * offset; a later field is only looked at when all earlier ones are equal.
 *
 * NOTE(review): the return statements are not visible in this excerpt; the
 * name and the `<` tests suggest a non-zero result when @key sorts before
 * @drop_key - confirm.
 */
7469 static int is_dropped_key(struct btrfs_key *key,
7470 struct btrfs_key *drop_key) {
7471 if (key->objectid < drop_key->objectid)
7473 else if (key->objectid == drop_key->objectid) {
7474 if (key->type < drop_key->type)
7476 else if (key->type == drop_key->type) {
7477 if (key->offset < drop_key->offset)
7485 * Here are the rules for FULL_BACKREF.
7487 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7488 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7490 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7491 * if it happened after the relocation occurred since we'll have dropped the
7492 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7493 * have no real way to know for sure.
7495 * We process the blocks one root at a time, and we start from the lowest root
7496 * objectid and go to the highest. So we can just lookup the owner backref for
7497 * the record and if we don't find it then we know it doesn't exist and we have
7500 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7501 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7502 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, using the extent record cached for it and
 * the root currently being processed (@ri). The flag is ORed into *flags on
 * the full-backref path.
 *
 * The checks are made in this order: the root's objectid against
 * BTRFS_FIRST_FREE_OBJECTID, whether @buf is the root node itself, the RELOC
 * header flag, whether the header owner equals the root, and finally a
 * lookup of the owner's tree backref in the record. On either outcome,
 * rec->bad_full_backref is set when the record already holds the opposite
 * decision.
 *
 * NOTE(review): the goto/return lines that connect these tests to the two
 * outcomes are not visible in this excerpt.
 */
7504 static int calc_extent_flag(struct cache_tree *extent_cache,
7505 struct extent_buffer *buf,
7506 struct root_item_record *ri,
7509 struct extent_record *rec;
7510 struct cache_extent *cache;
7511 struct tree_backref *tback;
7514 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7515 /* we have added this extent before */
7519 rec = container_of(cache, struct extent_record, cache);
7522 * Except file/reloc tree, we can not have
7525 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* @buf is the root node of the tree described by @ri */
7530 if (buf->start == ri->bytenr)
7533 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7536 owner = btrfs_header_owner(buf);
7537 if (owner == ri->objectid)
7540 tback = find_tree_backref(rec, 0, owner);
/* outcome without FULL_BACKREF: a previously stored non-zero value conflicts */
7545 if (rec->flag_block_full_backref != FLAG_UNSET &&
7546 rec->flag_block_full_backref != 0)
7547 rec->bad_full_backref = 1;
/* outcome with FULL_BACKREF: a previously stored value other than 1 conflicts */
7550 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7551 if (rec->flag_block_full_backref != FLAG_UNSET &&
7552 rec->flag_block_full_backref != 1)
7553 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr of the form
 * "Invalid key type(<type>) found in root(<root>)", with the two values
 * rendered by print_key_type() and print_objectid().
 *
 * @key_type: key type that was found
 * @rootid:   objectid of the tree it was found in
 */
7557 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7559 fprintf(stderr, "Invalid key type(");
7560 print_key_type(stderr, 0, key_type);
7561 fprintf(stderr, ") found in root(");
7562 print_objectid(stderr, rootid, 0);
7563 fprintf(stderr, ")\n");
7567 * Check if the key is valid with its extent buffer.
7569 * This is an early check in case an invalid key exists in an extent buffer
7570 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of type @key_type may appear in the tree identified by
 * @rootid. Each group of case labels below names the key types tied to one
 * tree; a mismatch is reported through report_mismatch_key_root().
 *
 * NOTE(review): the switch header, the jumps to the error path and the
 * return statements are not visible in this excerpt.
 */
7573 static int check_type_with_root(u64 rootid, u8 key_type)
7576 /* Only valid in chunk tree */
7577 case BTRFS_DEV_ITEM_KEY:
7578 case BTRFS_CHUNK_ITEM_KEY:
7579 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7582 /* valid in csum and log tree */
/*
 * NOTE(review): every other label here is a *_KEY item type, but this one is
 * a tree objectid. It looks like BTRFS_EXTENT_CSUM_KEY was intended, in which
 * case this case never matches a checksum item - confirm against the
 * condition below (partly hidden in this excerpt) before changing it.
 */
7583 case BTRFS_CSUM_TREE_OBJECTID:
7584 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7588 case BTRFS_EXTENT_ITEM_KEY:
7589 case BTRFS_METADATA_ITEM_KEY:
7590 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7591 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7594 case BTRFS_ROOT_ITEM_KEY:
7595 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7598 case BTRFS_DEV_EXTENT_KEY:
7599 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7605 report_mismatch_key_root(key_type, rootid);
/*
 * Process one tree block of the scan.
 *
 * Outline of the visible steps:
 *  1. pick_next_pending() selects candidate blocks into @bits; each one is
 *     added to @reada and a readahead is issued for it.
 *  2. bits[0] becomes the block to process. It is looked up in @pending,
 *     @reada and @nodes and removed from them, and its extent record in
 *     @extent_cache supplies the parent generation for read_tree_block().
 *  3. The FULL_BACKREF flag is determined, either from the extent tree
 *     (btrfs_lookup_extent_info(), when init_extent_tree is not set) or from
 *     calc_extent_flag(), then cross-checked against the block's owner,
 *     RELOC header flag and generation; conflicts set
 *     rec->bad_full_backref.
 *  4. check_block() validates the block.
 *  5. Leaf: every item is dispatched on its key type into the matching
 *     cache (@extent_cache, @chunk_cache, @dev_cache, @block_group_cache,
 *     @dev_extent_cache); file extent items add data backrefs and update the
 *     data byte counters.
 *     Node: every child pointer gets an extent record and a tree backref and
 *     is queued through add_pending() into @nodes or @pending.
 *  6. The global btree statistics are updated and the buffer is released.
 *
 * @last: updated to the start of the block that was picked
 * @ri:   record of the root being walked (objectid, last_snapshot,
 *        drop_key/drop_level are read here)
 *
 * NOTE(review): many short lines (conditions, else branches, gotos, returns)
 * are not visible in this excerpt, so error paths and the return value are
 * not described.
 */
7609 static int run_next_block(struct btrfs_root *root,
7610 struct block_info *bits,
7613 struct cache_tree *pending,
7614 struct cache_tree *seen,
7615 struct cache_tree *reada,
7616 struct cache_tree *nodes,
7617 struct cache_tree *extent_cache,
7618 struct cache_tree *chunk_cache,
7619 struct rb_root *dev_cache,
7620 struct block_group_tree *block_group_cache,
7621 struct device_extent_tree *dev_extent_cache,
7622 struct root_item_record *ri)
7624 struct btrfs_fs_info *fs_info = root->fs_info;
7625 struct extent_buffer *buf;
7626 struct extent_record *rec = NULL;
7637 struct btrfs_key key;
7638 struct cache_extent *cache;
7641 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7642 bits_nr, &reada_bits);
7647 for(i = 0; i < nritems; i++) {
7648 ret = add_cache_extent(reada, bits[i].start,
7653 /* fixme, get the parent transid */
7654 readahead_tree_block(fs_info, bits[i].start,
/* the first picked block is the one processed by this call */
7658 *last = bits[0].start;
7659 bytenr = bits[0].start;
7660 size = bits[0].size;
7662 cache = lookup_cache_extent(pending, bytenr, size);
7664 remove_cache_extent(pending, cache);
7667 cache = lookup_cache_extent(reada, bytenr, size);
7669 remove_cache_extent(reada, cache);
7672 cache = lookup_cache_extent(nodes, bytenr, size);
7674 remove_cache_extent(nodes, cache);
7677 cache = lookup_cache_extent(extent_cache, bytenr, size);
7679 rec = container_of(cache, struct extent_record, cache);
7680 gen = rec->parent_generation;
7683 /* fixme, get the real parent transid */
7684 buf = read_tree_block(root->fs_info, bytenr, gen);
7685 if (!extent_buffer_uptodate(buf)) {
7686 record_bad_block_io(root->fs_info,
7687 extent_cache, bytenr, size);
7691 nritems = btrfs_header_nritems(buf);
/*
 * Normally the flags come from the extent tree; calc_extent_flag() is used
 * when init_extent_tree is set (and, it appears, as a fallback).
 */
7694 if (!init_extent_tree) {
7695 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7696 btrfs_header_level(buf), 1, NULL,
7699 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7701 fprintf(stderr, "Couldn't calc extent flags\n");
7702 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7707 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7709 fprintf(stderr, "Couldn't calc extent flags\n");
7710 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7714 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7716 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7717 ri->objectid == btrfs_header_owner(buf)) {
7719 * Ok we got to this block from it's original owner and
7720 * we have FULL_BACKREF set. Relocation can leave
7721 * converted blocks over so this is altogether possible,
7722 * however it's not possible if the generation > the
7723 * last snapshot, so check for this case.
7725 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7726 btrfs_header_generation(buf) > ri->last_snapshot) {
7727 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7728 rec->bad_full_backref = 1;
7733 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7734 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7735 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7736 rec->bad_full_backref = 1;
/* remember the final decision on the extent record */
7740 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7741 rec->flag_block_full_backref = 1;
7745 rec->flag_block_full_backref = 0;
7747 owner = btrfs_header_owner(buf);
7750 ret = check_block(root, extent_cache, buf, flags);
7754 if (btrfs_is_leaf(buf)) {
7755 btree_space_waste += btrfs_leaf_free_space(root, buf);
7756 for (i = 0; i < nritems; i++) {
7757 struct btrfs_file_extent_item *fi;
7758 btrfs_item_key_to_cpu(buf, &key, i);
7760 * Check key type against the leaf owner.
7761 * Could filter quite a lot of early error if
7764 if (check_type_with_root(btrfs_header_owner(buf),
7766 fprintf(stderr, "ignoring invalid key\n");
7769 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7770 process_extent_item(root, extent_cache, buf,
7774 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7775 process_extent_item(root, extent_cache, buf,
7779 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7781 btrfs_item_size_nr(buf, i);
7784 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7785 process_chunk_item(chunk_cache, &key, buf, i);
7788 if (key.type == BTRFS_DEV_ITEM_KEY) {
7789 process_device_item(dev_cache, &key, buf, i);
7792 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7793 process_block_group_item(block_group_cache,
7797 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7798 process_device_extent_item(dev_extent_cache,
7803 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7804 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7805 process_extent_ref_v0(extent_cache, buf, i);
/* keyed (non-inline) backref items: key.objectid is the extent they refer to */
7812 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7813 ret = add_tree_backref(extent_cache,
7814 key.objectid, 0, key.offset, 0);
7817 "add_tree_backref failed (leaf tree block): %s",
7821 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7822 ret = add_tree_backref(extent_cache,
7823 key.objectid, key.offset, 0, 0);
7826 "add_tree_backref failed (leaf shared block): %s",
7830 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7831 struct btrfs_extent_data_ref *ref;
7832 ref = btrfs_item_ptr(buf, i,
7833 struct btrfs_extent_data_ref);
7834 add_data_backref(extent_cache,
7836 btrfs_extent_data_ref_root(buf, ref),
7837 btrfs_extent_data_ref_objectid(buf,
7839 btrfs_extent_data_ref_offset(buf, ref),
7840 btrfs_extent_data_ref_count(buf, ref),
7841 0, root->fs_info->sectorsize);
7844 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7845 struct btrfs_shared_data_ref *ref;
7846 ref = btrfs_item_ptr(buf, i,
7847 struct btrfs_shared_data_ref);
7848 add_data_backref(extent_cache,
7849 key.objectid, key.offset, 0, 0, 0,
7850 btrfs_shared_data_ref_count(buf, ref),
7851 0, root->fs_info->sectorsize);
/* selected orphan items are queued on the file-level delete_items list */
7854 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7855 struct bad_item *bad;
7857 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7861 bad = malloc(sizeof(struct bad_item));
7864 INIT_LIST_HEAD(&bad->list);
7865 memcpy(&bad->key, &key,
7866 sizeof(struct btrfs_key));
7867 bad->root_id = owner;
7868 list_add_tail(&bad->list, &delete_items);
/* what follows handles file extent items (EXTENT_DATA) */
7871 if (key.type != BTRFS_EXTENT_DATA_KEY)
7873 fi = btrfs_item_ptr(buf, i,
7874 struct btrfs_file_extent_item);
7875 if (btrfs_file_extent_type(buf, fi) ==
7876 BTRFS_FILE_EXTENT_INLINE)
7878 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7881 data_bytes_allocated +=
7882 btrfs_file_extent_disk_num_bytes(buf, fi);
7883 if (data_bytes_allocated < root->fs_info->sectorsize) {
7886 data_bytes_referenced +=
7887 btrfs_file_extent_num_bytes(buf, fi);
7888 add_data_backref(extent_cache,
7889 btrfs_file_extent_disk_bytenr(buf, fi),
7890 parent, owner, key.objectid, key.offset -
7891 btrfs_file_extent_offset(buf, fi), 1, 1,
7892 btrfs_file_extent_disk_num_bytes(buf, fi));
7896 struct btrfs_key first_key;
7898 first_key.objectid = 0;
7901 btrfs_item_key_to_cpu(buf, &first_key, 0);
7902 level = btrfs_header_level(buf);
7903 for (i = 0; i < nritems; i++) {
7904 struct extent_record tmpl;
7906 ptr = btrfs_node_blockptr(buf, i);
7907 size = root->fs_info->nodesize;
7908 btrfs_node_key_to_cpu(buf, &key, i);
/* children matched by the root's drop progress get special handling */
7910 if ((level == ri->drop_level)
7911 && is_dropped_key(&key, &ri->drop_key)) {
7916 memset(&tmpl, 0, sizeof(tmpl));
7917 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7918 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7923 tmpl.max_size = size;
7924 ret = add_extent_rec(extent_cache, &tmpl);
7928 ret = add_tree_backref(extent_cache, ptr, parent,
7932 "add_tree_backref failed (non-leaf block): %s",
7938 add_pending(nodes, seen, ptr, size);
7940 add_pending(pending, seen, ptr, size);
/* unused key pointer slots of a node count as wasted btree space */
7943 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7944 nritems) * sizeof(struct btrfs_key_ptr);
7946 total_btree_bytes += buf->len;
7947 if (fs_root_objectid(btrfs_header_owner(buf)))
7948 total_fs_tree_bytes += buf->len;
7949 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7950 total_extent_tree_bytes += buf->len;
7951 if (!found_old_backref &&
7952 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7953 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7954 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7955 found_old_backref = 1;
7957 free_extent_buffer(buf);
/*
 * Queue the root block @buf of a tree for scanning and register it in
 * @extent_cache.
 *
 * A block with level > 0 is queued in @nodes, a leaf in @pending. An extent
 * record starting at buf->start with max_size buf->len is added, followed by
 * a tree backref: with the block itself as parent when the tree is the reloc
 * tree or the block uses a backref revision older than
 * BTRFS_MIXED_BACKREF_REV, otherwise with parent 0 and the given objectid as
 * root.
 *
 * NOTE(review): the remaining parameter line, some template fields and the
 * return statement are not visible in this excerpt.
 */
7961 static int add_root_to_pending(struct extent_buffer *buf,
7962 struct cache_tree *extent_cache,
7963 struct cache_tree *pending,
7964 struct cache_tree *seen,
7965 struct cache_tree *nodes,
7968 struct extent_record tmpl;
7971 if (btrfs_header_level(buf) > 0)
7972 add_pending(nodes, seen, buf->start, buf->len);
7974 add_pending(pending, seen, buf->start, buf->len);
7976 memset(&tmpl, 0, sizeof(tmpl));
7977 tmpl.start = buf->start;
7982 tmpl.max_size = buf->len;
7983 add_extent_rec(extent_cache, &tmpl);
7985 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7986 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7987 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7990 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7995 /* as we fix the tree, we might be deleting blocks that
7996 * we're tracking for repair. This hook makes sure we
7997 * remove any backrefs for blocks as we are fixing them.
// Hook that keeps the checker's extent cache (fs_info->fsck_extent_cache)
// in step when references to an extent are dropped.
//
// The extent record covering [bytenr, bytenr + num_bytes) is looked up.
// owner >= BTRFS_FIRST_FREE_OBJECTID selects the data backref path,
// anything else the tree backref path. On the matching backref the
// found_ref / found_extent_tree bookkeeping and the record's reference
// counters are reduced, and maybe_free_extent_rec() is then called on the
// record.
//
// NOTE(review): the last parameter line, several conditions and the return
// statements are not visible in this excerpt.
7999 static int free_extent_hook(struct btrfs_trans_handle *trans,
8000 struct btrfs_root *root,
8001 u64 bytenr, u64 num_bytes, u64 parent,
8002 u64 root_objectid, u64 owner, u64 offset,
8005 struct extent_record *rec;
8006 struct cache_extent *cache;
8008 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8010 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8011 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8015 rec = container_of(cache, struct extent_record, cache);
8017 struct data_backref *back;
8018 back = find_data_backref(rec, parent, root_objectid, owner,
8019 offset, 1, bytenr, num_bytes);
8022 if (back->node.found_ref) {
8023 back->found_ref -= refs_to_drop;
8025 rec->refs -= refs_to_drop;
8027 if (back->node.found_extent_tree) {
8028 back->num_refs -= refs_to_drop;
8029 if (rec->extent_item_refs)
8030 rec->extent_item_refs -= refs_to_drop;
// once a counter reaches zero the matching "found" bit is cleared
8032 if (back->found_ref == 0)
8033 back->node.found_ref = 0;
8034 if (back->num_refs == 0)
8035 back->node.found_extent_tree = 0;
// NOTE(review): the backref is unlinked while found_ref is still set;
// confirm whether !found_ref was intended here.
8037 if (!back->node.found_extent_tree && back->node.found_ref) {
8038 list_del(&back->node.list);
8042 struct tree_backref *back;
8043 back = find_tree_backref(rec, parent, root_objectid);
8046 if (back->node.found_ref) {
8049 back->node.found_ref = 0;
8051 if (back->node.found_extent_tree) {
8052 if (rec->extent_item_refs)
8053 rec->extent_item_refs--;
8054 back->node.found_extent_tree = 0;
// NOTE(review): same condition as in the data path above; found_ref was
// cleared a few lines earlier whenever it was set, so confirm that this
// branch can be taken at all.
8056 if (!back->node.found_extent_tree && back->node.found_ref) {
8057 list_del(&back->node.list);
8061 maybe_free_extent_rec(extent_cache, rec);
// Delete the extent-tree items recorded for the extent at bytenr.
//
// The search key starts at (bytenr, <type>, (u64)-1) and the extent tree is
// searched repeatedly. Items whose objectid differs from bytenr end the
// scan. Items of a type other than the extent / backref types listed below
// are skipped by moving the search key just below them. Every matching item
// is removed with btrfs_del_item(); for EXTENT_ITEM and METADATA_ITEM items
// the block group accounting is updated as well, using key.offset or the
// nodesize as the extent length.
//
// NOTE(review): the last parameter line, the initial key type, loop control
// and return statements are not visible in this excerpt.
8066 static int delete_extent_records(struct btrfs_trans_handle *trans,
8067 struct btrfs_root *root,
8068 struct btrfs_path *path,
8071 struct btrfs_key key;
8072 struct btrfs_key found_key;
8073 struct extent_buffer *leaf;
8078 key.objectid = bytenr;
8080 key.offset = (u64)-1;
8083 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8090 if (path->slots[0] == 0)
8096 leaf = path->nodes[0];
8097 slot = path->slots[0];
8099 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8100 if (found_key.objectid != bytenr)
8103 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8104 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8105 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8106 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8107 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8108 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8109 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8110 btrfs_release_path(path);
// step the search key to just below the unrelated item
8111 if (found_key.type == 0) {
8112 if (found_key.offset == 0)
8114 key.offset = found_key.offset - 1;
8115 key.type = found_key.type;
8117 key.type = found_key.type - 1;
8118 key.offset = (u64)-1;
8122 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8123 found_key.objectid, found_key.type, found_key.offset);
8125 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8128 btrfs_release_path(path);
8130 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8131 found_key.type == BTRFS_METADATA_ITEM_KEY) {
// METADATA_ITEM keys do not carry the length in key.offset
8132 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8133 found_key.offset : root->fs_info->nodesize;
8135 ret = btrfs_update_block_group(trans, root, bytenr,
8142 btrfs_release_path(path);
8147 * for a single backref, this will allocate a new extent
8148 * and add the backref to it.
// Write one backref of the extent record rec into the extent tree.
//
// Visible steps:
//  - rec->max_size is raised through max_t() (second operand not visible).
//  - An EXTENT_ITEM keyed (rec->start, EXTENT_ITEM, rec->max_size) is
//    inserted with 0 refs and rec->generation. Data backrefs get
//    BTRFS_EXTENT_FLAG_DATA; otherwise a btrfs_tree_block_info is filled in
//    from rec->info_objectid / rec->info_level and the flags become
//    BTRFS_EXTENT_FLAG_TREE_BLOCK | flags. Block group accounting is updated
//    for the new extent.
//  - For a data backref, btrfs_inc_extent_ref() is called once per
//    dback->found_ref; for a tree backref it is called once. Either way a
//    message describing the new backref is printed.
//
// NOTE(review): the condition that decides whether the extent item is
// inserted (presumably tied to the allocated parameter), the error checks
// and the return statements are not visible in this excerpt.
8150 static int record_extent(struct btrfs_trans_handle *trans,
8151 struct btrfs_fs_info *info,
8152 struct btrfs_path *path,
8153 struct extent_record *rec,
8154 struct extent_backref *back,
8155 int allocated, u64 flags)
8158 struct btrfs_root *extent_root = info->extent_root;
8159 struct extent_buffer *leaf;
8160 struct btrfs_key ins_key;
8161 struct btrfs_extent_item *ei;
8162 struct data_backref *dback;
8163 struct btrfs_tree_block_info *bi;
8166 rec->max_size = max_t(u64, rec->max_size,
8170 u32 item_size = sizeof(*ei);
8173 item_size += sizeof(*bi);
8175 ins_key.objectid = rec->start;
8176 ins_key.offset = rec->max_size;
8177 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8179 ret = btrfs_insert_empty_item(trans, extent_root, path,
8180 &ins_key, item_size);
8184 leaf = path->nodes[0];
8185 ei = btrfs_item_ptr(leaf, path->slots[0],
8186 struct btrfs_extent_item);
// the item starts with 0 refs; the references are added further down
8188 btrfs_set_extent_refs(leaf, ei, 0);
8189 btrfs_set_extent_generation(leaf, ei, rec->generation);
8191 if (back->is_data) {
8192 btrfs_set_extent_flags(leaf, ei,
8193 BTRFS_EXTENT_FLAG_DATA);
// NOTE(review): stray second semicolon on the declaration below.
8195 struct btrfs_disk_key copy_key;;
8197 bi = (struct btrfs_tree_block_info *)(ei + 1);
8198 memset_extent_buffer(leaf, 0, (unsigned long)bi,
// NOTE(review): the copyright-sign character in the next lines looks like a
// mis-decoded "&copy" (address-of copy_key); confirm against the real file.
8201 btrfs_set_disk_key_objectid(©_key,
8202 rec->info_objectid);
8203 btrfs_set_disk_key_type(©_key, 0);
8204 btrfs_set_disk_key_offset(©_key, 0);
8206 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8207 btrfs_set_tree_block_key(leaf, bi, ©_key);
8209 btrfs_set_extent_flags(leaf, ei,
8210 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8213 btrfs_mark_buffer_dirty(leaf);
8214 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8215 rec->max_size, 1, 0);
8218 btrfs_release_path(path);
8221 if (back->is_data) {
8225 dback = to_data_backref(back);
8226 if (back->full_backref)
8227 parent = dback->parent;
// one btrfs_inc_extent_ref() call per reference that was found
8231 for (i = 0; i < dback->found_ref; i++) {
8232 /* if parent != 0, we're doing a full backref
8233 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8234 * just makes the backref allocator create a data
8237 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8238 rec->start, rec->max_size,
8242 BTRFS_FIRST_FREE_OBJECTID :
8248 fprintf(stderr, "adding new data backref"
8249 " on %llu %s %llu owner %llu"
8250 " offset %llu found %d\n",
8251 (unsigned long long)rec->start,
8252 back->full_backref ?
8254 back->full_backref ?
8255 (unsigned long long)parent :
8256 (unsigned long long)dback->root,
8257 (unsigned long long)dback->owner,
8258 (unsigned long long)dback->offset,
8262 struct tree_backref *tback;
8264 tback = to_tree_backref(back);
8265 if (back->full_backref)
8266 parent = tback->parent;
8270 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8271 rec->start, rec->max_size,
8272 parent, tback->root, 0, 0);
8273 fprintf(stderr, "adding new tree backref on "
8274 "start %llu len %llu parent %llu root %llu\n",
8275 rec->start, rec->max_size, parent, tback->root);
8278 btrfs_release_path(path);
/*
 * find_entry - search @entries for an extent_entry whose bytenr and bytes
 * both equal the requested @bytenr and @bytes.
 *
 * NOTE(review): the lines that return the result are not visible in this
 * listing; presumably the matching entry is returned, or NULL when the
 * list holds no match -- confirm against the full source.
 */
8282 static struct extent_entry *find_entry(struct list_head *entries,
8283 u64 bytenr, u64 bytes)
8285 struct extent_entry *entry = NULL;
8287 list_for_each_entry(entry, entries, list) {
8288 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry - choose which extent_entry in @entries should be
 * trusted, using the per-entry count and broken counters.
 *
 * Entries whose broken counter equals their count are singled out as not
 * trustworthy.  The remaining logic compares count between the current
 * entry, the previously visited one (prev) and the running candidate
 * (best); equal counts mean the search has to keep going.
 *
 * NOTE(review): the statements executed under several of the conditions
 * below (continue, assignments, the return) are not visible here.
 * Presumably the function returns best, which starts out as NULL --
 * confirm against the full source.
 */
8295 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8297 struct extent_entry *entry, *best = NULL, *prev = NULL;
8299 list_for_each_entry(entry, entries, list) {
8301 * If there are as many broken entries as entries then we know
8302 * not to trust this particular entry.
8304 if (entry->broken == entry->count)
8308 * Special case, when there are only two entries and 'best' is
8318 * If our current entry == best then we can't be sure our best
8319 * is really the best, so we need to keep searching.
8321 if (best && best->count == entry->count) {
8327 /* Prev == entry, not good enough, have to keep searching */
8328 if (!prev->broken && prev->count == entry->count)
8332 best = (prev->count > entry->count) ? prev : entry;
8333 else if (best->count < entry->count)
/*
 * repair_ref - rewrite the file extent item behind @dback so that it
 * describes the extent given by @entry.
 *
 * Visible flow:
 *  1. Read the fs root whose objectid is dback->root.
 *  2. Search that root without a transaction from the key
 *     (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset), moving to the
 *     next leaf when the slot is past the last item, and compare each
 *     item's disk_bytenr/disk_num_bytes with dback->disk_bytenr and
 *     dback->bytes to locate the item to fix.
 *  3. Start a transaction and search again with cow enabled for the key
 *     found above.
 *  4. Update the item: when the backref is marked broken and its
 *     disk_bytenr differs from entry->bytenr only disk_bytenr is
 *     replaced; when the ref lies after or before entry->bytenr both
 *     disk_bytenr and the file extent offset are written.  Then
 *     disk_num_bytes is set to entry->bytes and, for uncompressed
 *     extents, ram_bytes as well.
 *  5. Mark the leaf dirty, commit the transaction, release the path.
 *
 * Compressed extents whose disk_bytenr differs from entry->bytenr, and
 * refs that reach outside the entry, are reported on stderr.
 *
 * Returns PTR_ERR(trans) right after btrfs_start_transaction() (the
 * guarding check is not visible); at the end, ret if it is non-zero,
 * otherwise the result of btrfs_commit_transaction().
 *
 * NOTE(review): several lines (error checks, gotos, loop constructs,
 * some assignments) are missing from this listing; confirm the exact
 * error paths and offset arithmetic in the full source.
 */
8341 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8342 struct data_backref *dback, struct extent_entry *entry)
8344 struct btrfs_trans_handle *trans;
8345 struct btrfs_root *root;
8346 struct btrfs_file_extent_item *fi;
8347 struct extent_buffer *leaf;
8348 struct btrfs_key key;
/* look up the root that dback->root names */
8352 key.objectid = dback->root;
8353 key.type = BTRFS_ROOT_ITEM_KEY;
8354 key.offset = (u64)-1;
8355 root = btrfs_read_fs_root(info, &key);
8357 fprintf(stderr, "Couldn't find root for our ref\n");
8362 * The backref points to the original offset of the extent if it was
8363 * split, so we need to search down to the offset we have and then walk
8364 * forward until we find the backref we're looking for.
8366 key.objectid = dback->owner;
8367 key.type = BTRFS_EXTENT_DATA_KEY;
8368 key.offset = dback->offset;
8369 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8371 fprintf(stderr, "Error looking up ref %d\n", ret);
8376 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8377 ret = btrfs_next_leaf(root, path);
8379 fprintf(stderr, "Couldn't find our ref, next\n");
8383 leaf = path->nodes[0];
8384 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8385 if (key.objectid != dback->owner ||
8386 key.type != BTRFS_EXTENT_DATA_KEY) {
8387 fprintf(stderr, "Couldn't find our ref, search\n");
8390 fi = btrfs_item_ptr(leaf, path->slots[0],
8391 struct btrfs_file_extent_item);
8392 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8393 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8395 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8400 btrfs_release_path(path);
8402 trans = btrfs_start_transaction(root, 1);
8404 return PTR_ERR(trans);
8407 * Ok we have the key of the file extent we want to fix, now we can cow
8408 * down to the thing and fix it.
8410 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8412 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8413 key.objectid, key.type, key.offset, ret);
8417 fprintf(stderr, "Well that's odd, we just found this key "
8418 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8423 leaf = path->nodes[0];
8424 fi = btrfs_item_ptr(leaf, path->slots[0],
8425 struct btrfs_file_extent_item);
8427 if (btrfs_file_extent_compression(leaf, fi) &&
8428 dback->disk_bytenr != entry->bytenr) {
8429 fprintf(stderr, "Ref doesn't match the record start and is "
8430 "compressed, please take a btrfs-image of this file "
8431 "system and send it to a btrfs developer so they can "
8432 "complete this functionality for bytenr %Lu\n",
8433 dback->disk_bytenr);
8438 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8439 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8440 } else if (dback->disk_bytenr > entry->bytenr) {
8441 u64 off_diff, offset;
8443 off_diff = dback->disk_bytenr - entry->bytenr;
8444 offset = btrfs_file_extent_offset(leaf, fi);
8445 if (dback->disk_bytenr + offset +
8446 btrfs_file_extent_num_bytes(leaf, fi) >
8447 entry->bytenr + entry->bytes) {
8448 fprintf(stderr, "Ref is past the entry end, please "
8449 "take a btrfs-image of this file system and "
8450 "send it to a btrfs developer, ref %Lu\n",
8451 dback->disk_bytenr);
8456 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8457 btrfs_set_file_extent_offset(leaf, fi, offset);
8458 } else if (dback->disk_bytenr < entry->bytenr) {
8461 offset = btrfs_file_extent_offset(leaf, fi);
8462 if (dback->disk_bytenr + offset < entry->bytenr) {
8463 fprintf(stderr, "Ref is before the entry start, please"
8464 " take a btrfs-image of this file system and "
8465 "send it to a btrfs developer, ref %Lu\n",
8466 dback->disk_bytenr);
8471 offset += dback->disk_bytenr;
8472 offset -= entry->bytenr;
8473 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8474 btrfs_set_file_extent_offset(leaf, fi, offset);
8477 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8480 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8481 * only do this if we aren't using compression, otherwise it's a
8484 if (!btrfs_file_extent_compression(leaf, fi))
8485 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8487 printf("ram bytes may be wrong?\n");
8488 btrfs_mark_buffer_dirty(leaf);
8490 err = btrfs_commit_transaction(trans, root);
8491 btrfs_release_path(path);
8492 return ret ? ret : err;
/*
 * verify_backrefs - make sure the data backrefs of @rec agree on the
 * extent's disk_bytenr and size, and repair file extent items that do
 * not.
 *
 * Full backrefs, non-data backrefs and backrefs with found_ref == 0 are
 * tested for at the top of both loops (presumably skipped).  For each
 * remaining backref an extent_entry keyed by (disk_bytenr, bytes) is
 * looked up with find_entry() or allocated, zeroed and appended to a
 * local list.  The check "nr_entries <= 1 && !mismatch" identifies the
 * case where all backrefs agree.  Otherwise find_most_right_entry()
 * picks the entry to trust; the extent record itself (rec->start,
 * rec->nr) is looked up among the entries and may be added as a new
 * entry before find_most_right_entry() is consulted again.  A chosen
 * entry that does not start at rec->start is reported as unsupported.
 * Finally repair_ref() is called for considered backrefs (the test on
 * dback->bytes/disk_bytenr against best presumably skips the ones that
 * already match), and the local entry list is emptied.
 *
 * NOTE(review): declarations, counter updates, returns and gotos are
 * partly missing from this listing, so return values are not documented
 * here -- confirm against the full source.
 */
8495 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8496 struct extent_record *rec)
8498 struct extent_backref *back;
8499 struct data_backref *dback;
8500 struct extent_entry *entry, *best = NULL;
8503 int broken_entries = 0;
8508 * Metadata is easy and the backrefs should always agree on bytenr and
8509 * size, if not we've got bigger issues.
8514 list_for_each_entry(back, &rec->backrefs, list) {
8515 if (back->full_backref || !back->is_data)
8518 dback = to_data_backref(back);
8521 * We only pay attention to backrefs that we found a real
8524 if (dback->found_ref == 0)
8528 * For now we only catch when the bytes don't match, not the
8529 * bytenr. We can easily do this at the same time, but I want
8530 * to have a fs image to test on before we just add repair
8531 * functionality willy-nilly so we know we won't screw up the
8535 entry = find_entry(&entries, dback->disk_bytenr,
8538 entry = malloc(sizeof(struct extent_entry));
8543 memset(entry, 0, sizeof(*entry));
8544 entry->bytenr = dback->disk_bytenr;
8545 entry->bytes = dback->bytes;
8546 list_add_tail(&entry->list, &entries);
8551 * If we only have on entry we may think the entries agree when
8552 * in reality they don't so we have to do some extra checking.
8554 if (dback->disk_bytenr != rec->start ||
8555 dback->bytes != rec->nr || back->broken)
8566 /* Yay all the backrefs agree, carry on good sir */
8567 if (nr_entries <= 1 && !mismatch)
8570 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8571 "%Lu\n", rec->start);
8574 * First we want to see if the backrefs can agree amongst themselves who
8575 * is right, so figure out which one of the entries has the highest
8578 best = find_most_right_entry(&entries);
8581 * Ok so we may have an even split between what the backrefs think, so
8582 * this is where we use the extent ref to see what it thinks.
8585 entry = find_entry(&entries, rec->start, rec->nr);
8586 if (!entry && (!broken_entries || !rec->found_rec)) {
8587 fprintf(stderr, "Backrefs don't agree with each other "
8588 "and extent record doesn't agree with anybody,"
8589 " so we can't fix bytenr %Lu bytes %Lu\n",
8590 rec->start, rec->nr);
8593 } else if (!entry) {
8595 * Ok our backrefs were broken, we'll assume this is the
8596 * correct value and add an entry for this range.
8598 entry = malloc(sizeof(struct extent_entry));
8603 memset(entry, 0, sizeof(*entry));
8604 entry->bytenr = rec->start;
8605 entry->bytes = rec->nr;
8606 list_add_tail(&entry->list, &entries);
8610 best = find_most_right_entry(&entries);
8612 fprintf(stderr, "Backrefs and extent record evenly "
8613 "split on who is right, this is going to "
8614 "require user input to fix bytenr %Lu bytes "
8615 "%Lu\n", rec->start, rec->nr);
8622 * I don't think this can happen currently as we'll abort() if we catch
8623 * this case higher up, but in case somebody removes that we still can't
8624 * deal with it properly here yet, so just bail out of that's the case.
8626 if (best->bytenr != rec->start) {
8627 fprintf(stderr, "Extent start and backref starts don't match, "
8628 "please use btrfs-image on this file system and send "
8629 "it to a btrfs developer so they can make fsck fix "
8630 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8631 rec->start, rec->nr);
8637 * Ok great we all agreed on an extent record, let's go find the real
8638 * references and fix up the ones that don't match.
8640 list_for_each_entry(back, &rec->backrefs, list) {
8641 if (back->full_backref || !back->is_data)
8644 dback = to_data_backref(back);
8647 * Still ignoring backrefs that don't have a real ref attached
8650 if (dback->found_ref == 0)
8653 if (dback->bytes == best->bytes &&
8654 dback->disk_bytenr == best->bytenr)
8657 ret = repair_ref(info, path, dback, best);
8663 * Ok we messed with the actual refs, which means we need to drop our
8664 * entire cache and go back and rescan. I know this is a huge pain and
8665 * adds a lot of extra work, but it's the only way to be safe. Once all
8666 * the backrefs agree we may not need to do anything to the extent
8671 while (!list_empty(&entries)) {
8672 entry = list_entry(entries.next, struct extent_entry, list);
8673 list_del_init(&entry->list);
/*
 * process_duplicates - replace @rec in @extent_cache with the duplicate
 * record that was built from a real extent item.
 *
 * The function tests rec->found_rec and rec->num_duplicates > 1 first
 * (the statement taken in that case is not visible) and BUG()s when the
 * record has no duplicate at all.  Otherwise @rec is removed from the
 * cache and the first record on rec->dups ("good") takes over: its list
 * heads, cache range, check flags and num_duplicates are reset, and it
 * inherits rec->refs and rec->backrefs.  Cache extents returned by
 * lookup_cache_extent() for good's range are then merged:
 *  - a record with found_rec set or with duplicates of its own becomes a
 *    duplicate of "good", which is queued on the global
 *    duplicate_extents list if it is not on a list yet;
 *  - any other record only contributes its refs and backrefs.
 * In both cases the merged record is removed from the cache.  "good" is
 * finally inserted into the cache.
 *
 * Returns 0 when "good" ends up with duplicates and 1 when it has none.
 *
 * NOTE(review): loop constructs, early returns and error handling around
 * insert_cache_extent() are not visible in this listing.
 */
8679 static int process_duplicates(struct cache_tree *extent_cache,
8680 struct extent_record *rec)
8682 struct extent_record *good, *tmp;
8683 struct cache_extent *cache;
8687 * If we found a extent record for this extent then return, or if we
8688 * have more than one duplicate we are likely going to need to delete
8691 if (rec->found_rec || rec->num_duplicates > 1)
8694 /* Shouldn't happen but just in case */
8695 BUG_ON(!rec->num_duplicates);
8698 * So this happens if we end up with a backref that doesn't match the
8699 * actual extent entry. So either the backref is bad or the extent
8700 * entry is bad. Either way we want to have the extent_record actually
8701 * reflect what we found in the extent_tree, so we need to take the
8702 * duplicate out and use that as the extent_record since the only way we
8703 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8705 remove_cache_extent(extent_cache, &rec->cache);
8707 good = to_extent_record(rec->dups.next);
8708 list_del_init(&good->list);
8709 INIT_LIST_HEAD(&good->backrefs);
8710 INIT_LIST_HEAD(&good->dups);
8711 good->cache.start = good->start;
8712 good->cache.size = good->nr;
8713 good->content_checked = 0;
8714 good->owner_ref_checked = 0;
8715 good->num_duplicates = 0;
8716 good->refs = rec->refs;
8717 list_splice_init(&rec->backrefs, &good->backrefs);
8719 cache = lookup_cache_extent(extent_cache, good->start,
8723 tmp = container_of(cache, struct extent_record, cache);
8726 * If we find another overlapping extent and it's found_rec is
8727 * set then it's a duplicate and we need to try and delete
8730 if (tmp->found_rec || tmp->num_duplicates > 0) {
8731 if (list_empty(&good->list))
8732 list_add_tail(&good->list,
8733 &duplicate_extents);
8734 good->num_duplicates += tmp->num_duplicates + 1;
8735 list_splice_init(&tmp->dups, &good->dups);
8736 list_del_init(&tmp->list);
8737 list_add_tail(&tmp->list, &good->dups);
8738 remove_cache_extent(extent_cache, &tmp->cache);
8743 * Ok we have another non extent item backed extent rec, so lets
8744 * just add it to this extent and carry on like we did above.
8746 good->refs += tmp->refs;
8747 list_splice_init(&tmp->backrefs, &good->backrefs);
8748 remove_cache_extent(extent_cache, &tmp->cache);
8751 ret = insert_cache_extent(extent_cache, &good->cache);
8754 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - delete the redundant extent items recorded
 * as duplicates of @rec.
 *
 * The records on rec->dups are compared by start and length to find the
 * one that covers all the others ("good"); a duplicate that is not fully
 * covered is reported on stderr as an unsupported case.  @rec and
 * entries of rec->dups are then moved onto a local delete_list.  Within
 * a transaction on the extent root, each record on delete_list is
 * examined: found_rec == 0 is tested for (presumably such records are
 * skipped), a metadata record is reported as unexpected, and the key
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) is searched and removed with
 * btrfs_del_item().  After the commit both delete_list and rec->dups are
 * emptied and the path is released.
 *
 * rec->num_duplicates is reset to 0 when both ret and nr_del are zero.
 * Returns ret when it is non-zero, otherwise nr_del.
 *
 * NOTE(review): the assignments choosing "good", the nr_del updates and
 * the goto/error paths are not visible in this listing.
 */
8757 static int delete_duplicate_records(struct btrfs_root *root,
8758 struct extent_record *rec)
8760 struct btrfs_trans_handle *trans;
8761 LIST_HEAD(delete_list);
8762 struct btrfs_path path;
8763 struct extent_record *tmp, *good, *n;
8766 struct btrfs_key key;
8768 btrfs_init_path(&path);
8771 /* Find the record that covers all of the duplicates. */
8772 list_for_each_entry(tmp, &rec->dups, list) {
8773 if (good->start < tmp->start)
8775 if (good->nr > tmp->nr)
8778 if (tmp->start + tmp->nr < good->start + good->nr) {
8779 fprintf(stderr, "Ok we have overlapping extents that "
8780 "aren't completely covered by each other, this "
8781 "is going to require more careful thought. "
8782 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8783 tmp->start, tmp->nr, good->start, good->nr);
8790 list_add_tail(&rec->list, &delete_list);
8792 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8795 list_move_tail(&tmp->list, &delete_list);
8798 root = root->fs_info->extent_root;
8799 trans = btrfs_start_transaction(root, 1);
8800 if (IS_ERR(trans)) {
8801 ret = PTR_ERR(trans);
8805 list_for_each_entry(tmp, &delete_list, list) {
8806 if (tmp->found_rec == 0)
8808 key.objectid = tmp->start;
8809 key.type = BTRFS_EXTENT_ITEM_KEY;
8810 key.offset = tmp->nr;
8812 /* Shouldn't happen but just in case */
8813 if (tmp->metadata) {
8814 fprintf(stderr, "Well this shouldn't happen, extent "
8815 "record overlaps but is metadata? "
8816 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8820 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8826 ret = btrfs_del_item(trans, root, &path);
8829 btrfs_release_path(&path);
8832 err = btrfs_commit_transaction(trans, root);
8836 while (!list_empty(&delete_list)) {
8837 tmp = to_extent_record(delete_list.next);
8838 list_del_init(&tmp->list);
8844 while (!list_empty(&rec->dups)) {
8845 tmp = to_extent_record(rec->dups.next);
8846 list_del_init(&tmp->list);
8850 btrfs_release_path(&path);
8852 if (!ret && !nr_del)
8853 rec->num_duplicates = 0;
8855 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - try to locate the file extent items for the
 * data backrefs of @rec that have not been matched to a real ref yet.
 *
 * Full backrefs and non-data backrefs are not of interest, and backrefs
 * with found_ref already set need no lookup.  For each remaining backref
 * the owning root (dback->root) is read and searched, without a
 * transaction, for the key (dback->owner, BTRFS_EXTENT_DATA_KEY,
 * dback->offset).  disk_bytenr and disk_num_bytes are read from the
 * item at the resulting slot, @extent_cache is consulted for a record at
 * that bytenr, and the backref's found_ref, disk_bytenr and bytes are
 * updated from the file extent item.
 *
 * A root that fails to load with -ENOENT is treated as a bad ref; any
 * other root error is returned as PTR_ERR(root).
 *
 * NOTE(review): the continue/return statements, the handling of the
 * search result and the test applied to the cached record are not
 * visible in this listing -- confirm against the full source.
 */
8858 static int find_possible_backrefs(struct btrfs_fs_info *info,
8859 struct btrfs_path *path,
8860 struct cache_tree *extent_cache,
8861 struct extent_record *rec)
8863 struct btrfs_root *root;
8864 struct extent_backref *back;
8865 struct data_backref *dback;
8866 struct cache_extent *cache;
8867 struct btrfs_file_extent_item *fi;
8868 struct btrfs_key key;
8872 list_for_each_entry(back, &rec->backrefs, list) {
8873 /* Don't care about full backrefs (poor unloved backrefs) */
8874 if (back->full_backref || !back->is_data)
8877 dback = to_data_backref(back);
8879 /* We found this one, we don't need to do a lookup */
8880 if (dback->found_ref)
8883 key.objectid = dback->root;
8884 key.type = BTRFS_ROOT_ITEM_KEY;
8885 key.offset = (u64)-1;
8887 root = btrfs_read_fs_root(info, &key);
8889 /* No root, definitely a bad ref, skip */
8890 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8892 /* Other err, exit */
8894 return PTR_ERR(root);
8896 key.objectid = dback->owner;
8897 key.type = BTRFS_EXTENT_DATA_KEY;
8898 key.offset = dback->offset;
8899 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8901 btrfs_release_path(path);
8904 /* Didn't find it, we can carry on */
8909 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8910 struct btrfs_file_extent_item);
8911 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8912 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8913 btrfs_release_path(path);
8914 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8916 struct extent_record *tmp;
8917 tmp = container_of(cache, struct extent_record, cache);
8920 * If we found an extent record for the bytenr for this
8921 * particular backref then we can't add it to our
8922 * current extent record. We only want to add backrefs
8923 * that don't have a corresponding extent item in the
8924 * extent tree since they likely belong to this record
8925 * and we need to fix it if it doesn't match bytenrs.
8931 dback->found_ref += 1;
8932 dback->disk_bytenr = bytenr;
8933 dback->bytes = bytes;
8936 * Set this so the verify backref code knows not to trust the
8937 * values in this backref.
8946 * Record orphan data ref into corresponding root.
8948 * Return 0 if the extent item contains data ref and recorded.
8949 * Return 1 if the extent item contains no useful data ref
8950 * In that case, it may contain only shared_dataref or metadata backref
8951 * or the file extent exists(this should be handled by the extent bytenr
8953 * Return <0 if something goes wrong.
/*
 * record_orphan_data_extents - remember data extents of @rec whose file
 * extent item is missing, on the list of the root that should own them.
 *
 * Only data backrefs that are not full backrefs, were found in the
 * extent tree (found_extent_tree) and have no found_ref are of interest.
 * For each of them the destination root (dback->root) is read -- a root
 * that cannot be read is skipped -- and searched for the key
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset).  An
 * orphan_data_extent describing (root, owner, offset) and the extent's
 * cached start/size is then allocated and linked onto
 * dest_root->orphan_data_extents.
 *
 * The final visible return yields 0 when at least one orphan was
 * recorded and 1 otherwise.
 *
 * NOTE(review): the continue statements, the test on the search result,
 * the allocation failure path and other return paths are not visible in
 * this listing -- confirm against the full source.
 */
8955 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8956 struct extent_record *rec)
8958 struct btrfs_key key;
8959 struct btrfs_root *dest_root;
8960 struct extent_backref *back;
8961 struct data_backref *dback;
8962 struct orphan_data_extent *orphan;
8963 struct btrfs_path path;
8964 int recorded_data_ref = 0;
8969 btrfs_init_path(&path);
8970 list_for_each_entry(back, &rec->backrefs, list) {
8971 if (back->full_backref || !back->is_data ||
8972 !back->found_extent_tree)
8974 dback = to_data_backref(back);
8975 if (dback->found_ref)
8977 key.objectid = dback->root;
8978 key.type = BTRFS_ROOT_ITEM_KEY;
8979 key.offset = (u64)-1;
8981 dest_root = btrfs_read_fs_root(fs_info, &key);
8983 /* For non-exist root we just skip it */
8984 if (IS_ERR(dest_root) || !dest_root)
8987 key.objectid = dback->owner;
8988 key.type = BTRFS_EXTENT_DATA_KEY;
8989 key.offset = dback->offset;
8991 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8992 btrfs_release_path(&path);
8994 * For ret < 0, it's OK since the fs-tree may be corrupted,
8995 * we need to record it for inode/file extent rebuild.
8996 * For ret > 0, we record it only for file extent rebuild.
8997 * For ret == 0, the file extent exists but only bytenr
8998 * mismatch, let the original bytenr fix routine to handle,
9004 orphan = malloc(sizeof(*orphan));
9009 INIT_LIST_HEAD(&orphan->list);
9010 orphan->root = dback->root;
9011 orphan->objectid = dback->owner;
9012 orphan->offset = dback->offset;
9013 orphan->disk_bytenr = rec->cache.start;
9014 orphan->disk_len = rec->cache.size;
/*
 * list_add(new, head): the orphan is the new node and the root's list is
 * the head, so earlier orphans of the same root stay linked.
 */
9015 list_add(&orphan->list, &dest_root->orphan_data_extents);
9016 recorded_data_ref = 1;
9019 btrfs_release_path(&path);
9021 return !recorded_data_ref;
9027 * when an incorrect extent item is found, this will delete
9028 * all of the existing entries for it and recreate them
9029 * based on what the tree scan found.
/*
 * fixup_extent_refs - rebuild the extent tree entries of @rec from the
 * backrefs found during the tree scan.
 *
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is added to the flags passed on when
 * rec->flag_block_full_backref is set.  For non-metadata records whose
 * counted refs differ from the extent item's refs,
 * find_possible_backrefs() and then verify_backrefs() run first.  After
 * that, in a transaction on info->extent_root:
 *  - step two deletes the existing records via delete_extent_records();
 *  - info->corrupt_blocks is consulted for the record's range;
 *  - step three walks rec->backrefs and calls record_extent() for
 *    backrefs, testing back->found_ref first (backrefs without any found
 *    reference are not meant to be recreated); rec->bad_full_backref is
 *    cleared on the way.
 * The transaction is committed, a "Repaired extent references" message
 * is printed and the path is released.
 *
 * NOTE(review): error checks, gotos, the loop advance and the return are
 * not visible in this listing -- confirm against the full source.
 */
9031 static int fixup_extent_refs(struct btrfs_fs_info *info,
9032 struct cache_tree *extent_cache,
9033 struct extent_record *rec)
9035 struct btrfs_trans_handle *trans = NULL;
9037 struct btrfs_path path;
9038 struct list_head *cur = rec->backrefs.next;
9039 struct cache_extent *cache;
9040 struct extent_backref *back;
9044 if (rec->flag_block_full_backref)
9045 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9047 btrfs_init_path(&path);
9048 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9050 * Sometimes the backrefs themselves are so broken they don't
9051 * get attached to any meaningful rec, so first go back and
9052 * check any of our backrefs that we couldn't find and throw
9053 * them into the list if we find the backref so that
9054 * verify_backrefs can figure out what to do.
9056 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9061 /* step one, make sure all of the backrefs agree */
9062 ret = verify_backrefs(info, &path, rec);
9066 trans = btrfs_start_transaction(info->extent_root, 1);
9067 if (IS_ERR(trans)) {
9068 ret = PTR_ERR(trans);
9072 /* step two, delete all the existing records */
9073 ret = delete_extent_records(trans, info->extent_root, &path,
9079 /* was this block corrupt? If so, don't add references to it */
9080 cache = lookup_cache_extent(info->corrupt_blocks,
9081 rec->start, rec->max_size);
9087 /* step three, recreate all the refs we did find */
9088 while(cur != &rec->backrefs) {
9089 back = to_extent_backref(cur);
9093 * if we didn't find any references, don't create a
9096 if (!back->found_ref)
9099 rec->bad_full_backref = 0;
9100 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9108 int err = btrfs_commit_transaction(trans, info->extent_root);
9114 fprintf(stderr, "Repaired extent references for %llu\n",
9115 (unsigned long long)rec->start);
9117 btrfs_release_path(&path);
/*
 * fixup_extent_flags - set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the
 * extent item of @rec so that it matches rec->flag_block_full_backref.
 *
 * The item is looked up in the extent root under rec->start.  Two key
 * shapes are visible: (BTRFS_METADATA_ITEM_KEY, rec->info_level) for
 * metadata records and (BTRFS_EXTENT_ITEM_KEY, rec->max_size), the
 * latter presumably in the else branch.  The lookup runs with cow
 * enabled inside a transaction; on the visible failure paths the path is
 * released and the transaction is still committed.  On success the
 * flags are read, the full-backref bit is set or cleared, the leaf is
 * marked dirty and the transaction is committed.
 *
 * Returns PTR_ERR(trans) right after btrfs_start_transaction() (the
 * guarding check is not visible).  NOTE(review): the other return
 * statements are not visible in this listing.
 */
9121 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9122 struct extent_record *rec)
9124 struct btrfs_trans_handle *trans;
9125 struct btrfs_root *root = fs_info->extent_root;
9126 struct btrfs_path path;
9127 struct btrfs_extent_item *ei;
9128 struct btrfs_key key;
9132 key.objectid = rec->start;
9133 if (rec->metadata) {
9134 key.type = BTRFS_METADATA_ITEM_KEY;
9135 key.offset = rec->info_level;
9137 key.type = BTRFS_EXTENT_ITEM_KEY;
9138 key.offset = rec->max_size;
9141 trans = btrfs_start_transaction(root, 0);
9143 return PTR_ERR(trans);
9145 btrfs_init_path(&path);
9146 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9148 btrfs_release_path(&path);
9149 btrfs_commit_transaction(trans, root);
9152 fprintf(stderr, "Didn't find extent for %llu\n",
9153 (unsigned long long)rec->start);
9154 btrfs_release_path(&path);
9155 btrfs_commit_transaction(trans, root);
9159 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9160 struct btrfs_extent_item);
9161 flags = btrfs_extent_flags(path.nodes[0], ei);
9162 if (rec->flag_block_full_backref) {
9163 fprintf(stderr, "setting full backref on %llu\n",
9164 (unsigned long long)key.objectid);
9165 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9167 fprintf(stderr, "clearing full backref on %llu\n",
9168 (unsigned long long)key.objectid);
9169 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9171 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9172 btrfs_mark_buffer_dirty(path.nodes[0]);
9173 btrfs_release_path(&path);
9174 ret = btrfs_commit_transaction(trans, root);
9176 fprintf(stderr, "Repaired extent flags for %llu\n",
9177 (unsigned long long)rec->start);
9182 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block - remove the pointer to a corrupt block from its
 * parent node in the extent allocation tree.
 *
 * The search for corrupt->key in info->extent_root is told to stop one
 * level above the corrupt block (path.lowest_level = corrupt->level + 1)
 * and runs with cow enabled.  The slot the search landed on is checked
 * first; if its block pointer is not corrupt->cache.start every slot of
 * that node is scanned for it.  The pointer is removed with
 * btrfs_del_ptr() and the path is released.
 *
 * NOTE(review): the checks on the search result, the jumps between the
 * lookup and the deletion, the handling when the node is the extent
 * root's node, and the return values are not visible in this listing.
 */
9183 static int prune_one_block(struct btrfs_trans_handle *trans,
9184 struct btrfs_fs_info *info,
9185 struct btrfs_corrupt_block *corrupt)
9188 struct btrfs_path path;
9189 struct extent_buffer *eb;
9193 int level = corrupt->level + 1;
9195 btrfs_init_path(&path);
9197 /* we want to stop at the parent to our busted block */
9198 path.lowest_level = level;
9200 ret = btrfs_search_slot(trans, info->extent_root,
9201 &corrupt->key, &path, -1, 1);
9206 eb = path.nodes[level];
9213 * hopefully the search gave us the block we want to prune,
9214 * lets try that first
9216 slot = path.slots[level];
9217 found = btrfs_node_blockptr(eb, slot);
9218 if (found == corrupt->cache.start)
9221 nritems = btrfs_header_nritems(eb);
9223 /* the search failed, lets scan this node and hope we find it */
9224 for (slot = 0; slot < nritems; slot++) {
9225 found = btrfs_node_blockptr(eb, slot);
9226 if (found == corrupt->cache.start)
9230 * we couldn't find the bad block. TODO, search all the nodes for pointers
9233 if (eb == info->extent_root->node) {
9238 btrfs_release_path(&path);
9243 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9244 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9247 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - run prune_one_block() on the entries of
 * info->corrupt_blocks and drop them from that cache.
 *
 * Entries are fetched with search_cache_extent(..., 0).  A transaction
 * on the extent root is started (trans starts out NULL, so presumably
 * only once, on first use), the entry is pruned and then removed from
 * the cache.  The return value of prune_one_block() is not used.
 *
 * Returns PTR_ERR(trans) right after btrfs_start_transaction() (the
 * guarding check is not visible); the last visible return is the result
 * of btrfs_commit_transaction().
 *
 * NOTE(review): the loop construct and any return taken when there is
 * nothing to prune are not visible in this listing.
 */
9251 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9253 struct btrfs_trans_handle *trans = NULL;
9254 struct cache_extent *cache;
9255 struct btrfs_corrupt_block *corrupt;
9258 cache = search_cache_extent(info->corrupt_blocks, 0);
9262 trans = btrfs_start_transaction(info->extent_root, 1);
9264 return PTR_ERR(trans);
9266 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9267 prune_one_block(trans, info, corrupt);
9268 remove_cache_extent(info->corrupt_blocks, cache);
9271 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - clear the EXTENT_DIRTY ranges recorded in
 * fs_info->free_space_cache and walk the block groups.
 *
 * Dirty ranges are located with find_first_extent_bit() starting at
 * offset 0 and cleared with clear_extent_dirty().  Block groups are then
 * looked up from the current start, and start is advanced to the end of
 * each one found (key.objectid + key.offset).
 *
 * NOTE(review): the loop constructs and whatever is done to each block
 * group are not visible in this listing -- confirm against the full
 * source.
 */
9275 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9277 struct btrfs_block_group_cache *cache;
9282 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9283 &start, &end, EXTENT_DIRTY);
9286 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9291 cache = btrfs_lookup_first_block_group(fs_info, start);
9296 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - report, and in repair mode fix, inconsistencies in
 * the extent records collected in @extent_cache.
 *
 * Preparation (described by the in-code comment as repair-only): every
 * extent record and every corrupt block is marked dirty in
 * root->fs_info->excluded_extents so nothing is allocated from those
 * ranges, then prune_corrupt_blocks() and reset_cached_block_groups()
 * run.
 *
 * Duplicates: while repairing, records on the global duplicate_extents
 * list go through process_duplicates() and, when that returns 0,
 * delete_duplicate_records().
 *
 * Main pass, per record taken from the cache:
 *  - "has multiple extent items" when num_duplicates is set;
 *  - "ref mismatch" when refs != extent_item_refs, followed by
 *    record_orphan_data_extents();
 *  - "backpointer mismatch" based on all_backpointers_checked(rec, 1);
 *  - "owner ref check failed" when owner_ref_checked is not set;
 *  - fixup_extent_refs() when repair and fix are both set;
 *  - "bad full backref" followed by fixup_extent_flags();
 *  - metadata crossing a stripe boundary and chunk type mismatches are
 *    reported;
 *  - the record is removed from the cache, its backrefs are freed and,
 *    when "!init_extent_tree && repair && (!cur_err || fix)" holds, its
 *    range is cleared from excluded_extents again.
 *
 * Epilogue: a ret other than 0 or -EAGAIN prints "failed to repair
 * damaged filesystem, aborting"; another visible path starts a
 * transaction on the extent root, calls btrfs_fix_block_accounting() and
 * commits.
 *
 * NOTE(review): loop constructs, the bookkeeping of fix/cur_err/err, the
 * conditions guarding the repair calls and all return statements are not
 * visible in this listing -- confirm against the full source.
 */
9300 static int check_extent_refs(struct btrfs_root *root,
9301 struct cache_tree *extent_cache)
9303 struct extent_record *rec;
9304 struct cache_extent *cache;
9310 * if we're doing a repair, we have to make sure
9311 * we don't allocate from the problem extents.
9312 * In the worst case, this will be all the
9315 cache = search_cache_extent(extent_cache, 0);
9317 rec = container_of(cache, struct extent_record, cache);
9318 set_extent_dirty(root->fs_info->excluded_extents,
9320 rec->start + rec->max_size - 1);
9321 cache = next_cache_extent(cache);
9324 /* pin down all the corrupted blocks too */
9325 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9327 set_extent_dirty(root->fs_info->excluded_extents,
9329 cache->start + cache->size - 1);
9330 cache = next_cache_extent(cache);
9332 prune_corrupt_blocks(root->fs_info);
9333 reset_cached_block_groups(root->fs_info);
9336 reset_cached_block_groups(root->fs_info);
9339 * We need to delete any duplicate entries we find first otherwise we
9340 * could mess up the extent tree when we have backrefs that actually
9341 * belong to a different extent item and not the weird duplicate one.
9343 while (repair && !list_empty(&duplicate_extents)) {
9344 rec = to_extent_record(duplicate_extents.next);
9345 list_del_init(&rec->list);
9347 /* Sometimes we can find a backref before we find an actual
9348 * extent, so we need to process it a little bit to see if there
9349 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9350 * if this is a backref screwup. If we need to delete stuff
9351 * process_duplicates() will return 0, otherwise it will return
9354 if (process_duplicates(extent_cache, rec))
9356 ret = delete_duplicate_records(root, rec);
9360 * delete_duplicate_records will return the number of entries
9361 * deleted, so if it's greater than 0 then we know we actually
9362 * did something and we need to remove.
9375 cache = search_cache_extent(extent_cache, 0);
9378 rec = container_of(cache, struct extent_record, cache);
9379 if (rec->num_duplicates) {
9380 fprintf(stderr, "extent item %llu has multiple extent "
9381 "items\n", (unsigned long long)rec->start);
9385 if (rec->refs != rec->extent_item_refs) {
9386 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9387 (unsigned long long)rec->start,
9388 (unsigned long long)rec->nr);
9389 fprintf(stderr, "extent item %llu, found %llu\n",
9390 (unsigned long long)rec->extent_item_refs,
9391 (unsigned long long)rec->refs);
9392 ret = record_orphan_data_extents(root->fs_info, rec);
9398 if (all_backpointers_checked(rec, 1)) {
9399 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9400 (unsigned long long)rec->start,
9401 (unsigned long long)rec->nr);
9405 if (!rec->owner_ref_checked) {
9406 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9407 (unsigned long long)rec->start,
9408 (unsigned long long)rec->nr);
9413 if (repair && fix) {
9414 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9420 if (rec->bad_full_backref) {
9421 fprintf(stderr, "bad full backref, on [%llu]\n",
9422 (unsigned long long)rec->start);
9424 ret = fixup_extent_flags(root->fs_info, rec);
9432 * Although it's not a extent ref's problem, we reuse this
9433 * routine for error reporting.
9434 * No repair function yet.
9436 if (rec->crossing_stripes) {
9438 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9439 rec->start, rec->start + rec->max_size);
9443 if (rec->wrong_chunk_type) {
9445 "bad extent [%llu, %llu), type mismatch with chunk\n",
9446 rec->start, rec->start + rec->max_size);
9450 remove_cache_extent(extent_cache, cache);
9451 free_all_extent_backrefs(rec);
9452 if (!init_extent_tree && repair && (!cur_err || fix))
9453 clear_extent_dirty(root->fs_info->excluded_extents,
9455 rec->start + rec->max_size - 1);
9460 if (ret && ret != -EAGAIN) {
9461 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9464 struct btrfs_trans_handle *trans;
9466 root = root->fs_info->extent_root;
9467 trans = btrfs_start_transaction(root, 1);
9468 if (IS_ERR(trans)) {
9469 ret = PTR_ERR(trans);
9473 btrfs_fix_block_accounting(trans, root);
9474 ret = btrfs_commit_transaction(trans, root);
/*
 * calc_stripe_length - derive the stripe size from a chunk's @length,
 * its profile bits in @type and its @num_stripes.
 *
 *   RAID0:  length / num_stripes
 *   RAID10: length * 2 / num_stripes
 *   RAID5:  length / (num_stripes - 1)
 *   RAID6:  length / (num_stripes - 2)
 *   The plain "stripe_size = length" assignment is presumably the
 *   fallback for every other profile.
 *
 * NOTE(review): no guard against a zero divisor is visible here
 * (num_stripes of 0, 1 for RAID5, or 2 for RAID6); presumably callers
 * pass validated chunk data -- confirm.  The return statement is not
 * visible in this listing; presumably stripe_size is returned.
 */
9483 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9487 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9488 stripe_size = length;
9489 stripe_size /= num_stripes;
9490 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9491 stripe_size = length * 2;
9492 stripe_size /= num_stripes;
9493 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9494 stripe_size = length;
9495 stripe_size /= (num_stripes - 1);
9496 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9497 stripe_size = length;
9498 stripe_size /= (num_stripes - 2);
9500 stripe_size = length;
9506 * Check the chunk with its block group/dev list ref:
9507 * Return 0 if all refs seems valid.
9508 * Return 1 if part of refs seems valid, need later check for rebuild ref
9509 * like missing block group and needs to search extent tree to rebuild them.
9510 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * check_chunk_refs - cross-check one chunk record against the block
 * group cache and the device extent cache.
 *
 * Block group: the record is looked up in block_group_cache->tree.  Its
 * offset, objectid and flags are compared with the chunk's length,
 * offset and type_flags, and a mismatch is reported.  The block group
 * record is unlinked from its list and stored in chunk_rec->bg_rec
 * (presumably only when the fields match; the else is not visible).  A
 * chunk without any block group is reported as "not found in block
 * group".
 *
 * Device extents: the expected length comes from calc_stripe_length().
 * For every stripe a device extent is looked up by (devid, offset,
 * length) and its objectid, offset, chunk_offset and length are compared
 * with the stripe; a mismatch is reported, and the device extent is
 * moved onto chunk_rec->dextents (presumably only when it matches).  A
 * stripe without a device extent is reported as "not found in dev
 * extent".
 *
 * NOTE(review): the fourth parameter, the use of metadump_v2, the
 * conditions silencing the messages and the return statements are not
 * visible in this listing -- confirm against the full source.
 */
9512 static int check_chunk_refs(struct chunk_record *chunk_rec,
9513 struct block_group_tree *block_group_cache,
9514 struct device_extent_tree *dev_extent_cache,
9517 struct cache_extent *block_group_item;
9518 struct block_group_record *block_group_rec;
9519 struct cache_extent *dev_extent_item;
9520 struct device_extent_record *dev_extent_rec;
9524 int metadump_v2 = 0;
9528 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9531 if (block_group_item) {
9532 block_group_rec = container_of(block_group_item,
9533 struct block_group_record,
9535 if (chunk_rec->length != block_group_rec->offset ||
9536 chunk_rec->offset != block_group_rec->objectid ||
9538 chunk_rec->type_flags != block_group_rec->flags)) {
9541 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9542 chunk_rec->objectid,
9547 chunk_rec->type_flags,
9548 block_group_rec->objectid,
9549 block_group_rec->type,
9550 block_group_rec->offset,
9551 block_group_rec->offset,
9552 block_group_rec->objectid,
9553 block_group_rec->flags);
9556 list_del_init(&block_group_rec->list);
9557 chunk_rec->bg_rec = block_group_rec;
9562 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9563 chunk_rec->objectid,
9568 chunk_rec->type_flags);
9575 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9576 chunk_rec->num_stripes);
9577 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9578 devid = chunk_rec->stripes[i].devid;
9579 offset = chunk_rec->stripes[i].offset;
9580 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9581 devid, offset, length);
9582 if (dev_extent_item) {
9583 dev_extent_rec = container_of(dev_extent_item,
9584 struct device_extent_record,
9586 if (dev_extent_rec->objectid != devid ||
9587 dev_extent_rec->offset != offset ||
9588 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9589 dev_extent_rec->length != length) {
9592 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9593 chunk_rec->objectid,
9596 chunk_rec->stripes[i].devid,
9597 chunk_rec->stripes[i].offset,
9598 dev_extent_rec->objectid,
9599 dev_extent_rec->offset,
9600 dev_extent_rec->length);
9603 list_move(&dev_extent_rec->chunk_list,
9604 &chunk_rec->dextents);
9609 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9610 chunk_rec->objectid,
9613 chunk_rec->stripes[i].devid,
9614 chunk_rec->stripes[i].offset);
9621 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Walk every chunk record in @chunk_cache, verify it with check_chunk_refs()
 * and file it on one of the caller supplied result lists.
 *
 * @chunk_cache:       cache tree holding the chunk records
 * @block_group_cache: block group records; its block_groups list is walked
 *                     after the chunk pass and its entries are reported
 * @dev_extent_cache:  device extent records; its no_chunk_orphans list is
 *                     walked after the chunk pass and its entries are reported
 * @good:    receives chunks for which check_chunk_refs() returned 0
 * @bad:     receives the remaining chunks (presumably negative results,
 *           TODO confirm the guarding condition)
 * @rebuild: receives chunks for which check_chunk_refs() returned > 0
 * @silent:  forwarded to check_chunk_refs()
 *
 * @good and @rebuild are tested for NULL before use; the caller in
 * check_chunks_and_extents() passes NULL for all three lists.
 */
9622 int check_chunks(struct cache_tree *chunk_cache,
9623 struct block_group_tree *block_group_cache,
9624 struct device_extent_tree *dev_extent_cache,
9625 struct list_head *good, struct list_head *bad,
9626 struct list_head *rebuild, int silent)
9628 struct cache_extent *chunk_item;
9629 struct chunk_record *chunk_rec;
9630 struct block_group_record *bg_rec;
9631 struct device_extent_record *dext_rec;
/* Visit the chunk records in the order the cache tree yields them. */
9635 chunk_item = first_cache_extent(chunk_cache);
9636 while (chunk_item) {
9637 chunk_rec = container_of(chunk_item, struct chunk_record,
9639 err = check_chunk_refs(chunk_rec, block_group_cache,
9640 dev_extent_cache, silent);
/* File the chunk by result; a NULL list pointer disables that list. */
9643 if (err == 0 && good)
9644 list_add_tail(&chunk_rec->list, good);
9645 if (err > 0 && rebuild)
9646 list_add_tail(&chunk_rec->list, rebuild);
9648 list_add_tail(&chunk_rec->list, bad);
9649 chunk_item = next_cache_extent(chunk_item);
/*
 * check_chunk_refs() unlinks every block group record it matches through the
 * same "list" member, so the records reached here were not claimed by a chunk.
 */
9652 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9655 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9663 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9667 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of cached device extents for one device and compare the
 * total with the byte_used value stored in the device record.
 *
 * @dev_rec:    device record being verified
 * @dext_cache: device extent records, walked through its tree member
 *
 * The walk starts at the cache entry returned for (devid, 0) and compares the
 * objectid of each extent with the devid.  Every extent that is accounted is
 * also unlinked from the list its device_list member is on.  A total that
 * differs from byte_used produces the message below.
 */
9678 static int check_device_used(struct device_record *dev_rec,
9679 struct device_extent_tree *dext_cache)
9681 struct cache_extent *cache;
9682 struct device_extent_record *dev_extent_rec;
/* Start from the cache entry returned for (devid, 0). */
9685 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9687 dev_extent_rec = container_of(cache,
9688 struct device_extent_record,
9690 if (dev_extent_rec->objectid != dev_rec->devid)
9693 list_del_init(&dev_extent_rec->device_list);
9694 total_byte += dev_extent_rec->length;
9695 cache = next_cache_extent(cache);
/* The summed extent lengths must equal the usage recorded in the device item. */
9698 if (total_byte != dev_rec->byte_used) {
9700 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9701 total_byte, dev_rec->byte_used, dev_rec->objectid,
9702 dev_rec->type, dev_rec->offset);
9709 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Run check_device_used() on every device record in @dev_cache, then report
 * the device extents found on the no_device_orphans list.
 *
 * @dev_cache:        rb-tree of device_record, linked through the node member
 * @dev_extent_cache: device extent records
 */
9710 static int check_devices(struct rb_root *dev_cache,
9711 struct device_extent_tree *dev_extent_cache)
9713 struct rb_node *dev_node;
9714 struct device_record *dev_rec;
9715 struct device_extent_record *dext_rec;
/* Iterate the device records in rb-tree order. */
9719 dev_node = rb_first(dev_cache);
9721 dev_rec = container_of(dev_node, struct device_record, node);
9722 err = check_device_used(dev_rec, dev_extent_cache);
9726 dev_node = rb_next(dev_node);
/*
 * Report whatever is on the no_device_orphans list at this point;
 * presumably these are extents no device claimed above (TODO confirm the
 * list member used for the walk).
 */
9728 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9731 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9732 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root and append it to the
 * tail of @head.
 *
 * @head:          list receiving the new record
 * @objectid:      stored in the objectid field of the record
 * @bytenr:        stored in the bytenr field (start of the root block)
 * @last_snapshot: stored in the last_snapshot field
 * @level:         stored in the level field
 * @drop_level:    stored in the drop_level field
 * @level_size:    stored in the level_size field; callers pass the nodesize
 * @drop_key:      copied into the drop_key field; callers in this file pass
 *                 NULL for roots that are not being dropped
 *
 * The return value is consumed as a status code by the callers.
 */
9739 static int add_root_item_to_list(struct list_head *head,
9740 u64 objectid, u64 bytenr, u64 last_snapshot,
9741 u8 level, u8 drop_level,
9742 int level_size, struct btrfs_key *drop_key)
9745 struct root_item_record *ri_rec;
9746 ri_rec = malloc(sizeof(*ri_rec));
/* Copy the caller supplied description into the new record. */
9749 ri_rec->bytenr = bytenr;
9750 ri_rec->objectid = objectid;
9751 ri_rec->level = level;
9752 ri_rec->level_size = level_size;
9753 ri_rec->drop_level = drop_level;
9754 ri_rec->last_snapshot = last_snapshot;
/* NOTE(review): drop_key can be NULL at the call sites; confirm this copy is guarded. */
9756 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9757 list_add_tail(&ri_rec->list, head);
/*
 * Drain @list by repeatedly taking its first root_item_record and unlinking
 * it until the list is empty.
 *
 * @list: list of root_item_record built by add_root_item_to_list()
 */
9762 static void free_root_item_list(struct list_head *list)
9764 struct root_item_record *ri_rec;
9766 while (!list_empty(list)) {
9767 ri_rec = list_first_entry(list, struct root_item_record,
9769 list_del_init(&ri_rec->list);
/*
 * Process every root_item_record queued on @list: read the root block, hand
 * it to add_root_to_pending() and walk it with run_next_block().  Each record
 * is unlinked from @list once it has been handled.
 *
 * @list: work list of root_item_record
 * @root: root whose fs_info is used for reading tree blocks
 * @bits: scratch array forwarded to run_next_block()
 *
 * The remaining parameters are the shared cache trees; they are passed
 * straight through to add_root_to_pending() and run_next_block().
 */
9774 static int deal_root_from_list(struct list_head *list,
9775 struct btrfs_root *root,
9776 struct block_info *bits,
9778 struct cache_tree *pending,
9779 struct cache_tree *seen,
9780 struct cache_tree *reada,
9781 struct cache_tree *nodes,
9782 struct cache_tree *extent_cache,
9783 struct cache_tree *chunk_cache,
9784 struct rb_root *dev_cache,
9785 struct block_group_tree *block_group_cache,
9786 struct device_extent_tree *dev_extent_cache)
9791 while (!list_empty(list)) {
9792 struct root_item_record *rec;
9793 struct extent_buffer *buf;
9794 rec = list_entry(list->next,
9795 struct root_item_record, list);
/* Read the root block of this record; the transid argument is 0. */
9797 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
9798 if (!extent_buffer_uptodate(buf)) {
9799 free_extent_buffer(buf);
9803 ret = add_root_to_pending(buf, extent_cache, pending,
9804 seen, nodes, rec->objectid);
9808 * To rebuild extent tree, we need deal with snapshot
9809 * one by one, otherwise we deal with node firstly which
9810 * can maximize readahead.
9813 ret = run_next_block(root, bits, bits_nr, &last,
9814 pending, seen, reada, nodes,
9815 extent_cache, chunk_cache,
9816 dev_cache, block_group_cache,
9817 dev_extent_cache, rec);
9821 free_extent_buffer(buf);
9822 list_del(&rec->list);
/* run_next_block() is also invoked with a NULL record, outside the per-record call above. */
9828 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9829 reada, nodes, extent_cache, chunk_cache,
9830 dev_cache, block_group_cache,
9831 dev_extent_cache, NULL);
/*
 * Top level pass over chunks, extents and devices.
 *
 * Collects the tree root, the chunk root and every ROOT_ITEM found in the
 * tree root into two work lists (normal_trees and dropping_trees), walks them
 * with deal_root_from_list(), then runs check_chunks(), check_extent_refs()
 * and check_devices() before releasing all caches.
 *
 * @root: root handle; its fs_info supplies the tree root and the chunk root
 *        and temporarily receives pointers to the local caches
 */
9841 static int check_chunks_and_extents(struct btrfs_root *root)
9843 struct rb_root dev_cache;
9844 struct cache_tree chunk_cache;
9845 struct block_group_tree block_group_cache;
9846 struct device_extent_tree dev_extent_cache;
9847 struct cache_tree extent_cache;
9848 struct cache_tree seen;
9849 struct cache_tree pending;
9850 struct cache_tree reada;
9851 struct cache_tree nodes;
9852 struct extent_io_tree excluded_extents;
9853 struct cache_tree corrupt_blocks;
9854 struct btrfs_path path;
9855 struct btrfs_key key;
9856 struct btrfs_key found_key;
9858 struct block_info *bits;
9860 struct extent_buffer *leaf;
9862 struct btrfs_root_item ri;
9863 struct list_head dropping_trees;
9864 struct list_head normal_trees;
9865 struct btrfs_root *root1;
/* Start from empty caches and empty work lists. */
9870 dev_cache = RB_ROOT;
9871 cache_tree_init(&chunk_cache);
9872 block_group_tree_init(&block_group_cache);
9873 device_extent_tree_init(&dev_extent_cache);
9875 cache_tree_init(&extent_cache);
9876 cache_tree_init(&seen);
9877 cache_tree_init(&pending);
9878 cache_tree_init(&nodes);
9879 cache_tree_init(&reada);
9880 cache_tree_init(&corrupt_blocks);
9881 extent_io_tree_init(&excluded_extents);
9882 INIT_LIST_HEAD(&dropping_trees);
9883 INIT_LIST_HEAD(&normal_trees);
/*
 * Publish the local trees and the free-extent hook through fs_info; the same
 * four fields are reset to NULL in the cleanup code further down.
 */
9886 root->fs_info->excluded_extents = &excluded_extents;
9887 root->fs_info->fsck_extent_cache = &extent_cache;
9888 root->fs_info->free_extent_hook = free_extent_hook;
9889 root->fs_info->corrupt_blocks = &corrupt_blocks;
9893 bits = malloc(bits_nr * sizeof(struct block_info));
9899 if (ctx.progress_enabled) {
9900 ctx.tp = TASK_EXTENTS;
9901 task_start(ctx.info);
/* Seed normal_trees with the tree root, then with the chunk root. */
9905 root1 = root->fs_info->tree_root;
9906 level = btrfs_header_level(root1->node);
9907 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9908 root1->node->start, 0, level, 0,
9909 root1->fs_info->nodesize, NULL);
9912 root1 = root->fs_info->chunk_root;
9913 level = btrfs_header_level(root1->node);
9914 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9915 root1->node->start, 0, level, 0,
9916 root1->fs_info->nodesize, NULL);
/* Walk the items of the tree root and queue every ROOT_ITEM that is found. */
9919 btrfs_init_path(&path);
9922 key.type = BTRFS_ROOT_ITEM_KEY;
9923 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9928 leaf = path.nodes[0];
9929 slot = path.slots[0];
9930 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9931 ret = btrfs_next_leaf(root, &path);
9934 leaf = path.nodes[0];
9935 slot = path.slots[0];
9937 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9938 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9939 unsigned long offset;
9942 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9943 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9944 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * Roots whose drop_progress objectid is zero go to normal_trees; the other
 * add_root_item_to_list() call below fills dropping_trees and also passes
 * the drop key.
 */
9945 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9946 level = btrfs_root_level(&ri);
9947 level_size = root->fs_info->nodesize;
9948 ret = add_root_item_to_list(&normal_trees,
9950 btrfs_root_bytenr(&ri),
9951 last_snapshot, level,
9952 0, level_size, NULL);
9956 level = btrfs_root_level(&ri);
9957 level_size = root->fs_info->nodesize;
9958 objectid = found_key.objectid;
9959 btrfs_disk_key_to_cpu(&found_key,
9961 ret = add_root_item_to_list(&dropping_trees,
9963 btrfs_root_bytenr(&ri),
9964 last_snapshot, level,
9966 level_size, &found_key);
9973 btrfs_release_path(&path);
9976 * check_block can return -EAGAIN if it fixes something, please keep
9977 * this in mind when dealing with return values from these functions, if
9978 * we get -EAGAIN we want to fall through and restart the loop.
9980 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9981 &seen, &reada, &nodes, &extent_cache,
9982 &chunk_cache, &dev_cache, &block_group_cache,
9989 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9990 &pending, &seen, &reada, &nodes,
9991 &extent_cache, &chunk_cache, &dev_cache,
9992 &block_group_cache, &dev_extent_cache);
/* With both work lists handled, cross-check chunks, extent refs and devices. */
9999 ret = check_chunks(&chunk_cache, &block_group_cache,
10000 &dev_extent_cache, NULL, NULL, NULL, 0);
10002 if (ret == -EAGAIN)
10007 ret = check_extent_refs(root, &extent_cache);
10009 if (ret == -EAGAIN)
10014 ret = check_devices(&dev_cache, &dev_extent_cache);
10019 task_stop(ctx.info);
/* Cleanup: detach the local trees and hook from fs_info and release the caches. */
10021 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10022 extent_io_tree_cleanup(&excluded_extents);
10023 root->fs_info->fsck_extent_cache = NULL;
10024 root->fs_info->free_extent_hook = NULL;
10025 root->fs_info->corrupt_blocks = NULL;
10026 root->fs_info->excluded_extents = NULL;
10029 free_chunk_cache_tree(&chunk_cache);
10030 free_device_cache_tree(&dev_cache);
10031 free_block_group_tree(&block_group_cache);
10032 free_device_extent_tree(&dev_extent_cache);
10033 free_extent_cache_tree(&seen);
10034 free_extent_cache_tree(&pending);
10035 free_extent_cache_tree(&reada);
10036 free_extent_cache_tree(&nodes);
10037 free_root_item_list(&normal_trees);
10038 free_root_item_list(&dropping_trees);
/*
 * NOTE(review): second release sequence.  It frees the same caches plus the
 * extent record cache and keeps the fs_info pointers in place, so it appears
 * to serve the -EAGAIN restart path mentioned above; confirm which label it
 * belongs to.
 */
10041 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10042 free_extent_cache_tree(&seen);
10043 free_extent_cache_tree(&pending);
10044 free_extent_cache_tree(&reada);
10045 free_extent_cache_tree(&nodes);
10046 free_chunk_cache_tree(&chunk_cache);
10047 free_block_group_tree(&block_group_cache);
10048 free_device_cache_tree(&dev_cache);
10049 free_device_extent_tree(&dev_extent_cache);
10050 free_extent_record_cache(&extent_cache);
10051 free_root_item_list(&normal_trees);
10052 free_root_item_list(&dropping_trees);
10053 extent_io_tree_cleanup(&excluded_extents);
10058 * Check backrefs of a tree block given by @bytenr or @eb.
10060 * @root: the root containing the @bytenr or @eb
10061 * @eb: tree block extent buffer, can be NULL
10062 * @bytenr: bytenr of the tree block to search
10063 * @level: tree level of the tree block
10064 * @owner: owner of the tree block
10066 * Return >0 for any error found and output error message
10067 * Return 0 for no error found
/*
 * Implementation outline: the extent tree is searched for the item covering
 * @bytenr, using a METADATA_ITEM key when the SKINNY_METADATA incompat flag
 * is set and an EXTENT_ITEM key otherwise.  Flags, generation, level and
 * reference count of that item are checked (the checks that read @eb are
 * presumably skipped when @eb is NULL, TODO confirm).  The inline refs are
 * then scanned for a TREE_BLOCK_REF naming this root or @owner, or for a
 * SHARED_BLOCK_REF whose parent block passes a recursive call of this
 * function with level + 1.  A separate item keyed
 * (bytenr, TREE_BLOCK_REF, root objectid) is searched as a fallback.
 * Problems are collected as BACKREF_MISSING / BACKREF_MISMATCH bits in err.
 */
10069 static int check_tree_block_ref(struct btrfs_root *root,
10070 struct extent_buffer *eb, u64 bytenr,
10071 int level, u64 owner)
10073 struct btrfs_key key;
10074 struct btrfs_root *extent_root = root->fs_info->extent_root;
10075 struct btrfs_path path;
10076 struct btrfs_extent_item *ei;
10077 struct btrfs_extent_inline_ref *iref;
10078 struct extent_buffer *leaf;
10084 u32 nodesize = root->fs_info->nodesize;
10087 int tree_reloc_root = 0;
/* Remember whether @bytenr is the root node of a tree reloc root. */
10092 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10093 btrfs_header_bytenr(root->node) == bytenr)
10094 tree_reloc_root = 1;
/*
 * Search with the largest possible offset; btrfs_previous_extent_item() is
 * then used to step back to the extent item of this bytenr.
 */
10096 btrfs_init_path(&path);
10097 key.objectid = bytenr;
10098 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10099 key.type = BTRFS_METADATA_ITEM_KEY;
10101 key.type = BTRFS_EXTENT_ITEM_KEY;
10102 key.offset = (u64)-1;
10104 /* Search for the backref in extent tree */
10105 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10107 err |= BACKREF_MISSING;
10110 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10112 err |= BACKREF_MISSING;
10116 leaf = path.nodes[0];
10117 slot = path.slots[0];
10118 btrfs_item_key_to_cpu(leaf, &key, slot);
10120 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * For a METADATA_ITEM the level is the key offset and the inline refs follow
 * the extent item directly; otherwise a btrfs_tree_block_info sits in
 * between and carries the level.
 */
10122 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10123 skinny_level = (int)key.offset;
10124 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10126 struct btrfs_tree_block_info *info;
10128 info = (struct btrfs_tree_block_info *)(ei + 1);
10129 skinny_level = btrfs_tree_block_level(leaf, info);
10130 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10137 if (!(btrfs_extent_flags(leaf, ei) &
10138 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10140 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10141 key.objectid, nodesize,
10142 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10143 err = BACKREF_MISMATCH;
10145 header_gen = btrfs_header_generation(eb);
10146 extent_gen = btrfs_extent_generation(leaf, ei);
10147 if (header_gen != extent_gen) {
10149 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10150 key.objectid, nodesize, header_gen,
10152 err = BACKREF_MISMATCH;
10154 if (level != skinny_level) {
10156 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10157 key.objectid, nodesize, level, skinny_level);
10158 err = BACKREF_MISMATCH;
10160 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10162 "extent[%llu %u] is referred by other roots than %llu",
10163 key.objectid, nodesize, root->objectid);
10164 err = BACKREF_MISMATCH;
10169 * Iterate the extent/metadata item to find the exact backref
10171 item_size = btrfs_item_size_nr(leaf, slot);
10172 ptr = (unsigned long)iref;
10173 end = (unsigned long)ei + item_size;
10174 while (ptr < end) {
10175 iref = (struct btrfs_extent_inline_ref *)ptr;
10176 type = btrfs_extent_inline_ref_type(leaf, iref);
10177 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10179 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10180 (offset == root->objectid || offset == owner)) {
10182 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10184 * Backref of tree reloc root points to itself, no need
10185 * to check backref any more.
10187 if (tree_reloc_root)
10190 /* Check if the backref points to valid referencer */
10191 found_ref = !check_tree_block_ref(root, NULL,
10192 offset, level + 1, owner);
10197 ptr += btrfs_extent_inline_ref_size(type);
10201 * Inlined extent item doesn't have what we need, check
10202 * TREE_BLOCK_REF_KEY
10205 btrfs_release_path(&path);
10206 key.objectid = bytenr;
10207 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10208 key.offset = root->objectid;
10210 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10215 err |= BACKREF_MISSING;
10217 btrfs_release_path(&path);
10218 if (eb && (err & BACKREF_MISSING))
10219 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10220 bytenr, nodesize, owner, level);
10225 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10227 * Return >0 for any error found and output error message
10228 * Return 0 for no error found
/*
 * @root: fs tree owning the file extent item
 * @eb:   leaf that holds the EXTENT_DATA item
 * @slot: slot of the item inside @eb
 *
 * Inline extents and extents with disk_bytenr 0 are skipped.  For the others
 * the two byte counts are checked for sectorsize alignment and added to the
 * file-scope data_bytes_allocated / data_bytes_referenced totals.  A data
 * backref is then looked for in three places: the inline refs of the extent
 * item, a keyed EXTENT_DATA_REF item, and finally a keyed SHARED_DATA_REF
 * item that uses the start of @eb as offset.  Error bits collected in err
 * include BYTES_UNALIGNED, BACKREF_MISMATCH and BACKREF_MISSING.
 */
10230 static int check_extent_data_item(struct btrfs_root *root,
10231 struct extent_buffer *eb, int slot)
10233 struct btrfs_file_extent_item *fi;
10234 struct btrfs_path path;
10235 struct btrfs_root *extent_root = root->fs_info->extent_root;
10236 struct btrfs_key fi_key;
10237 struct btrfs_key dbref_key;
10238 struct extent_buffer *leaf;
10239 struct btrfs_extent_item *ei;
10240 struct btrfs_extent_inline_ref *iref;
10241 struct btrfs_extent_data_ref *dref;
10244 u64 disk_num_bytes;
10245 u64 extent_num_bytes;
10252 int found_dbackref = 0;
/* Decode the key and the file extent item stored at @slot. */
10256 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10257 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10259 /* Nothing to check for hole and inline data extents */
10260 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10261 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10264 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10265 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10266 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10268 /* Check unaligned disk_num_bytes and num_bytes */
10269 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10271 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10272 fi_key.objectid, fi_key.offset, disk_num_bytes,
10273 root->fs_info->sectorsize);
10274 err |= BYTES_UNALIGNED;
10276 data_bytes_allocated += disk_num_bytes;
10278 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10280 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10281 fi_key.objectid, fi_key.offset, extent_num_bytes,
10282 root->fs_info->sectorsize);
10283 err |= BYTES_UNALIGNED;
10285 data_bytes_referenced += extent_num_bytes;
10287 owner = btrfs_header_owner(eb);
10289 /* Check the extent item of the file extent in extent tree */
10290 btrfs_init_path(&path);
10291 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10292 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10293 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10295 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10299 leaf = path.nodes[0];
10300 slot = path.slots[0];
10301 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10303 extent_flags = btrfs_extent_flags(leaf, ei);
10305 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10307 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10308 disk_bytenr, disk_num_bytes,
10309 BTRFS_EXTENT_FLAG_DATA);
10310 err |= BACKREF_MISMATCH;
10313 /* Check data backref inside that extent item */
10314 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10315 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10316 ptr = (unsigned long)iref;
10317 end = (unsigned long)ei + item_size;
10318 while (ptr < end) {
10319 iref = (struct btrfs_extent_inline_ref *)ptr;
10320 type = btrfs_extent_inline_ref_type(leaf, iref);
10321 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
/*
 * dref overlays the offset field of the inline ref.  An EXTENT_DATA_REF
 * matches when its root equals the leaf owner or this root; a
 * SHARED_DATA_REF hands its inline offset to check_tree_block_ref() as the
 * bytenr of a tree block and matches when that call reports no error.
 */
10323 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10324 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10325 if (ref_root == owner || ref_root == root->objectid)
10326 found_dbackref = 1;
10327 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10328 found_dbackref = !check_tree_block_ref(root, NULL,
10329 btrfs_extent_inline_ref_offset(leaf, iref),
10333 if (found_dbackref)
10335 ptr += btrfs_extent_inline_ref_size(type);
10338 if (!found_dbackref) {
10339 btrfs_release_path(&path);
10341 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10342 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10343 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10344 dbref_key.offset = hash_extent_data_ref(root->objectid,
10345 fi_key.objectid, fi_key.offset);
10347 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10348 &dbref_key, &path, 0, 0);
10350 found_dbackref = 1;
10354 btrfs_release_path(&path);
10357 * Neither inlined nor EXTENT_DATA_REF found, try
10358 * SHARED_DATA_REF as last chance.
10360 dbref_key.objectid = disk_bytenr;
10361 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10362 dbref_key.offset = eb->start;
10364 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10365 &dbref_key, &path, 0, 0);
10367 found_dbackref = 1;
10373 if (!found_dbackref)
10374 err |= BACKREF_MISSING;
10375 btrfs_release_path(&path);
10376 if (err & BACKREF_MISSING) {
10377 error("data extent[%llu %llu] backref lost",
10378 disk_bytenr, disk_num_bytes);
10384 * Get real tree block level for the case like shared block
10385 * Return >= 0 as tree level
10386 * Return <0 for error
/*
 * @fs_info: filesystem being checked
 * @bytenr:  start of the tree block whose level is wanted
 *
 * The level is obtained from two sources: the extent tree item for @bytenr
 * (key offset of a METADATA_ITEM, or the btrfs_tree_block_info that follows
 * an EXTENT_ITEM) and the header of the block itself, which is read with the
 * generation recorded in the extent item.  The header level is returned
 * after it has been compared with the backref level.
 */
10388 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10390 struct extent_buffer *eb;
10391 struct btrfs_path path;
10392 struct btrfs_key key;
10393 struct btrfs_extent_item *ei;
10400 /* Search extent tree for extent generation and level */
10401 key.objectid = bytenr;
10402 key.type = BTRFS_METADATA_ITEM_KEY;
10403 key.offset = (u64)-1;
10405 btrfs_init_path(&path);
10406 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10409 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10417 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10418 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10419 struct btrfs_extent_item);
10420 flags = btrfs_extent_flags(path.nodes[0], ei);
10421 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10426 /* Get transid for later read_tree_block() check */
10427 transid = btrfs_extent_generation(path.nodes[0], ei);
10429 /* Get backref level as one source */
10430 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10431 backref_level = key.offset;
10433 struct btrfs_tree_block_info *info;
10435 info = (struct btrfs_tree_block_info *)(ei + 1);
10436 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10438 btrfs_release_path(&path);
10440 /* Get level from tree block as an alternative source */
10441 eb = read_tree_block(fs_info, bytenr, transid);
10442 if (!extent_buffer_uptodate(eb)) {
10443 free_extent_buffer(eb);
10446 header_level = btrfs_header_level(eb);
10447 free_extent_buffer(eb);
/* The header level is only returned after comparing it with the backref level. */
10449 if (header_level != backref_level)
10451 return header_level;
10454 btrfs_release_path(&path);
10459 * Check if a tree block backref is valid (points to a valid tree block)
10460 * if level == -1, level will be resolved
10461 * Return >0 for any error found and print error message
/*
 * @fs_info: filesystem being checked
 * @root_id: objectid of the root the backref names
 * @bytenr:  start of the tree block the backref belongs to
 * @level:   expected level of that block, or -1 to have it resolved with
 *           query_tree_block_level()
 *
 * The root is loaded with btrfs_read_fs_root(), the first key of the block is
 * read (node key or item key), and a search with path.lowest_level = level
 * must arrive at a node whose bytenr and level match.  Failures set
 * REFERENCER_MISSING or REFERENCER_MISMATCH in err; a leaf with no items is
 * not searched at all.
 */
10463 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10464 u64 bytenr, int level)
10466 struct btrfs_root *root;
10467 struct btrfs_key key;
10468 struct btrfs_path path;
10469 struct extent_buffer *eb;
10470 struct extent_buffer *node;
10471 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10475 /* Query level for level == -1 special case */
10477 level = query_tree_block_level(fs_info, bytenr);
10479 err |= REFERENCER_MISSING;
/* Load the root named by the backref; offset (u64)-1 is used in the root key. */
10483 key.objectid = root_id;
10484 key.type = BTRFS_ROOT_ITEM_KEY;
10485 key.offset = (u64)-1;
10487 root = btrfs_read_fs_root(fs_info, &key);
10488 if (IS_ERR(root)) {
10489 err |= REFERENCER_MISSING;
10493 /* Read out the tree block to get item/node key */
10494 eb = read_tree_block(fs_info, bytenr, 0);
10495 if (!extent_buffer_uptodate(eb)) {
10496 err |= REFERENCER_MISSING;
10497 free_extent_buffer(eb);
10501 /* Empty tree, no need to check key */
10502 if (!btrfs_header_nritems(eb) && !level) {
10503 free_extent_buffer(eb);
10508 btrfs_node_key_to_cpu(eb, &key, 0);
10510 btrfs_item_key_to_cpu(eb, &key, 0);
10512 free_extent_buffer(eb);
10514 btrfs_init_path(&path);
10515 path.lowest_level = level;
10516 /* Search with the first key, to ensure we can reach it */
10517 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10519 err |= REFERENCER_MISSING;
10523 node = path.nodes[level];
10524 if (btrfs_header_bytenr(node) != bytenr) {
10526 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10527 bytenr, nodesize, bytenr,
10528 btrfs_header_bytenr(node));
10529 err |= REFERENCER_MISMATCH;
10531 if (btrfs_header_level(node) != level) {
10533 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10534 bytenr, nodesize, level,
10535 btrfs_header_level(node));
10536 err |= REFERENCER_MISMATCH;
10540 btrfs_release_path(&path);
10542 if (err & REFERENCER_MISSING) {
10544 error("extent [%llu %d] lost referencer (owner: %llu)",
10545 bytenr, nodesize, root_id);
10548 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10549 bytenr, nodesize, root_id, level);
10556 * Check if tree block @eb is tree reloc root.
10557 * Return 0 if it's not or any problem happens
10558 * Return 1 if it's a tree reloc root
/*
 * @fs_info: filesystem being checked
 * @eb:      tree block to classify
 *
 * Reads the root keyed (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb)
 * through btrfs_read_fs_root_no_cache() and compares the bytenr of its root
 * node with the bytenr of @eb.  The root obtained here is released with
 * btrfs_free_fs_root().
 */
10560 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10561 struct extent_buffer *eb)
10563 struct btrfs_root *tree_reloc_root;
10564 struct btrfs_key key;
10565 u64 bytenr = btrfs_header_bytenr(eb);
10566 u64 owner = btrfs_header_owner(eb);
/* The reloc root item is keyed by the owner of the block in the offset field. */
10569 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10570 key.offset = owner;
10571 key.type = BTRFS_ROOT_ITEM_KEY;
10573 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10574 if (IS_ERR(tree_reloc_root))
10577 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10579 btrfs_free_fs_root(tree_reloc_root);
10584 * Check referencer for shared block backref
10585 * If level == -1, this function will resolve the level.
/*
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the tree block claimed to reference @bytenr
 * @bytenr:  start of the referenced tree block
 * @level:   level of the referenced block; query_tree_block_level() is used
 *           to resolve it when needed
 *
 * The parent block is read and, once its header level has been checked
 * against level + 1, its block pointers are scanned for @bytenr.  The case
 * parent == bytenr is handed to is_tree_reloc_root().  When no parent is
 * found an error is printed and REFERENCER_MISSING is returned.
 */
10587 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10588 u64 parent, u64 bytenr, int level)
10590 struct extent_buffer *eb;
10592 int found_parent = 0;
/* Read the claimed parent block; the transid argument is 0. */
10595 eb = read_tree_block(fs_info, parent, 0);
10596 if (!extent_buffer_uptodate(eb))
10600 level = query_tree_block_level(fs_info, bytenr);
10604 /* It's possible it's a tree reloc root */
10605 if (parent == bytenr) {
10606 if (is_tree_reloc_root(fs_info, eb))
10611 if (level + 1 != btrfs_header_level(eb))
10614 nr = btrfs_header_nritems(eb);
10615 for (i = 0; i < nr; i++) {
10616 if (bytenr == btrfs_node_blockptr(eb, i)) {
10622 free_extent_buffer(eb);
10623 if (!found_parent) {
10625 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10626 bytenr, fs_info->nodesize, parent, level);
10627 return REFERENCER_MISSING;
10633 * Check referencer for normal (inlined) data ref
10634 * If len == 0, it will be resolved by searching in extent tree
/*
 * @fs_info:  filesystem being checked
 * @root_id:  root named by the data backref
 * @objectid: key objectid of the EXTENT_DATA items to search (the inode)
 * @offset:   backref offset, i.e. file offset minus file extent offset
 * @bytenr:   disk bytenr of the data extent
 * @len:      disk length of the data extent
 * @count:    reference count recorded in the backref
 *
 * The extent item for @bytenr is located first, then the EXTENT_DATA items
 * of @objectid in root @root_id are iterated and compared with @bytenr, @len
 * and @offset.  found_count is finally compared with @count; a difference is
 * reported and REFERENCER_MISSING is returned.
 */
10636 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10637 u64 root_id, u64 objectid, u64 offset,
10638 u64 bytenr, u64 len, u32 count)
10640 struct btrfs_root *root;
10641 struct btrfs_root *extent_root = fs_info->extent_root;
10642 struct btrfs_key key;
10643 struct btrfs_path path;
10644 struct extent_buffer *leaf;
10645 struct btrfs_file_extent_item *fi;
10646 u32 found_count = 0;
/* Locate the EXTENT_ITEM of @bytenr by searching with the largest offset and stepping back. */
10651 key.objectid = bytenr;
10652 key.type = BTRFS_EXTENT_ITEM_KEY;
10653 key.offset = (u64)-1;
10655 btrfs_init_path(&path);
10656 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10659 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10662 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10663 if (key.objectid != bytenr ||
10664 key.type != BTRFS_EXTENT_ITEM_KEY)
10667 btrfs_release_path(&path);
/* Switch from the extent tree to the root named by the backref. */
10669 key.objectid = root_id;
10670 key.type = BTRFS_ROOT_ITEM_KEY;
10671 key.offset = (u64)-1;
10672 btrfs_init_path(&path);
10674 root = btrfs_read_fs_root(fs_info, &key);
10678 key.objectid = objectid;
10679 key.type = BTRFS_EXTENT_DATA_KEY;
10681 * It can be nasty as data backref offset is
10682 * file offset - file extent offset, which is smaller or
10683 * equal to original backref offset. The only special case is
10684 * overflow. So we need to special check and do further search.
10686 key.offset = offset & (1ULL << 63) ? 0 : offset;
10688 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10693 * Search afterwards to get correct one
10694 * NOTE: As we must do a comprehensive check on the data backref to
10695 * make sure the dref count also matches, we must iterate all file
10696 * extents for that inode.
10699 leaf = path.nodes[0];
10700 slot = path.slots[0];
10702 if (slot >= btrfs_header_nritems(leaf))
10704 btrfs_item_key_to_cpu(leaf, &key, slot);
10705 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10707 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10709 * Except normal disk bytenr and disk num bytes, we still
10710 * need to do extra check on dbackref offset as
10711 * dbackref offset = file_offset - file_extent_offset
10713 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10714 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10715 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10720 ret = btrfs_next_item(root, &path);
10725 btrfs_release_path(&path);
/* The backref is accepted only when found_count equals the recorded count. */
10726 if (found_count != count) {
10728 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10729 bytenr, len, root_id, objectid, offset, count, found_count);
10730 return REFERENCER_MISSING;
10736 * Check if the referencer of a shared data backref exists
/*
 * @fs_info: filesystem being checked
 * @parent:  bytenr of the tree block expected to hold a file extent item
 *           pointing at @bytenr
 * @bytenr:  disk bytenr of the data extent
 *
 * Reads @parent and scans its items for an EXTENT_DATA item that is not
 * inline and whose disk bytenr equals @bytenr.  When none is found an error
 * is printed and REFERENCER_MISSING is returned.
 */
10738 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10739 u64 parent, u64 bytenr)
10741 struct extent_buffer *eb;
10742 struct btrfs_key key;
10743 struct btrfs_file_extent_item *fi;
10745 int found_parent = 0;
/* Read the claimed parent block; the transid argument is 0. */
10748 eb = read_tree_block(fs_info, parent, 0);
10749 if (!extent_buffer_uptodate(eb))
10752 nr = btrfs_header_nritems(eb);
10753 for (i = 0; i < nr; i++) {
10754 btrfs_item_key_to_cpu(eb, &key, i);
10755 if (key.type != BTRFS_EXTENT_DATA_KEY)
10758 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10759 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10762 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10769 free_extent_buffer(eb);
10770 if (!found_parent) {
10771 error("shared extent %llu referencer lost (parent: %llu)",
10773 return REFERENCER_MISSING;
10779 * This function will check a given extent item, including its backref and
10780 * itself (like crossing stripe boundary and type)
10782 * Since we don't use extent_record anymore, introduce new error bit
/*
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the item
 * @slot:    slot of the EXTENT_ITEM / METADATA_ITEM inside @eb
 *
 * Adds the extent size to the file-scope bytes_used counter, reports items
 * smaller than struct btrfs_extent_item as unsupported, checks metadata
 * extents with check_crossing_stripes(), and then dispatches every inline
 * ref by type to check_tree_block_backref(), check_shared_block_backref(),
 * check_extent_data_backref() or check_shared_data_backref().  An unknown
 * ref type sets UNKNOWN_TYPE; a ref walk that runs past the end of the item
 * sets ITEM_SIZE_MISMATCH; a stripe crossing sets CROSSING_STRIPE_BOUNDARY.
 * The level handed to the tree block checks comes from the
 * btrfs_tree_block_info for an old style EXTENT_ITEM and from the key offset
 * for a METADATA_ITEM.
 */
10784 static int check_extent_item(struct btrfs_fs_info *fs_info,
10785 struct extent_buffer *eb, int slot)
10787 struct btrfs_extent_item *ei;
10788 struct btrfs_extent_inline_ref *iref;
10789 struct btrfs_extent_data_ref *dref;
10793 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10794 u32 item_size = btrfs_item_size_nr(eb, slot);
10799 struct btrfs_key key;
/* An EXTENT_ITEM accounts key.offset bytes; the other accounting line below adds nodesize. */
10803 btrfs_item_key_to_cpu(eb, &key, slot);
10804 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10805 bytes_used += key.offset;
10807 bytes_used += nodesize;
10809 if (item_size < sizeof(*ei)) {
10811 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10812 * old thing when on disk format is still un-determined.
10813 * No need to care about it anymore
10815 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10819 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10820 flags = btrfs_extent_flags(eb, ei);
10822 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10824 if (metadata && check_crossing_stripes(global_info, key.objectid,
10826 error("bad metadata [%llu, %llu) crossing stripe boundary",
10827 key.objectid, key.objectid + nodesize);
10828 err |= CROSSING_STRIPE_BOUNDARY;
10831 ptr = (unsigned long)(ei + 1);
10833 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10834 /* Old EXTENT_ITEM metadata */
10835 struct btrfs_tree_block_info *info;
10837 info = (struct btrfs_tree_block_info *)ptr;
10838 level = btrfs_tree_block_level(eb, info);
10839 ptr += sizeof(struct btrfs_tree_block_info);
10841 /* New METADATA_ITEM */
10842 level = key.offset;
10844 end = (unsigned long)ei + item_size;
10847 /* Reached extent item end normally */
10851 /* Beyond extent item end, wrong item size */
10853 err |= ITEM_SIZE_MISMATCH;
10854 error("extent item at bytenr %llu slot %d has wrong size",
10859 /* Now check every backref in this extent item */
10860 iref = (struct btrfs_extent_inline_ref *)ptr;
10861 type = btrfs_extent_inline_ref_type(eb, iref);
10862 offset = btrfs_extent_inline_ref_offset(eb, iref);
10864 case BTRFS_TREE_BLOCK_REF_KEY:
10865 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10869 case BTRFS_SHARED_BLOCK_REF_KEY:
10870 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10874 case BTRFS_EXTENT_DATA_REF_KEY:
10875 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10876 ret = check_extent_data_backref(fs_info,
10877 btrfs_extent_data_ref_root(eb, dref),
10878 btrfs_extent_data_ref_objectid(eb, dref),
10879 btrfs_extent_data_ref_offset(eb, dref),
10880 key.objectid, key.offset,
10881 btrfs_extent_data_ref_count(eb, dref));
10884 case BTRFS_SHARED_DATA_REF_KEY:
10885 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10889 error("extent[%llu %d %llu] has unknown ref type: %d",
10890 key.objectid, key.type, key.offset, type);
10891 err |= UNKNOWN_TYPE;
10895 ptr += btrfs_extent_inline_ref_size(type);
10903 * Check if a dev extent item is referred correctly by its chunk
/*
 * Verify that the dev extent stored at @slot of @eb is backed by a chunk.
 *
 * The chunk key is rebuilt from the chunk objectid and chunk offset recorded
 * in the dev extent item and looked up in the chunk tree. The chunk found is
 * passed to btrfs_check_chunk_valid(), its stripe length is compared with the
 * dev extent length, and its stripes are scanned for one whose devid and
 * offset equal the objectid and offset of the dev extent key.
 *
 * Returns REFERENCER_MISSING (after reporting the error) when no such stripe
 * was found.
 */
10905 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10906 struct extent_buffer *eb, int slot)
10908 struct btrfs_root *chunk_root = fs_info->chunk_root;
10909 struct btrfs_dev_extent *ptr;
10910 struct btrfs_path path;
10911 struct btrfs_key chunk_key;
10912 struct btrfs_key devext_key;
10913 struct btrfs_chunk *chunk;
10914 struct extent_buffer *l;
10918 int found_chunk = 0;
10921 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10922 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10923 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk this dev extent claims to belong to */
10925 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10926 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10927 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10929 btrfs_init_path(&path);
10930 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10935 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10936 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
10941 if (btrfs_stripe_length(fs_info, l, chunk) != length)
10944 num_stripes = btrfs_chunk_num_stripes(l, chunk);
/* Look for the stripe that points back at this dev extent */
10945 for (i = 0; i < num_stripes; i++) {
10946 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10947 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10949 if (devid == devext_key.objectid &&
10950 offset == devext_key.offset) {
10956 btrfs_release_path(&path);
10957 if (!found_chunk) {
10959 "device extent[%llu, %llu, %llu] did not find the related chunk",
10960 devext_key.objectid, devext_key.offset, length);
10961 return REFERENCER_MISSING;
10967 * Check if the used space is correct with the dev item
/*
 * Cross-check the bytes_used field of the dev item at @slot of @eb against
 * the dev extents recorded for that device in the device tree.
 *
 * The device tree is searched for dev extents whose key objectid is the
 * device id, their lengths are summed, and the sum is compared with the
 * bytes_used value read from the dev item.
 *
 * Returns REFERENCER_MISSING on the error path taken after the dev extent
 * search, and ACCOUNTING_MISMATCH when the summed length differs from
 * bytes_used.
 */
10969 static int check_dev_item(struct btrfs_fs_info *fs_info,
10970 struct extent_buffer *eb, int slot)
10972 struct btrfs_root *dev_root = fs_info->dev_root;
10973 struct btrfs_dev_item *dev_item;
10974 struct btrfs_path path;
10975 struct btrfs_key key;
10976 struct btrfs_dev_extent *ptr;
10982 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10983 dev_id = btrfs_device_id(eb, dev_item);
10984 used = btrfs_device_bytes_used(eb, dev_item);
10986 key.objectid = dev_id;
10987 key.type = BTRFS_DEV_EXTENT_KEY;
10990 btrfs_init_path(&path);
10991 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* Re-read the dev item key so the message names the dev item itself */
10993 btrfs_item_key_to_cpu(eb, &key, slot);
10994 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10995 key.objectid, key.type, key.offset);
10996 btrfs_release_path(&path);
10997 return REFERENCER_MISSING;
11000 /* Iterate dev_extents to calculate the used space of a device */
11002 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
11005 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11006 if (key.objectid > dev_id)
11008 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11011 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11012 struct btrfs_dev_extent);
11013 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11015 ret = btrfs_next_item(dev_root, &path);
11019 btrfs_release_path(&path);
11021 if (used != total) {
/*
 * NOTE(review): the key is re-read here but the message below prints
 * BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id rather than the
 * fields of that key -- confirm whether this is intended.
 */
11022 btrfs_item_key_to_cpu(eb, &key, slot);
11024 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11025 total, used, BTRFS_ROOT_TREE_OBJECTID,
11026 BTRFS_DEV_EXTENT_KEY, dev_id);
11027 return ACCOUNTING_MISMATCH;
11033 * Check a block group item with its referencer (chunk) and its used space
11034 * with extent/metadata item
/*
 * Check the block group item at @slot of @eb in two steps.
 *
 * 1) Look up the chunk item keyed by the block group start in the chunk tree
 *    and compare the chunk length; problems are recorded as
 *    REFERENCER_MISSING or REFERENCER_MISMATCH in the error bitmap.
 * 2) Walk the extent tree from the block group start up to
 *    bg_key.objectid + bg_key.offset, summing the space covered by
 *    EXTENT_ITEM and METADATA_ITEM keys and checking that the extent flags
 *    (data or tree block) agree with the block group flags
 *    (CHUNK_TYPE_MISMATCH otherwise). The sum is then compared with the used
 *    value of the block group item (ACCOUNTING_MISMATCH on difference).
 */
11036 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11037 struct extent_buffer *eb, int slot)
11039 struct btrfs_root *extent_root = fs_info->extent_root;
11040 struct btrfs_root *chunk_root = fs_info->chunk_root;
11041 struct btrfs_block_group_item *bi;
11042 struct btrfs_block_group_item bg_item;
11043 struct btrfs_path path;
11044 struct btrfs_key bg_key;
11045 struct btrfs_key chunk_key;
11046 struct btrfs_key extent_key;
11047 struct btrfs_chunk *chunk;
11048 struct extent_buffer *leaf;
11049 struct btrfs_extent_item *ei;
11050 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11058 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11059 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Work on a local copy of the item rather than on the extent buffer */
11060 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11061 used = btrfs_block_group_used(&bg_item);
11062 bg_flags = btrfs_block_group_flags(&bg_item);
11064 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11065 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11066 chunk_key.offset = bg_key.objectid;
11068 btrfs_init_path(&path);
11069 /* Search for the referencer chunk */
11070 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11073 "block group[%llu %llu] did not find the related chunk item",
11074 bg_key.objectid, bg_key.offset);
11075 err |= REFERENCER_MISSING;
11077 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11078 struct btrfs_chunk);
11079 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11082 "block group[%llu %llu] related chunk item length does not match",
11083 bg_key.objectid, bg_key.offset);
11084 err |= REFERENCER_MISMATCH;
11087 btrfs_release_path(&path);
11089 /* Search from the block group bytenr */
11090 extent_key.objectid = bg_key.objectid;
11091 extent_key.type = 0;
11092 extent_key.offset = 0;
11094 btrfs_init_path(&path);
11095 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11099 /* Iterate extent tree to account used space */
11101 leaf = path.nodes[0];
11103 /* Search slot can point to the last item beyond leaf nritems */
11104 if (path.slots[0] >= btrfs_header_nritems(leaf))
11107 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11108 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11111 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11112 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11114 if (extent_key.objectid < bg_key.objectid)
11117 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11120 total += extent_key.offset;
11122 ei = btrfs_item_ptr(leaf, path.slots[0],
11123 struct btrfs_extent_item);
11124 flags = btrfs_extent_flags(leaf, ei);
/* Data extents need a DATA group, tree blocks a SYSTEM or METADATA group */
11125 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11126 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11128 "bad extent[%llu, %llu) type mismatch with chunk",
11129 extent_key.objectid,
11130 extent_key.objectid + extent_key.offset);
11131 err |= CHUNK_TYPE_MISMATCH;
11133 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11134 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11135 BTRFS_BLOCK_GROUP_METADATA))) {
11137 "bad extent[%llu, %llu) type mismatch with chunk",
11138 extent_key.objectid,
11139 extent_key.objectid + nodesize);
11140 err |= CHUNK_TYPE_MISMATCH;
11144 ret = btrfs_next_item(extent_root, &path);
11150 btrfs_release_path(&path);
11152 if (total != used) {
11154 "block group[%llu %llu] used %llu but extent items used %llu",
11155 bg_key.objectid, bg_key.offset, used, total);
11156 err |= ACCOUNTING_MISMATCH;
11162 * Check a chunk item.
11163 * Including checking all referred dev_extents and block group
/*
 * Check the chunk item at @slot of @eb together with the items that must
 * refer back to it.
 *
 * - The chunk is passed to btrfs_check_chunk_valid(); an invalid chunk is
 *   recorded as BYTES_UNALIGNED | UNKNOWN_TYPE.
 * - The block group item keyed (chunk offset, BLOCK_GROUP_ITEM, length) is
 *   looked up in the extent tree and its flags are compared with the chunk
 *   type.
 * - For every stripe, the dev extent keyed (devid, DEV_EXTENT, stripe offset)
 *   is looked up in the device tree and its chunk objectid, chunk offset and
 *   length are compared with this chunk; a failure is recorded as
 *   BACKREF_MISSING.
 */
11165 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11166 struct extent_buffer *eb, int slot)
11168 struct btrfs_root *extent_root = fs_info->extent_root;
11169 struct btrfs_root *dev_root = fs_info->dev_root;
11170 struct btrfs_path path;
11171 struct btrfs_key chunk_key;
11172 struct btrfs_key bg_key;
11173 struct btrfs_key devext_key;
11174 struct btrfs_chunk *chunk;
11175 struct extent_buffer *leaf;
11176 struct btrfs_block_group_item *bi;
11177 struct btrfs_block_group_item bg_item;
11178 struct btrfs_dev_extent *ptr;
11190 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11191 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11192 length = btrfs_chunk_length(eb, chunk);
11193 chunk_end = chunk_key.offset + length;
11194 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11197 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11199 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11202 type = btrfs_chunk_type(eb, chunk);
/* The block group covering this chunk shares its start and length */
11204 bg_key.objectid = chunk_key.offset;
11205 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11206 bg_key.offset = length;
11208 btrfs_init_path(&path);
11209 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11212 "chunk[%llu %llu) did not find the related block group item",
11213 chunk_key.offset, chunk_end);
11214 err |= REFERENCER_MISSING;
11216 leaf = path.nodes[0];
11217 bi = btrfs_item_ptr(leaf, path.slots[0],
11218 struct btrfs_block_group_item);
11219 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
/*
 * NOTE(review): a flags mismatch is recorded below as REFERENCER_MISSING,
 * while check_block_group_item records a length mismatch as
 * REFERENCER_MISMATCH -- confirm which bit is intended here.
 */
11221 if (btrfs_block_group_flags(&bg_item) != type) {
11223 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11224 chunk_key.offset, chunk_end, type,
11225 btrfs_block_group_flags(&bg_item));
11226 err |= REFERENCER_MISSING;
11230 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11231 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11232 for (i = 0; i < num_stripes; i++) {
/* Drop the path left over from the previous lookup before reusing it */
11233 btrfs_release_path(&path);
11234 btrfs_init_path(&path);
11235 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11236 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11237 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11239 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11242 goto not_match_dev;
11244 leaf = path.nodes[0];
11245 ptr = btrfs_item_ptr(leaf, path.slots[0],
11246 struct btrfs_dev_extent);
11247 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11248 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11249 if (objectid != chunk_key.objectid ||
11250 offset != chunk_key.offset ||
11251 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11252 goto not_match_dev;
/*
 * NOTE(review): the message below prints chunk_key.objectid as the range
 * start, whereas the other messages in this function print chunk_key.offset
 * -- confirm which is intended.
 */
11255 err |= BACKREF_MISSING;
11257 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11258 chunk_key.objectid, chunk_end, i);
11261 btrfs_release_path(&path);
11267 * Main entry function to check known items and update related accounting info
/*
 * Dispatch every item of leaf @eb to the checker that matches its key type.
 *
 * Extent data, block group, dev item, chunk, dev extent, extent/metadata
 * items and the keyed backref types each call their dedicated check
 * function. EXTENT_CSUM items are not checked here; their item size is only
 * added to total_csum_bytes. The slot is advanced until it reaches the number
 * of items in the leaf header.
 */
11269 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11271 struct btrfs_fs_info *fs_info = root->fs_info;
11272 struct btrfs_key key;
11275 struct btrfs_extent_data_ref *dref;
11280 btrfs_item_key_to_cpu(eb, &key, slot);
11284 case BTRFS_EXTENT_DATA_KEY:
11285 ret = check_extent_data_item(root, eb, slot);
11288 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11289 ret = check_block_group_item(fs_info, eb, slot);
11292 case BTRFS_DEV_ITEM_KEY:
11293 ret = check_dev_item(fs_info, eb, slot);
11296 case BTRFS_CHUNK_ITEM_KEY:
11297 ret = check_chunk_item(fs_info, eb, slot);
11300 case BTRFS_DEV_EXTENT_KEY:
11301 ret = check_dev_extent_item(fs_info, eb, slot);
/* Both extent item flavours share one checker */
11304 case BTRFS_EXTENT_ITEM_KEY:
11305 case BTRFS_METADATA_ITEM_KEY:
11306 ret = check_extent_item(fs_info, eb, slot);
/* Accounting only: csum items just contribute their size */
11309 case BTRFS_EXTENT_CSUM_KEY:
11310 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11312 case BTRFS_TREE_BLOCK_REF_KEY:
11313 ret = check_tree_block_backref(fs_info, key.offset,
11317 case BTRFS_EXTENT_DATA_REF_KEY:
11318 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11319 ret = check_extent_data_backref(fs_info,
11320 btrfs_extent_data_ref_root(eb, dref),
11321 btrfs_extent_data_ref_objectid(eb, dref),
11322 btrfs_extent_data_ref_offset(eb, dref),
11324 btrfs_extent_data_ref_count(eb, dref));
11327 case BTRFS_SHARED_BLOCK_REF_KEY:
11328 ret = check_shared_block_backref(fs_info, key.offset,
11332 case BTRFS_SHARED_DATA_REF_KEY:
11333 ret = check_shared_data_backref(fs_info, key.offset,
11341 if (++slot < btrfs_header_nritems(eb))
11348 * Helper function for later fs/subvol tree check. To determine if a tree
11349 * block should be checked.
11350 * This function ensures that only the direct referencer with the lowest
11351 * rootid checks a fs/subvolume tree block.
11353 * Backref check at extent tree would detect errors like missing subvolume
11354 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root should check tree block @eb.
 *
 * The extent tree is searched with the key (bytenr of @eb, METADATA_ITEM,
 * (u64)-1) and btrfs_previous_extent_item() is used to step to the extent
 * item for that bytenr. The inline refs of that item are then walked: they
 * start right after the extent item for a METADATA_ITEM key, and after the
 * additional btrfs_tree_block_info otherwise. When a TREE_BLOCK_REF whose
 * offset is lower than root->objectid is found, the path is released on the
 * spot; presumably the block is then left to that lower root (the return
 * statements should be confirmed).
 */
11356 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11358 struct btrfs_root *extent_root = root->fs_info->extent_root;
11359 struct btrfs_key key;
11360 struct btrfs_path path;
11361 struct extent_buffer *leaf;
11363 struct btrfs_extent_item *ei;
11369 struct btrfs_extent_inline_ref *iref;
11372 btrfs_init_path(&path);
11373 key.objectid = btrfs_header_bytenr(eb);
11374 key.type = BTRFS_METADATA_ITEM_KEY;
11375 key.offset = (u64)-1;
11378 * Any failure in backref resolving means we can't determine
11379 * whom the tree block belongs to.
11380 * So in that case, we need to check that tree block
11382 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11386 ret = btrfs_previous_extent_item(extent_root, &path,
11387 btrfs_header_bytenr(eb));
11391 leaf = path.nodes[0];
11392 slot = path.slots[0];
11393 btrfs_item_key_to_cpu(leaf, &key, slot);
11394 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11396 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11397 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11399 struct btrfs_tree_block_info *info;
11401 info = (struct btrfs_tree_block_info *)(ei + 1);
11402 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11405 item_size = btrfs_item_size_nr(leaf, slot);
11406 ptr = (unsigned long)iref;
11407 end = (unsigned long)ei + item_size;
11408 while (ptr < end) {
11409 iref = (struct btrfs_extent_inline_ref *)ptr;
11410 type = btrfs_extent_inline_ref_type(leaf, iref);
11411 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11414 * We only check the tree block if current root is
11415 * the lowest referencer of it.
11417 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11418 offset < root->objectid) {
11419 btrfs_release_path(&path);
11423 ptr += btrfs_extent_inline_ref_size(type);
11426 * Normally we should also check keyed tree block ref, but that may be
11427 * very time consuming. Inlined ref should already make us skip a lot
11428 * of refs now. So skip search keyed tree block ref.
11432 btrfs_release_path(&path);
11437 * Traversal function for tree block. We will do:
11438 * 1) Skip shared fs/subvolume tree blocks
11439 * 2) Update related bytes accounting
11440 * 3) Pre-order traversal
/*
 * Recursively check tree block @node of @root and everything below it.
 *
 * For fs trees the block is first filtered through should_check(). The
 * global byte counters (total_btree_bytes, total_fs_tree_bytes,
 * total_extent_tree_bytes, btree_space_waste) are updated and
 * found_old_backref is set for reloc-owned blocks with mixed backref revision
 * but without the RELOC header flag. The block itself is verified with
 * check_tree_block_ref(); leaves additionally go through check_leaf_items(),
 * while for nodes every child is read with read_tree_block() and traversed by
 * a recursive call. Children are tested against the drop_progress key of the
 * root item when the level equals its drop_level.
 */
11442 static int traverse_tree_block(struct btrfs_root *root,
11443 struct extent_buffer *node)
11445 struct extent_buffer *eb;
11446 struct btrfs_key key;
11447 struct btrfs_key drop_key;
11455 * Skip shared fs/subvolume tree block, in that case they will
11456 * be checked by referencer with lowest rootid
11458 if (is_fstree(root->objectid) && !should_check(root, node))
11461 /* Update bytes accounting */
11462 total_btree_bytes += node->len;
11463 if (fs_root_objectid(btrfs_header_owner(node)))
11464 total_fs_tree_bytes += node->len;
11465 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11466 total_extent_tree_bytes += node->len;
11467 if (!found_old_backref &&
11468 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11469 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11470 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11471 found_old_backref = 1;
11473 /* pre-order traversal, check itself first */
11474 level = btrfs_header_level(node);
11475 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11476 btrfs_header_level(node),
11477 btrfs_header_owner(node));
11481 "check %s failed root %llu bytenr %llu level %d, force continue check",
11482 level ? "node":"leaf", root->objectid,
11483 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf: account its free space as waste and check its items */
11486 btree_space_waste += btrfs_leaf_free_space(root, node);
11487 ret = check_leaf_items(root, node);
/* Node: unused key pointer slots count as waste */
11492 nr = btrfs_header_nritems(node);
11493 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11494 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11495 sizeof(struct btrfs_key_ptr);
11497 /* Then check all its children */
11498 for (i = 0; i < nr; i++) {
11499 u64 blocknr = btrfs_node_blockptr(node, i);
11501 btrfs_node_key_to_cpu(node, &key, i);
11502 if (level == root->root_item.drop_level &&
11503 is_dropped_key(&key, &drop_key))
11507 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11508 * to call the function itself.
11510 eb = read_tree_block(root->fs_info, blocknr, 0);
11511 if (extent_buffer_uptodate(eb)) {
11512 ret = traverse_tree_block(root, eb);
11515 free_extent_buffer(eb);
11522 * Low memory usage version of check_chunks_and_extents.
/*
 * Traverse the chunk tree, the tree root, and then every tree that has a
 * ROOT_ITEM in the tree root, starting the search at
 * BTRFS_EXTENT_TREE_OBJECTID.
 *
 * Each root found is read and handed to traverse_tree_block(). Roots with
 * objectid BTRFS_TREE_RELOC_OBJECTID are read through
 * btrfs_read_fs_root_no_cache() and released with btrfs_free_fs_root() after
 * the traversal; all other roots are read through btrfs_read_fs_root().
 */
11524 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11526 struct btrfs_path path;
11527 struct btrfs_key key;
11528 struct btrfs_root *root1;
11529 struct btrfs_root *cur_root;
11533 root1 = root->fs_info->chunk_root;
11534 ret = traverse_tree_block(root1, root1->node);
11537 root1 = root->fs_info->tree_root;
11538 ret = traverse_tree_block(root1, root1->node);
11541 btrfs_init_path(&path);
11542 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11544 key.type = BTRFS_ROOT_ITEM_KEY;
11546 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11548 error("cannot find extent treet in tree_root");
11553 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11554 if (key.type != BTRFS_ROOT_ITEM_KEY)
/* Offset (u64)-1 is the lookup key used when reading a root */
11556 key.offset = (u64)-1;
11558 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11559 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11562 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11563 if (IS_ERR(cur_root) || !cur_root) {
11564 error("failed to read tree: %lld", key.objectid);
11568 ret = traverse_tree_block(cur_root, cur_root->node);
/* Reloc roots were read uncached above, so they are freed here */
11571 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11572 btrfs_free_fs_root(cur_root);
11574 ret = btrfs_next_item(root1, &path);
11580 btrfs_release_path(&path);
/*
 * Replace the root node of @root with an empty, freshly initialised block
 * inside transaction @trans.
 *
 * A block is obtained (the visible allocation path uses
 * btrfs_alloc_free_block() with the nodesize and the root objectid), its
 * header is zeroed and refilled with level, bytenr, generation, mixed backref
 * revision, owner, fsid and chunk tree uuid, and the buffer is marked dirty.
 * When the new block has the same start as the old root node, the root item
 * generation and level are updated and written with btrfs_update_root().
 * Finally the old node reference is dropped and the root is added to the
 * dirty list.
 *
 * NOTE(review): @overwrite presumably selects reuse of the existing root node
 * instead of a new allocation -- confirm against the full function.
 */
11584 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11585 struct btrfs_root *root, int overwrite)
11587 struct extent_buffer *c;
11588 struct extent_buffer *old = root->node;
11591 struct btrfs_disk_key disk_key = {0,0,0};
11597 extent_buffer_get(c);
11600 c = btrfs_alloc_free_block(trans, root,
11601 root->fs_info->nodesize,
11602 root->root_key.objectid,
11603 &disk_key, level, 0, 0);
11606 extent_buffer_get(c);
/* Start from a clean header, then fill in every field that is needed */
11610 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11611 btrfs_set_header_level(c, level);
11612 btrfs_set_header_bytenr(c, c->start);
11613 btrfs_set_header_generation(c, trans->transid);
11614 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11615 btrfs_set_header_owner(c, root->root_key.objectid);
11617 write_extent_buffer(c, root->fs_info->fsid,
11618 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11620 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11621 btrfs_header_chunk_tree_uuid(c),
11624 btrfs_mark_buffer_dirty(c);
11626 * this case can happen in the following case:
11628 * 1.overwrite previous root.
11630 * 2.reinit reloc data root, this is because we skip pin
11631 * down reloc data tree before which means we can allocate
11632 * same block bytenr here.
11634 if (old->start == c->start) {
11635 btrfs_set_root_generation(&root->root_item,
11637 root->root_item.level = btrfs_header_level(root->node);
11638 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11639 &root->root_key, &root->root_item);
11641 free_extent_buffer(c);
11645 free_extent_buffer(old);
11647 add_root_to_dirty_list(root);
/*
 * Pin the extent occupied by tree block @eb and recurse into what it
 * references.
 *
 * A block whose range already has EXTENT_DIRTY set in
 * fs_info->pinned_extents is not pinned again. Two kinds of reference are
 * followed:
 * - ROOT_ITEM keys: the root block named by the root item is read and pinned
 *   recursively with tree_root = 0; the extent tree, tree reloc and data
 *   reloc roots are skipped.
 * - node block pointers: at level 1 of a tree other than the tree root the
 *   child is pinned by bytenr and nodesize without being read; otherwise the
 *   child is read and pinned recursively with the same @tree_root value.
 */
11651 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11652 struct extent_buffer *eb, int tree_root)
11654 struct extent_buffer *tmp;
11655 struct btrfs_root_item *ri;
11656 struct btrfs_key key;
11658 int level = btrfs_header_level(eb);
11664 * If we have pinned this block before, don't pin it again.
11665 * This can not only avoid forever loop with broken filesystem
11666 * but also give us some speedups.
11668 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11669 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11672 btrfs_pin_extent(fs_info, eb->start, eb->len);
11674 nritems = btrfs_header_nritems(eb);
11675 for (i = 0; i < nritems; i++) {
11677 btrfs_item_key_to_cpu(eb, &key, i);
11678 if (key.type != BTRFS_ROOT_ITEM_KEY)
11680 /* Skip the extent root and reloc roots */
11681 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11682 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11683 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11685 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11686 bytenr = btrfs_disk_root_bytenr(eb, ri);
11689 * If at any point we start needing the real root we
11690 * will have to build a stump root for the root we are
11691 * in, but for now this doesn't actually use the root so
11692 * just pass in extent_root.
11694 tmp = read_tree_block(fs_info, bytenr, 0);
11695 if (!extent_buffer_uptodate(tmp)) {
11696 fprintf(stderr, "Error reading root block\n");
11699 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11700 free_extent_buffer(tmp);
11704 bytenr = btrfs_node_blockptr(eb, i);
11706 /* If we aren't the tree root don't read the block */
11707 if (level == 1 && !tree_root) {
11708 btrfs_pin_extent(fs_info, bytenr,
11709 fs_info->nodesize);
11713 tmp = read_tree_block(fs_info, bytenr, 0);
11714 if (!extent_buffer_uptodate(tmp)) {
11715 fprintf(stderr, "Error reading tree block\n");
11718 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11719 free_extent_buffer(tmp);
/*
 * Pin the tree blocks reachable from the chunk root (tree_root = 0) and then
 * those reachable from the tree root (tree_root = 1). The result of the
 * second pin_down_tree_blocks() call is returned directly.
 */
11728 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11732 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11736 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * The avail_{data,metadata,system}_alloc_bits fields are cleared, then every
 * CHUNK_ITEM found in the chunk tree is turned into a block group with
 * btrfs_add_block_group() using the chunk type, key and length, and the
 * chunk range is marked dirty in fs_info->free_space_cache. A second loop
 * walks the resulting block groups with btrfs_lookup_first_block_group(),
 * advancing start to the end of each group.
 */
11739 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11741 struct btrfs_block_group_cache *cache;
11742 struct btrfs_path path;
11743 struct extent_buffer *leaf;
11744 struct btrfs_chunk *chunk;
11745 struct btrfs_key key;
11749 btrfs_init_path(&path);
11751 key.type = BTRFS_CHUNK_ITEM_KEY;
11753 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11755 btrfs_release_path(&path);
11760 * We do this in case the block groups were screwed up and had alloc
11761 * bits that aren't actually set on the chunks. This happens with
11762 * restored images every time and could happen in real life I guess.
11764 fs_info->avail_data_alloc_bits = 0;
11765 fs_info->avail_metadata_alloc_bits = 0;
11766 fs_info->avail_system_alloc_bits = 0;
11768 /* First we need to create the in-memory block groups */
11770 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11771 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11773 btrfs_release_path(&path);
11781 leaf = path.nodes[0];
11782 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11783 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11788 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11789 btrfs_add_block_group(fs_info, 0,
11790 btrfs_chunk_type(leaf, chunk),
11791 key.objectid, key.offset,
11792 btrfs_chunk_length(leaf, chunk));
/* Mark the whole chunk range in the free space cache tree */
11793 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11794 key.offset + btrfs_chunk_length(leaf, chunk));
11799 cache = btrfs_lookup_first_block_group(fs_info, start);
11803 start = cache->key.objectid + cache->key.offset;
11806 btrfs_release_path(&path);
/*
 * Remove the traces of a pending balance from the tree root and reinitialise
 * the data reloc tree, all inside transaction @trans.
 *
 * Visible steps:
 * 1) search for the (BTRFS_BALANCE_OBJECTID, BTRFS_BALANCE_ITEM_KEY) item and
 *    delete it with btrfs_del_item();
 * 2) search for ROOT_ITEMs of BTRFS_TREE_RELOC_OBJECTID and delete them in
 *    runs with btrfs_del_items();
 * 3) read the BTRFS_DATA_RELOC_TREE_OBJECTID root, record it in the
 *    transaction, reinitialise it with btrfs_fsck_reinit_root() and recreate
 *    its root directory with btrfs_make_root_dir().
 */
11810 static int reset_balance(struct btrfs_trans_handle *trans,
11811 struct btrfs_fs_info *fs_info)
11813 struct btrfs_root *root = fs_info->tree_root;
11814 struct btrfs_path path;
11815 struct extent_buffer *leaf;
11816 struct btrfs_key key;
11817 int del_slot, del_nr = 0;
11821 btrfs_init_path(&path);
11822 key.objectid = BTRFS_BALANCE_OBJECTID;
11823 key.type = BTRFS_BALANCE_ITEM_KEY;
11825 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11830 goto reinit_data_reloc;
11835 ret = btrfs_del_item(trans, root, &path);
11838 btrfs_release_path(&path);
11840 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11841 key.type = BTRFS_ROOT_ITEM_KEY;
11843 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11847 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11852 ret = btrfs_del_items(trans, root, &path,
11859 btrfs_release_path(&path);
11862 ret = btrfs_search_slot(trans, root, &key, &path,
11869 leaf = path.nodes[0];
11870 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11871 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11873 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11878 del_slot = path.slots[0];
11887 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11891 btrfs_release_path(&path);
/* From here on, root refers to the data reloc tree, not the tree root */
11894 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11895 key.type = BTRFS_ROOT_ITEM_KEY;
11896 key.offset = (u64)-1;
11897 root = btrfs_read_fs_root(fs_info, &key);
11898 if (IS_ERR(root)) {
11899 fprintf(stderr, "Error reading data reloc tree\n");
11900 ret = PTR_ERR(root);
11903 record_root_in_trans(trans, root);
11904 ret = btrfs_fsck_reinit_root(trans, root, 0);
11907 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11909 btrfs_release_path(&path);
/*
 * Rebuild the extent tree from scratch inside transaction @trans.
 *
 * Filesystems with the MIXED_GROUPS incompat flag are rejected with a
 * message. Otherwise the sequence is:
 * 1) pin_metadata_blocks() to protect metadata that is in use;
 * 2) btrfs_free_block_groups() followed by reset_block_groups() to recreate
 *    the in-memory block groups;
 * 3) btrfs_fsck_reinit_root() on the extent root;
 * 4) insert the item of every in-memory block group into the new extent
 *    tree, then call btrfs_extent_post_op();
 * 5) reset_balance() to clear a pending balance.
 */
11913 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11914 struct btrfs_fs_info *fs_info)
11920 * The only reason we don't do this is because right now we're just
11921 * walking the trees we find and pinning down their bytes, we don't look
11922 * at any of the leaves. In order to do mixed groups we'd have to check
11923 * the leaves of any fs roots and pin down the bytes for any file
11924 * extents we find. Not hard but why do it if we don't have to?
11926 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11927 fprintf(stderr, "We don't support re-initing the extent tree "
11928 "for mixed block groups yet, please notify a btrfs "
11929 "developer you want to do this so they can add this "
11930 "functionality.\n");
11935 * first we need to walk all of the trees except the extent tree and pin
11936 * down the bytes that are in use so we don't overwrite any existing
11939 ret = pin_metadata_blocks(fs_info);
11941 fprintf(stderr, "error pinning down used bytes\n");
11946 * Need to drop all the block groups since we're going to recreate all
11949 btrfs_free_block_groups(fs_info);
11950 ret = reset_block_groups(fs_info);
11952 fprintf(stderr, "error resetting the block groups\n");
11956 /* Ok we can allocate now, reinit the extent root */
11957 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11959 fprintf(stderr, "extent root initialization failed\n");
11961 * When the transaction code is updated we should end the
11962 * transaction, but for now progs only knows about commit so
11963 * just return an error.
11969 * Now we have all the in-memory block groups setup so we can make
11970 * allocations properly, and the metadata we care about is safe since we
11971 * pinned all of it above.
11974 struct btrfs_block_group_cache *cache;
11976 cache = btrfs_lookup_first_block_group(fs_info, start);
11979 start = cache->key.objectid + cache->key.offset;
11980 ret = btrfs_insert_item(trans, fs_info->extent_root,
11981 &cache->key, &cache->item,
11982 sizeof(cache->item));
11984 fprintf(stderr, "Error adding block group\n");
11987 btrfs_extent_post_op(trans, fs_info->extent_root);
11990 ret = reset_balance(trans, fs_info);
11992 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force tree block @eb to be COWed by searching for its first key with the
 * cow flag set.
 *
 * The owner root is looked up from the header owner of @eb, a transaction is
 * started on it, and btrfs_search_slot() is called with cow = 1 and
 * path.lowest_level set to the level of @eb. The key searched for is the
 * first node key for a node and the first item key for a leaf.
 *
 * Returns PTR_ERR() of the root or the transaction handle when either cannot
 * be obtained.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not stored here
 * -- confirm that ignoring a commit failure is intended.
 */
11997 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11999 struct btrfs_path path;
12000 struct btrfs_trans_handle *trans;
12001 struct btrfs_key key;
12004 printf("Recowing metadata block %llu\n", eb->start);
12005 key.objectid = btrfs_header_owner(eb);
12006 key.type = BTRFS_ROOT_ITEM_KEY;
12007 key.offset = (u64)-1;
12009 root = btrfs_read_fs_root(root->fs_info, &key);
12010 if (IS_ERR(root)) {
12011 fprintf(stderr, "Couldn't find owner root %llu\n",
12013 return PTR_ERR(root);
12016 trans = btrfs_start_transaction(root, 1);
12018 return PTR_ERR(trans);
12020 btrfs_init_path(&path);
/* Stop the search at the level of eb so that eb itself is on the path */
12021 path.lowest_level = btrfs_header_level(eb);
12022 if (path.lowest_level)
12023 btrfs_node_key_to_cpu(eb, &key, 0);
12025 btrfs_item_key_to_cpu(eb, &key, 0);
12027 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12028 btrfs_commit_transaction(trans, root);
12029 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the root identified by
 * bad->root_id.
 *
 * The owning root is read, a transaction is started on it, the item is
 * located with btrfs_search_slot() (ins_len = -1, cow = 1) and removed with
 * btrfs_del_item(), after which the transaction is committed.
 *
 * Returns PTR_ERR() of the root or the transaction handle when either cannot
 * be obtained.
 *
 * NOTE(review): the result of btrfs_commit_transaction() is not stored here
 * -- confirm that ignoring a commit failure is intended.
 */
12033 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12035 struct btrfs_path path;
12036 struct btrfs_trans_handle *trans;
12037 struct btrfs_key key;
12040 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12041 bad->key.type, bad->key.offset);
12042 key.objectid = bad->root_id;
12043 key.type = BTRFS_ROOT_ITEM_KEY;
12044 key.offset = (u64)-1;
12046 root = btrfs_read_fs_root(root->fs_info, &key);
12047 if (IS_ERR(root)) {
12048 fprintf(stderr, "Couldn't find owner root %llu\n",
12050 return PTR_ERR(root);
12053 trans = btrfs_start_transaction(root, 1);
12055 return PTR_ERR(trans);
12057 btrfs_init_path(&path);
12058 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12064 ret = btrfs_del_item(trans, root, &path);
12066 btrfs_commit_transaction(trans, root);
12067 btrfs_release_path(&path);
/*
 * Clear the log tree pointer in the superblock copy.
 *
 * Inside a new transaction on @root, both log_root and log_root_level of
 * fs_info->super_copy are set to 0, and the transaction is committed; ret
 * holds the commit result, or PTR_ERR() of the handle when the transaction
 * could not be started.
 */
12071 static int zero_log_tree(struct btrfs_root *root)
12073 struct btrfs_trans_handle *trans;
12076 trans = btrfs_start_transaction(root, 1);
12077 if (IS_ERR(trans)) {
12078 ret = PTR_ERR(trans);
12081 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12082 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12083 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data range beginning at @start.
 *
 * The range is processed one sector (fs_info->sectorsize) at a time while
 * offset is below len: the sector at start + offset is read into @buf with
 * read_extent_data() and handed to btrfs_csum_file_block(), which also
 * receives start + len as its third argument.
 */
12087 static int populate_csum(struct btrfs_trans_handle *trans,
12088 struct btrfs_root *csum_root, char *buf, u64 start,
12091 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12096 while (offset < len) {
12097 sectorsize = fs_info->sectorsize;
12098 ret = read_extent_data(fs_info, buf, start + offset,
12102 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12103 start + offset, buf, sectorsize);
12106 offset += sectorsize;
/*
 * Fill @csum_root with checksums for the file extents found in @cur_root.
 *
 * A sector-sized buffer is allocated, @cur_root is searched, and the items
 * are visited with btrfs_next_item(). Items that are not EXTENT_DATA, and
 * file extents whose type is not BTRFS_FILE_EXTENT_REG, are filtered out. For
 * the remaining ones, populate_csum() is called with the disk bytenr and disk
 * num_bytes of the extent. A result of -EEXIST from populate_csum() is
 * tested separately from other results.
 */
12111 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12112 struct btrfs_root *csum_root,
12113 struct btrfs_root *cur_root)
12115 struct btrfs_path path;
12116 struct btrfs_key key;
12117 struct extent_buffer *node;
12118 struct btrfs_file_extent_item *fi;
12125 buf = malloc(cur_root->fs_info->sectorsize);
12129 btrfs_init_path(&path);
12133 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12136 /* Iterate all regular file extents and fill its csum */
12138 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12140 if (key.type != BTRFS_EXTENT_DATA_KEY)
12142 node = path.nodes[0];
12143 slot = path.slots[0];
12144 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12145 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent, not the file range */
12147 start = btrfs_file_extent_disk_bytenr(node, fi);
12148 len = btrfs_file_extent_disk_num_bytes(node, fi);
12150 ret = populate_csum(trans, csum_root, buf, start, len);
12151 if (ret == -EEXIST)
12157 * TODO: if next leaf is corrupted, jump to nearest next valid
12160 ret = btrfs_next_item(cur_root, &path);
12170 btrfs_release_path(&path);
/*
 * Fill @csum_root by visiting every fs/subvolume tree listed in the tree
 * root.
 *
 * The tree root is searched starting at BTRFS_FS_TREE_OBJECTID and its items
 * are visited with btrfs_next_item(). Keys are tested against
 * BTRFS_LAST_FREE_OBJECTID, BTRFS_ROOT_ITEM_KEY and is_fstree(); for a root
 * that is processed, the root is read with btrfs_read_fs_root() and passed to
 * fill_csum_tree_from_one_fs_root().
 */
12175 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12176 struct btrfs_root *csum_root)
12178 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12179 struct btrfs_path path;
12180 struct btrfs_root *tree_root = fs_info->tree_root;
12181 struct btrfs_root *cur_root;
12182 struct extent_buffer *node;
12183 struct btrfs_key key;
12187 btrfs_init_path(&path);
12188 key.objectid = BTRFS_FS_TREE_OBJECTID;
12190 key.type = BTRFS_ROOT_ITEM_KEY;
12191 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12200 node = path.nodes[0];
12201 slot = path.slots[0];
12202 btrfs_item_key_to_cpu(node, &key, slot);
12203 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12205 if (key.type != BTRFS_ROOT_ITEM_KEY)
12207 if (!is_fstree(key.objectid))
/* Offset (u64)-1 is the lookup key used when reading a root */
12209 key.offset = (u64)-1;
12211 cur_root = btrfs_read_fs_root(fs_info, &key);
12212 if (IS_ERR(cur_root) || !cur_root) {
12213 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12217 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12222 ret = btrfs_next_item(tree_root, &path);
12232 btrfs_release_path(&path);
/*
 * Fill csum_root by walking the extent tree.
 *
 * The extent tree (fs_info->extent_root) is searched with a key of type
 * BTRFS_EXTENT_ITEM_KEY and iterated leaf by leaf (btrfs_next_leaf() is
 * called once the slot runs past the number of items in the leaf).  Items
 * are tested for the BTRFS_EXTENT_ITEM_KEY type and for
 * BTRFS_EXTENT_FLAG_DATA in their extent flags; populate_csum() is called
 * with key.objectid as the start of the range.
 *
 * A scratch buffer of fs_info->sectorsize bytes is allocated after the
 * initial search and passed to populate_csum().
 */
12236 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12237 struct btrfs_root *csum_root)
12239 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12240 struct btrfs_path path;
12241 struct btrfs_extent_item *ei;
12242 struct extent_buffer *leaf;
12244 struct btrfs_key key;
12247 btrfs_init_path(&path);
12249 key.type = BTRFS_EXTENT_ITEM_KEY;
12251 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12253 btrfs_release_path(&path);
12257 buf = malloc(csum_root->fs_info->sectorsize);
12259 btrfs_release_path(&path);
/* End of the current leaf reached: advance to the next one */
12264 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12265 ret = btrfs_next_leaf(extent_root, &path);
12273 leaf = path.nodes[0];
12275 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12276 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12281 ei = btrfs_item_ptr(leaf, path.slots[0],
12282 struct btrfs_extent_item);
/* Test for the data flag in the extent flags */
12283 if (!(btrfs_extent_flags(leaf, ei) &
12284 BTRFS_EXTENT_FLAG_DATA)) {
/* The extent start is taken from the objectid of the item key */
12289 ret = populate_csum(trans, csum_root, buf, key.objectid,
12296 btrfs_release_path(&path);
12302 * Recalculate the csum and put it into the csum tree.
12304 * Extent tree init will wipe out all the extent info, so in that case, we
12305 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
12306 * will use fs/subvol trees to init the csum tree.
/*
 * Entry point for refilling the csum tree: selects one of the two walkers
 * based on search_fs_tree and returns its result.
 */
12308 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12309 struct btrfs_root *csum_root,
12310 int search_fs_tree)
/* Non-zero search_fs_tree: walk the fs/subvolume trees */
12312 if (search_fs_tree)
12313 return fill_csum_tree_from_fs(trans, csum_root);
/* Otherwise: walk the extent tree */
12315 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the file-scope roots_info_cache.
 *
 * The function first tests roots_info_cache for NULL.  While the cache tree
 * is not empty, its first cache_extent is removed from the tree and mapped
 * back to the enclosing struct root_item_info with container_of().  Finally
 * the tree object itself is freed and the global pointer is reset to NULL.
 */
12318 static void free_roots_info_cache(void)
12320 if (!roots_info_cache)
12323 while (!cache_tree_empty(roots_info_cache)) {
12324 struct cache_extent *entry;
12325 struct root_item_info *rii;
12327 entry = first_cache_extent(roots_info_cache);
/* Unlink the entry before recovering the structure that embeds it */
12330 remove_cache_extent(roots_info_cache, entry);
12331 rii = container_of(entry, struct root_item_info, cache_extent);
12335 free(roots_info_cache);
/* Reset the pointer so that build_roots_info_cache() allocates a new tree */
12336 roots_info_cache = NULL;
/*
 * Build roots_info_cache: one struct root_item_info per root id, describing
 * the highest-level tree block found for that root in the extent tree.
 *
 * The cache tree is allocated and initialised on first use.  The extent tree
 * (info->extent_root) is then iterated leaf by leaf.  Only
 * BTRFS_EXTENT_ITEM_KEY and BTRFS_METADATA_ITEM_KEY items are considered,
 * and an EXTENT_ITEM is additionally tested for BTRFS_EXTENT_FLAG_TREE_BLOCK.
 *
 * Where the level and the first inline ref come from depends on the key type:
 *   - METADATA_ITEM: level is found_key.offset, the inline ref follows the
 *     extent item directly;
 *   - EXTENT_ITEM: a struct btrfs_tree_block_info follows the extent item,
 *     the level is read from it and the inline ref follows it.
 *
 * The first inline ref is tested for type BTRFS_TREE_BLOCK_REF_KEY and its
 * offset is used as the root id, which keys the cache (start = root id,
 * size = 1).  For each root the entry records level, bytenr, generation and
 * node_count of the current candidate root node.
 */
12339 static int build_roots_info_cache(struct btrfs_fs_info *info)
12342 struct btrfs_key key;
12343 struct extent_buffer *leaf;
12344 struct btrfs_path path;
/* Lazily create the cache tree */
12346 if (!roots_info_cache) {
12347 roots_info_cache = malloc(sizeof(*roots_info_cache));
12348 if (!roots_info_cache)
12350 cache_tree_init(roots_info_cache);
12353 btrfs_init_path(&path);
12355 key.type = BTRFS_EXTENT_ITEM_KEY;
12357 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12360 leaf = path.nodes[0];
12363 struct btrfs_key found_key;
12364 struct btrfs_extent_item *ei;
12365 struct btrfs_extent_inline_ref *iref;
12366 int slot = path.slots[0];
12371 struct cache_extent *entry;
12372 struct root_item_info *rii;
/* Past the last item of this leaf: move on and refresh leaf and slot */
12374 if (slot >= btrfs_header_nritems(leaf)) {
12375 ret = btrfs_next_leaf(info->extent_root, &path);
12382 leaf = path.nodes[0];
12383 slot = path.slots[0];
12386 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only the two key types that can describe tree blocks are of interest */
12388 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12389 found_key.type != BTRFS_METADATA_ITEM_KEY)
12392 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12393 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM without the tree block flag is tested for here */
12395 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12396 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* METADATA_ITEM: level comes from the key offset, iref follows the item */
12399 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12400 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12401 level = found_key.offset;
12403 struct btrfs_tree_block_info *binfo;
12405 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12406 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12407 level = btrfs_tree_block_level(leaf, binfo);
12411 * For a root extent, it must be of the following type and the
12412 * first (and only one) iref in the item.
12414 type = btrfs_extent_inline_ref_type(leaf, iref);
12415 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The offset of the inline ref is used as the id of the owning root */
12418 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12419 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* New entry: level (u8)-1 serves as the marker for no level recorded yet */
12421 rii = malloc(sizeof(struct root_item_info));
12426 rii->cache_extent.start = root_id;
12427 rii->cache_extent.size = 1;
12428 rii->level = (u8)-1;
12429 entry = &rii->cache_extent;
12430 ret = insert_cache_extent(roots_info_cache, entry);
12433 rii = container_of(entry, struct root_item_info,
12437 ASSERT(rii->cache_extent.start == root_id);
12438 ASSERT(rii->cache_extent.size == 1);
/*
 * A block at a higher level than the recorded one (or the first block seen
 * for this root) becomes the new candidate root node.
 */
12440 if (level > rii->level || rii->level == (u8)-1) {
12441 rii->level = level;
12442 rii->bytenr = found_key.objectid;
12443 rii->gen = btrfs_extent_generation(leaf, ei);
12444 rii->node_count = 1;
/*
 * Equal level: presumably node_count is bumped so that ambiguous candidates
 * can be detected later (maybe_repair_root_item() tests node_count != 1) -
 * confirm.
 */
12445 } else if (level == rii->level) {
12453 btrfs_release_path(&path);
/*
 * Compare the root item at path (leaf path->nodes[0], slot path->slots[0])
 * against the information collected in roots_info_cache for the same root
 * id and, unless read_only_mode is set, rewrite the item in place.
 *
 * path:           positioned at the root item to inspect
 * root_key:       key of that item; its objectid is used as the root id
 * read_only_mode: when non-zero the item is only checked, never written
 *
 * An error message is produced when the cache has no entry for the root and
 * when the entry has a node_count other than 1.  Otherwise bytenr, level and
 * generation of the on-disk item are compared with the cached values.  On a
 * mismatch a message is printed (suppressed when both read_only_mode and the
 * global repair flag are set), a root item generation newer than the cached
 * one is reported, and in writable mode the three fields are updated and the
 * item is copied back into the leaf with write_extent_buffer().
 *
 * NOTE(review): the return values are not visible in this listing;
 * the caller prints the result as a count of roots - confirm the convention.
 */
12458 static int maybe_repair_root_item(struct btrfs_path *path,
12459 const struct btrfs_key *root_key,
12460 const int read_only_mode)
12462 const u64 root_id = root_key->objectid;
12463 struct cache_extent *entry;
12464 struct root_item_info *rii;
12465 struct btrfs_root_item ri;
12466 unsigned long offset;
/* The cache is keyed by root id with a range size of 1 */
12468 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12471 "Error: could not find extent items for root %llu\n",
12472 root_key->objectid);
12476 rii = container_of(entry, struct root_item_info, cache_extent);
12477 ASSERT(rii->cache_extent.start == root_id);
12478 ASSERT(rii->cache_extent.size == 1);
/* Exactly one candidate root node is required to proceed */
12480 if (rii->node_count != 1) {
12482 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the root item read out of the leaf */
12487 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12488 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
/* Any difference in bytenr, level or generation marks the item as outdated */
12490 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12491 btrfs_root_level(&ri) != rii->level ||
12492 btrfs_root_generation(&ri) != rii->gen) {
12495 * If we're in repair mode but our caller told us to not update
12496 * the root item, i.e. just check if it needs to be updated, don't
12497 * print this message, since the caller will call us again shortly
12498 * for the same root item without read only mode (the caller will
12499 * open a transaction first).
12501 if (!(read_only_mode && repair))
12503 "%sroot item for root %llu,"
12504 " current bytenr %llu, current gen %llu, current level %u,"
12505 " new bytenr %llu, new gen %llu, new level %u\n",
12506 (read_only_mode ? "" : "fixing "),
12508 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12509 btrfs_root_level(&ri),
12510 rii->bytenr, rii->gen, rii->level);
12512 if (btrfs_root_generation(&ri) > rii->gen) {
12514 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12515 root_id, btrfs_root_generation(&ri), rii->gen);
12519 if (!read_only_mode) {
12520 btrfs_set_root_bytenr(&ri, rii->bytenr);
12521 btrfs_set_root_level(&ri, rii->level);
12522 btrfs_set_root_generation(&ri, rii->gen);
12523 write_extent_buffer(path->nodes[0], &ri,
12524 offset, sizeof(ri));
12534 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12535 * caused read-only snapshots to be corrupted if they were created at a moment
12536 * when the source subvolume/snapshot had orphan items. The issue was that the
12537 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12538 * node instead of the post orphan cleanup root node.
12539 * So this function, and its callees, just detects and fixes those cases. Even
12540 * though the regression was for read-only snapshots, this function applies to
12541 * any snapshot/subvolume root.
12542 * This must be run before any other repair code - otherwise the other repair
12543 * code may delete or modify backrefs in the extent tree, for example, which
12544 * will result in an inconsistent fs after repairing the root items.
/*
 * Check, and in repair mode fix, the root items stored in the root tree.
 *
 * build_roots_info_cache() is run first.  The root tree (info->tree_root) is
 * then searched from a key with objectid BTRFS_FIRST_FREE_OBJECTID and type
 * BTRFS_ROOT_ITEM_KEY.  Each BTRFS_ROOT_ITEM_KEY item is passed to
 * maybe_repair_root_item(); items with objectid BTRFS_TREE_RELOC_OBJECTID
 * are tested for separately.
 *
 * maybe_repair_root_item() is called in read-only mode whenever no
 * transaction handle is held (trans is NULL) and in writable mode otherwise.
 * When no transaction is held and the global repair flag is set, the path is
 * released; presumably the leaf is then revisited with a transaction opened
 * via btrfs_start_transaction() (see need_trans) - confirm.
 *
 * At the end of a leaf find_next_key() computes the key to continue from,
 * the path is released and the transaction is committed.  On exit the cache
 * is freed, the path is released and the transaction is committed.
 */
12546 static int repair_root_items(struct btrfs_fs_info *info)
12548 struct btrfs_path path;
12549 struct btrfs_key key;
12550 struct extent_buffer *leaf;
12551 struct btrfs_trans_handle *trans = NULL;
12554 int need_trans = 0;
12556 btrfs_init_path(&path);
/* Collect the expected root node information for every root id */
12558 ret = build_roots_info_cache(info);
12562 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12563 key.type = BTRFS_ROOT_ITEM_KEY;
12568 * Avoid opening and committing transactions if a leaf doesn't have
12569 * any root items that need to be fixed, so that we avoid rotating
12570 * backup roots unnecessarily.
12573 trans = btrfs_start_transaction(info->tree_root, 1);
12574 if (IS_ERR(trans)) {
12575 ret = PTR_ERR(trans);
12580 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12584 leaf = path.nodes[0];
12587 struct btrfs_key found_key;
12589 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12590 int no_more_keys = find_next_key(&path, &key);
12592 btrfs_release_path(&path);
12594 ret = btrfs_commit_transaction(trans,
12606 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12608 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12610 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12613 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12617 if (!trans && repair) {
12620 btrfs_release_path(&path);
12630 free_roots_info_cache();
12631 btrfs_release_path(&path);
12633 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache of every block group and then set the cache
 * generation in the super block copy to (u64)-1.
 *
 * Block groups are visited in address order: the first block group at or
 * after the cursor is looked up, btrfs_clear_free_space_cache() is called on
 * it, and the cursor is advanced to the end of that block group
 * (key.objectid + key.offset).  The super block update is done inside a
 * transaction started on fs_info->tree_root.
 *
 * A failure to start that transaction is reported with error() and returned
 * as PTR_ERR(trans).
 */
12640 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12642 struct btrfs_trans_handle *trans;
12643 struct btrfs_block_group_cache *bg_cache;
12647 /* Clear all free space cache inodes and its extent data */
12649 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12652 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue the scan right after the block group that was just handled */
12655 current = bg_cache->key.objectid + bg_cache->key.offset;
12658 /* Don't forget to set cache_generation to -1 */
12659 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12660 if (IS_ERR(trans)) {
12661 error("failed to update super block cache generation");
12662 return PTR_ERR(trans);
12664 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/* NOTE(review): the result of the commit is not captured on this line */
12665 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Usage text for the check command; passed to usage() by cmd_check().
 * The first entries give the synopsis and the short and long descriptions,
 * the remaining entries describe one option (or a continuation line) each.
 */
12670 const char * const cmd_check_usage[] = {
12671 "btrfs check [options] <device>",
12672 "Check structural integrity of a filesystem (unmounted).",
12673 "Check structural integrity of an unmounted filesystem. Verify internal",
12674 "trees' consistency and item connectivity. In the repair mode try to",
12675 "fix the problems found. ",
12676 "WARNING: the repair mode is considered dangerous",
12678 "-s|--super <superblock> use this superblock copy",
12679 "-b|--backup use the first valid backup root copy",
12680 "--repair try to repair the filesystem",
12681 "--readonly run in read-only mode (default)",
12682 "--init-csum-tree create a new CRC tree",
12683 "--init-extent-tree create a new extent tree",
12684 "--mode <MODE> allows choice of memory/IO trade-offs",
12685 " where MODE is one of:",
12686 " original - read inodes and extents to memory (requires",
12687 " more memory, does less IO)",
12688 " lowmem - try to use less memory but read blocks again",
12690 "--check-data-csum verify checksums of data blocks",
12691 "-Q|--qgroup-report print a report on qgroup consistency",
12692 "-E|--subvol-extents <subvolid>",
12693 " print subvolume extents and sharing state",
12694 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12695 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12696 "-p|--progress indicate progress",
12697 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of the check command.
 *
 * Visible phases, in order:
 *   1. Option parsing with getopt_long(); several options add flags to
 *      ctree_flags (OPEN_CTREE_WRITES, OPEN_CTREE_BACKUP_ROOT,
 *      OPEN_CTREE_NO_BLOCK_GROUPS, OPEN_CTREE_INVALIDATE_FST).
 *   2. Sanity checks: exactly one positional argument, --readonly versus
 *      repair options, lowmem mode versus repair, and the mount status of
 *      the device via check_mounted().
 *   3. Opening the filesystem with open_ctree_fs_info().
 *   4. One-shot operations: clearing the free space cache (v1) or the free
 *      space tree (v2), the qgroup report, and the subvolume extent report.
 *   5. In repair mode with a log root present: asking the user and zeroing
 *      the log tree.
 *   6. Optional re-initialisation of the extent tree and/or the csum tree
 *      inside one transaction on info->extent_root.
 *   7. The checks proper: chunks and extents, root items, free space
 *      cache/tree, fs roots, csums, root refs, followed by re-cowing of
 *      extent buffers queued on recow_ebs, deletion of queued bad items and
 *      the quota group check/repair.
 *   8. Printing of the collected statistics and cleanup.
 *
 * argc/argv: command line of the subcommand; argv[optind] is the device.
 */
12701 int cmd_check(int argc, char **argv)
12703 struct cache_tree root_cache;
12704 struct btrfs_root *root;
12705 struct btrfs_fs_info *info;
12708 u64 tree_root_bytenr = 0;
12709 u64 chunk_root_bytenr = 0;
12710 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12714 int init_csum_tree = 0;
12716 int clear_space_cache = 0;
12717 int qgroup_report = 0;
12718 int qgroups_repaired = 0;
12719 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Identifiers for long-only options start at 257 */
12723 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12724 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12725 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12726 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12727 static const struct option long_options[] = {
12728 { "super", required_argument, NULL, 's' },
12729 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12730 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12731 { "init-csum-tree", no_argument, NULL,
12732 GETOPT_VAL_INIT_CSUM },
12733 { "init-extent-tree", no_argument, NULL,
12734 GETOPT_VAL_INIT_EXTENT },
12735 { "check-data-csum", no_argument, NULL,
12736 GETOPT_VAL_CHECK_CSUM },
12737 { "backup", no_argument, NULL, 'b' },
12738 { "subvol-extents", required_argument, NULL, 'E' },
12739 { "qgroup-report", no_argument, NULL, 'Q' },
12740 { "tree-root", required_argument, NULL, 'r' },
12741 { "chunk-root", required_argument, NULL,
12742 GETOPT_VAL_CHUNK_TREE },
12743 { "progress", no_argument, NULL, 'p' },
12744 { "mode", required_argument, NULL,
12746 { "clear-space-cache", required_argument, NULL,
12747 GETOPT_VAL_CLEAR_SPACE_CACHE},
12748 { NULL, 0, NULL, 0}
/*
 * NOTE(review): in the short option string below E is not followed by a
 * colon, while the long form --subvol-extents is declared with
 * required_argument and the usage text documents -E <subvolid>.  Confirm
 * that optarg is valid when the short form is used.
 */
12751 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
12755 case 'a': /* ignored */ break;
12757 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12760 num = arg_strtou64(optarg);
12761 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12763 "super mirror should be less than %d",
12764 BTRFS_SUPER_MIRROR_MAX);
12767 bytenr = btrfs_sb_offset(((int)num));
12768 printf("using SB copy %llu, bytenr %llu\n", num,
12769 (unsigned long long)bytenr);
12775 subvolid = arg_strtou64(optarg);
12778 tree_root_bytenr = arg_strtou64(optarg);
12780 case GETOPT_VAL_CHUNK_TREE:
12781 chunk_root_bytenr = arg_strtou64(optarg);
12784 ctx.progress_enabled = true;
12788 usage(cmd_check_usage);
12789 case GETOPT_VAL_REPAIR:
12790 printf("enabling repair mode\n");
12792 ctree_flags |= OPEN_CTREE_WRITES;
12794 case GETOPT_VAL_READONLY:
12797 case GETOPT_VAL_INIT_CSUM:
12798 printf("Creating a new CRC tree\n");
12799 init_csum_tree = 1;
12801 ctree_flags |= OPEN_CTREE_WRITES;
12803 case GETOPT_VAL_INIT_EXTENT:
12804 init_extent_tree = 1;
12805 ctree_flags |= (OPEN_CTREE_WRITES |
12806 OPEN_CTREE_NO_BLOCK_GROUPS);
12809 case GETOPT_VAL_CHECK_CSUM:
12810 check_data_csum = 1;
12812 case GETOPT_VAL_MODE:
12813 check_mode = parse_check_mode(optarg);
12814 if (check_mode == CHECK_MODE_UNKNOWN) {
12815 error("unknown mode: %s", optarg);
12819 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12820 if (strcmp(optarg, "v1") == 0) {
12821 clear_space_cache = 1;
12822 } else if (strcmp(optarg, "v2") == 0) {
12823 clear_space_cache = 2;
12824 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12827 "invalid argument to --clear-space-cache, must be v1 or v2");
12830 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument (the device) is required */
12835 if (check_argc_exact(argc - optind, 1))
12836 usage(cmd_check_usage);
/* Progress reporting is set up only when it was requested */
12838 if (ctx.progress_enabled) {
12839 ctx.tp = TASK_NOTHING;
12840 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12843 /* This check is the only reason for --readonly to exist */
12844 if (readonly && repair) {
12845 error("repair options are not compatible with --readonly");
12850 * Not supported yet
12852 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12853 error("low memory mode doesn't support repair yet");
12858 cache_tree_init(&root_cache);
12860 if((ret = check_mounted(argv[optind])) < 0) {
12861 error("could not check mount status: %s", strerror(-ret));
12865 error("%s is currently mounted, aborting", argv[optind]);
12871 /* only allow partial opening under repair mode */
12873 ctree_flags |= OPEN_CTREE_PARTIAL;
12875 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12876 chunk_root_bytenr, ctree_flags);
12878 error("cannot open file system");
12884 global_info = info;
12885 root = info->fs_root;
/* Value 1 was set for the v1 argument, value 2 for v2 */
12886 if (clear_space_cache == 1) {
12887 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12889 "free space cache v2 detected, use --clear-space-cache v2");
12893 printf("Clearing free space cache\n");
12894 ret = clear_free_space_cache(info);
12896 error("failed to clear free space cache");
12899 printf("Free space cache cleared\n");
12902 } else if (clear_space_cache == 2) {
12903 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12904 printf("no free space cache v2 to clear\n");
12908 printf("Clear free space cache v2\n");
12909 ret = btrfs_clear_free_space_tree(info);
12911 error("failed to clear free space cache v2: %d", ret);
12914 printf("free space cache v2 cleared\n");
12920 * repair mode will force us to commit transaction which
12921 * will make us fail to load log tree when mounting.
12923 if (repair && btrfs_super_log_root(info->super_copy)) {
12924 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12930 ret = zero_log_tree(root);
12933 error("failed to zero log tree: %d", ret);
12938 uuid_unparse(info->super_copy->fsid, uuidbuf);
12939 if (qgroup_report) {
12940 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12942 ret = qgroup_verify_all(info);
12949 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12950 subvolid, argv[optind], uuidbuf);
12951 ret = print_extent_state(info, subvolid);
12955 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* The tree root, device root and chunk root must all be readable */
12957 if (!extent_buffer_uptodate(info->tree_root->node) ||
12958 !extent_buffer_uptodate(info->dev_root->node) ||
12959 !extent_buffer_uptodate(info->chunk_root->node)) {
12960 error("critical roots corrupted, unable to check the filesystem");
/* Tree re-initialisation runs inside one transaction on the extent root */
12966 if (init_extent_tree || init_csum_tree) {
12967 struct btrfs_trans_handle *trans;
12969 trans = btrfs_start_transaction(info->extent_root, 0);
12970 if (IS_ERR(trans)) {
12971 error("error starting transaction");
12972 ret = PTR_ERR(trans);
12977 if (init_extent_tree) {
12978 printf("Creating a new extent tree\n");
12979 ret = reinit_extent_tree(trans, info);
12985 if (init_csum_tree) {
12986 printf("Reinitialize checksum tree\n");
12987 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12989 error("checksum tree initialization failed: %d",
12996 ret = fill_csum_tree(trans, info->csum_root,
13000 error("checksum tree refilling failed: %d", ret);
13005 * Ok now we commit and run the normal fsck, which will add
13006 * extent entries for all of the items it finds.
13008 ret = btrfs_commit_transaction(trans, info->extent_root);
13013 if (!extent_buffer_uptodate(info->extent_root->node)) {
13014 error("critical: extent_root, unable to check the filesystem");
13019 if (!extent_buffer_uptodate(info->csum_root->node)) {
13020 error("critical: csum_root, unable to check the filesystem");
13026 if (!ctx.progress_enabled)
13027 fprintf(stderr, "checking extents\n");
/* The lowmem mode uses the _v2 variant of the extent check */
13028 if (check_mode == CHECK_MODE_LOWMEM)
13029 ret = check_chunks_and_extents_v2(root);
13031 ret = check_chunks_and_extents(root);
13035 "errors found in extent allocation tree or chunk allocation");
13037 ret = repair_root_items(info);
13040 error("failed to repair root items: %s", strerror(-ret));
13044 fprintf(stderr, "Fixed %d roots.\n", ret);
13046 } else if (ret > 0) {
13048 "Found %d roots with an outdated root item.\n",
13051 "Please run a filesystem check with the option --repair to fix them.\n");
13057 if (!ctx.progress_enabled) {
13058 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13059 fprintf(stderr, "checking free space tree\n");
13061 fprintf(stderr, "checking free space cache\n");
13063 ret = check_space_cache(root);
13066 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13067 error("errors found in free space tree");
13069 error("errors found in free space cache");
13074 * We used to have to have these hole extents in between our real
13075 * extents so if we don't have this flag set we need to make sure there
13076 * are no gaps in the file extents for inodes, otherwise we can just
13077 * ignore it when this happens.
13079 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13080 if (!ctx.progress_enabled)
13081 fprintf(stderr, "checking fs roots\n");
13082 if (check_mode == CHECK_MODE_LOWMEM)
13083 ret = check_fs_roots_v2(root->fs_info);
13085 ret = check_fs_roots(root, &root_cache);
13088 error("errors found in fs roots");
13092 fprintf(stderr, "checking csums\n");
13093 ret = check_csums(root);
13096 error("errors found in csum tree");
13100 fprintf(stderr, "checking root refs\n");
13101 /* For low memory mode, check_fs_roots_v2 handles root refs */
13102 if (check_mode != CHECK_MODE_LOWMEM) {
13103 ret = check_root_refs(root, &root_cache);
13106 error("errors found in root refs");
13111 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13112 struct extent_buffer *eb;
13114 eb = list_first_entry(&root->fs_info->recow_ebs,
13115 struct extent_buffer, recow);
13116 list_del_init(&eb->recow);
13117 ret = recow_extent_buffer(root, eb);
13120 error("fails to fix transid errors");
13125 while (!list_empty(&delete_items)) {
13126 struct bad_item *bad;
13128 bad = list_first_entry(&delete_items, struct bad_item, list);
13129 list_del_init(&bad->list);
13131 ret = delete_bad_item(root, bad);
13137 if (info->quota_enabled) {
13138 fprintf(stderr, "checking quota groups\n");
13139 ret = qgroup_verify_all(info);
13142 error("failed to check quota groups");
13146 ret = repair_qgroups(info, &qgroups_repaired);
13149 error("failed to repair quota groups");
13155 if (!list_empty(&root->fs_info->recow_ebs)) {
13156 error("transid errors in file system");
13161 if (found_old_backref) { /*
13162 * there was a disk format change when mixed
13163 * backref was in testing tree. The old format
13164 * existed about one week.
13166 printf("\n * Found old mixed backref format. "
13167 "The old format is not supported! *"
13168 "\n * Please mount the FS in readonly mode, "
13169 "backup data and re-format the FS. *\n\n");
13172 printf("found %llu bytes used, ",
13173 (unsigned long long)bytes_used);
13175 printf("error(s) found\n");
13177 printf("no error found\n");
13178 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13179 printf("total tree bytes: %llu\n",
13180 (unsigned long long)total_btree_bytes);
13181 printf("total fs tree bytes: %llu\n",
13182 (unsigned long long)total_fs_tree_bytes);
13183 printf("total extent tree bytes: %llu\n",
13184 (unsigned long long)total_extent_tree_bytes);
13185 printf("btree space waste bytes: %llu\n",
13186 (unsigned long long)btree_space_waste);
13187 printf("file data blocks allocated: %llu\n referenced %llu\n",
13188 (unsigned long long)data_bytes_allocated,
13189 (unsigned long long)data_bytes_referenced);
13191 free_qgroup_counts();
13192 free_root_recs_tree(&root_cache);
13196 if (ctx.progress_enabled)
13197 task_deinit(ctx.info);