2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
/*
 * Progress-reporting declarations: the final enumerator of the task position
 * enum, followed by the two struct task_ctx members that
 * print_status_check() reads.
 */
51 TASK_NOTHING, /* have to be the last element */
/* Position currently being reported; TASK_NOTHING is tested by the printer. */
56 enum task_position tp;
/* Handle handed to task_period_start() and task_period_wait(). */
58 struct task_info *info;
/*
 * File-scope state of the checker: u64 counters, two list heads, integer
 * option flags and pointers to shared objects. All are static, so they are
 * only reachable from this translation unit.
 */
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
/* When non-zero, maybe_free_inode_rec() skips its file extent gap check. */
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
/* Zero-initialized progress context (same type print_status_check() takes). */
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
/*
 * Checker implementation modes. CHECK_MODE_DEFAULT is an alias for
 * CHECK_MODE_ORIGINAL; parse_check_mode() maps user strings onto these values.
 */
79 enum btrfs_check_mode {
83 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
/* Mode selected for this run; starts out as the default mode. */
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Common part of a back reference. It is embedded as the `node` member of
 * struct data_backref and struct tree_backref, and is recovered from its
 * list linkage with to_extent_backref().
 */
88 struct extent_backref {
/* List linkage used by to_extent_backref(). */
89 struct list_head list;
/* One-bit state flags. */
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
/*
 * Convert a pointer to the `list` member of a struct extent_backref into a
 * pointer to the containing struct extent_backref.
 */
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
/*
 * Back reference that embeds the common struct extent_backref as `node`;
 * to_data_backref() converts from the embedded part back to this struct.
 */
102 struct data_backref {
103 struct extent_backref node;
/*
 * Error bit flags. Each macro occupies its own bit (bits 1 through 17), so
 * several of them can be OR-ed into one integer; the trailing comment on each
 * line names the condition the bit stands for.
 */
117 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
118 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
119 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
120 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
121 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
122 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
123 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
124 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
125 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
126 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
127 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
128 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
129 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
130 #define NO_INODE_ITEM (1<<14) /* no inode_item */
131 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
132 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
133 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
/*
 * Convert a pointer to the embedded `node` of a struct data_backref into a
 * pointer to the containing struct data_backref. The caller must only pass
 * backrefs that really are embedded in a data_backref.
 */
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
141 * Much like data_backref, but with the undetermined members removed
142 * and changed to use list_head.
143 * During extent scan, it is stored in root->orphan_data_extent.
144 * During fs tree scan, it is then moved to inode_rec->orphan_extents.
/*
 * One orphan data extent. Entries are chained through `list`;
 * clone_inode_rec() copies them and free_orphan_data_extents() releases
 * them. print_orphan_data_extents() also reads the objectid, offset and
 * disk_bytenr members.
 */
146 struct orphan_data_extent {
147 struct list_head list;
/*
 * Back reference that embeds the common struct extent_backref as `node`;
 * to_tree_backref() converts from the embedded part back to this struct.
 */
155 struct tree_backref {
156 struct extent_backref node;
/*
 * Convert a pointer to the embedded `node` of a struct tree_backref into a
 * pointer to the containing struct tree_backref. The caller must only pass
 * backrefs that really are embedded in a tree_backref.
 */
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
/*
 * Value 2 needs two bits, which is why
 * extent_record::flag_block_full_backref is declared as a 2-bit field.
 */
168 /* Explicit initialization for extent_record::flag_block_full_backref */
169 enum { FLAG_UNSET = 2 };
/*
 * Per-extent bookkeeping record. It carries three list heads, a cache_extent
 * node, the parent key and generation, a reference count taken from the
 * extent item, and a set of bit-field flags.
 */
171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
/* Linkage used by to_extent_record(). */
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
181 u64 extent_item_refs;
183 u64 parent_generation;
/* Two bits wide so that it can hold FLAG_UNSET (2) in addition to 0 and 1. */
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
/*
 * Convert a pointer to the `list` member of a struct extent_record into a
 * pointer to the containing struct extent_record.
 */
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
/*
 * One (directory, name) reference to an inode, kept on
 * inode_record::backrefs. add_inode_backref() sets each found_* bit when the
 * corresponding item type is seen.
 */
203 struct inode_backref {
/* Linkage on inode_record::backrefs; used by to_inode_backref(). */
204 struct list_head list;
/* Set when a BTRFS_DIR_ITEM_KEY item was seen for this reference. */
205 unsigned int found_dir_item:1;
/* Set when a BTRFS_DIR_INDEX_KEY item was seen for this reference. */
206 unsigned int found_dir_index:1;
/* Set when an INODE_REF or INODE_EXTREF item was seen for this reference. */
207 unsigned int found_inode_ref:1;
/*
 * Convert a pointer to the `list` member of a struct inode_backref into a
 * pointer to the containing struct inode_backref.
 */
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
/*
 * Record describing one root item; it can be linked into a list and stores
 * a drop key.
 * NOTE(review): presumably drop_key mirrors the root item's drop progress -
 * confirm against the code that fills it in.
 */
222 struct root_item_record {
223 struct list_head list;
230 struct btrfs_key drop_key;
/*
 * Error bits for a single back reference (inode_backref::errors).
 * print_ref_error() turns each set bit into a human-readable message.
 */
233 #define REF_ERR_NO_DIR_ITEM (1 << 0)
234 #define REF_ERR_NO_DIR_INDEX (1 << 1)
235 #define REF_ERR_NO_INODE_REF (1 << 2)
236 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
237 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
238 #define REF_ERR_DUP_INODE_REF (1 << 5)
239 #define REF_ERR_INDEX_UNMATCH (1 << 6)
240 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
241 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
242 #define REF_ERR_NO_ROOT_REF (1 << 9)
243 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
244 #define REF_ERR_DUP_ROOT_REF (1 << 11)
245 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/*
 * One hole (range not covered by file extents) of an inode, stored in the
 * inode_record::holes rbtree. The helpers in this file use its `node`,
 * `start` and `len` members.
 */
247 struct file_extent_hole {
/*
 * Everything collected about one inode while scanning a tree: the list of
 * back references, state bits recording which item kinds were seen, the
 * rbtree of file extent holes and the list of orphan data extents.
 */
253 struct inode_record {
/* List of struct inode_backref. */
254 struct list_head backrefs;
/* Tested by maybe_free_inode_rec() before the final consistency checks run. */
255 unsigned int checked:1;
256 unsigned int merging:1;
/* Set by process_inode_item() once the INODE_ITEM has been read. */
257 unsigned int found_inode_item:1;
258 unsigned int found_dir_item:1;
259 unsigned int found_file_extent:1;
260 unsigned int found_csum_item:1;
261 unsigned int some_csum_missing:1;
262 unsigned int nodatasum:1;
/* rbtree of struct file_extent_hole. */
275 struct rb_root holes;
/* List of struct orphan_data_extent. */
276 struct list_head orphan_extents;
/*
 * Error bits for a whole inode (inode_record::errors). print_inode_error()
 * turns each set bit into a human-readable message.
 */
281 #define I_ERR_NO_INODE_ITEM (1 << 0)
282 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
283 #define I_ERR_DUP_INODE_ITEM (1 << 2)
284 #define I_ERR_DUP_DIR_INDEX (1 << 3)
285 #define I_ERR_ODD_DIR_ITEM (1 << 4)
286 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
287 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
288 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
289 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
290 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
291 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
292 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
293 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
294 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
295 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/*
 * One reference to a root (subvolume), kept on a list and recovered from
 * its list linkage with to_root_backref(). The one-bit members record which
 * kinds of reference items were found for it.
 */
297 struct root_backref {
298 struct list_head list;
299 unsigned int found_dir_item:1;
300 unsigned int found_dir_index:1;
301 unsigned int found_back_ref:1;
302 unsigned int found_forward_ref:1;
303 unsigned int reachable:1;
/*
 * Convert a pointer to the `list` member of a struct root_backref into a
 * pointer to the containing struct root_backref.
 */
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
314 return list_entry(entry, struct root_backref, list);
/*
 * Members of three cache-tree backed records.
 * NOTE(review): the first three members look like a per-root record
 * (a backref list plus a found_root_item bit) - confirm the struct name.
 */
318 struct list_head backrefs;
319 struct cache_extent cache;
320 unsigned int found_root_item:1;
/*
 * Cache node of struct ptr_node; the code in this file pairs it with a
 * `data` pointer holding the struct inode_record.
 */
326 struct cache_extent cache;
/*
 * Members of struct shared_node: its cache node (keyed by bytenr in
 * add_shared_node()), two caches of inode records, and the inode record
 * currently being processed.
 */
331 struct cache_extent cache;
332 struct cache_tree root_cache;
333 struct cache_tree inode_cache;
334 struct inode_record *current;
/*
 * State of a tree walk: the cache of shared nodes plus one shared_node slot
 * per tree level. enter_shared_node() and leave_shared_node() also use its
 * active_node and root_level members.
 */
343 struct walk_control {
344 struct cache_tree shared;
345 struct shared_node *nodes[BTRFS_MAX_LEVEL];
/*
 * NOTE(review): the next two members (a key and a list linkage) appear to
 * belong to a separate record type rather than to walk_control - confirm.
 */
351 struct btrfs_key key;
353 struct list_head list;
/* List-linked entry describing an extent; chained through `list`. */
356 struct extent_entry {
361 struct list_head list;
/*
 * Summary of one root, stored in a cache tree through `cache_extent`.
 * The existing comments describe the members that follow them.
 */
364 struct root_item_info {
365 /* level of the root */
367 /* number of nodes at this level, must be 1 for a root */
371 struct cache_extent cache_extent;
375 * Error bit for low memory mode check.
377 * Currently no caller cares about it yet. Just internal use for error
/*
 * Error bits for the low-memory mode extent checks.
 *
 * Every condition owns a distinct bit so that results can be OR-ed together
 * and still be told apart. CROSSING_STRIPE_BOUNDARY used to share bit 4 with
 * REFERENCER_MISMATCH, which made the two errors indistinguishable; it now
 * uses the first free bit instead. Always test these by name, never by value.
 */
#define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH (1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH (1 << 8)
/*
 * Progress printer.
 *
 * @p: pointer to a struct task_ctx.
 *
 * With a period of 1000 ms it prints the label of the current task position
 * followed by a spinner character taken from work_indicator[]. The output
 * ends in '\r' rather than '\n', so each update overwrites the previous one.
 * Nothing is printed while the position is TASK_NOTHING.
 */
391 static void *print_status_check(void *p)
393 struct task_ctx *priv = p;
/* Four spinner frames, selected with count % 4 below. */
394 const char work_indicator[] = { '.', 'o', 'O', 'o' };
/* Labels indexed by enum task_position. */
396 static char *task_position_string[] = {
398 "checking free space cache",
402 task_period_start(priv->info, 1000 /* 1s */);
404 if (priv->tp == TASK_NOTHING)
408 printf("%s [%c]\r", task_position_string[priv->tp],
409 work_indicator[count % 4]);
/* Sleep until the next period boundary. */
412 task_period_wait(priv->info);
/*
 * Counterpart of print_status_check(), taking the same opaque context
 * pointer.
 * NOTE(review): presumably run when the progress task finishes - confirm
 * against the task registration code.
 */
417 static int print_status_return(void *p)
/*
 * Translate a mode name into an enum btrfs_check_mode value.
 *
 * @str: mode name; must not be NULL because it is passed to strcmp().
 *
 * Returns CHECK_MODE_LOWMEM for "lowmem", CHECK_MODE_ORIGINAL for "orig" or
 * "original", and CHECK_MODE_UNKNOWN for any other string. The comparison
 * is case-sensitive.
 */
425 static enum btrfs_check_mode parse_check_mode(const char *str)
427 if (strcmp(str, "lowmem") == 0)
428 return CHECK_MODE_LOWMEM;
429 if (strcmp(str, "orig") == 0)
430 return CHECK_MODE_ORIGINAL;
431 if (strcmp(str, "original") == 0)
432 return CHECK_MODE_ORIGINAL;
434 return CHECK_MODE_UNKNOWN;
/*
 * Look at the first (leftmost) hole of the rbtree @holes. An empty tree is
 * handled as a special case before the lookup. Callers compare the result
 * with an inode size or with the result for another tree.
 * NOTE(review): presumably the value is the start offset of the first hole -
 * confirm.
 */
437 /* Compatible function to allow reuse of old codes */
438 static u64 first_extent_gap(struct rb_root *holes)
440 struct file_extent_hole *hole;
442 if (RB_EMPTY_ROOT(holes))
445 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * Ordering callback used when inserting a hole into the rbtree.
 *
 * Holes are ordered by start offset first. When two holes start at the same
 * offset the length decides, so two nodes are never reported as equal; that
 * is what lets add_file_extent_hole() insert first and merge afterwards.
 */
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
451 struct file_extent_hole *hole1;
452 struct file_extent_hole *hole2;
454 hole1 = rb_entry(node1, struct file_extent_hole, node);
455 hole2 = rb_entry(node2, struct file_extent_hole, node);
457 if (hole1->start > hole2->start)
459 if (hole1->start < hole2->start)
461 /* Now hole1->start == hole2->start */
462 if (hole1->len >= hole2->len)
464 * Hole 1 will be merge center
465 * Same hole will be merged later
468 /* Hole 2 will be merge center */
/*
 * add_file_extent_hole() - insert a hole into @holes and merge neighbours.
 *
 * A new struct file_extent_hole is allocated with malloc() and inserted with
 * compare_hole() as the ordering. If the previous hole touches or overlaps
 * the new one, the new hole is widened to start at the previous hole's start
 * and the previous node is erased. Following holes are then absorbed one by
 * one for as long as they touch or overlap the (possibly widened) new hole.
 *
 * The result is an int status that callers store in `ret`.
 */
473 * Add a hole to the record
475 * This will do hole merge for copy_file_extent_holes(),
476 * which will ensure there won't be continuous holes.
478 static int add_file_extent_hole(struct rb_root *holes,
481 struct file_extent_hole *hole;
482 struct file_extent_hole *prev = NULL;
483 struct file_extent_hole *next = NULL;
485 hole = malloc(sizeof(*hole));
490 /* Since compare will not return 0, no -EEXIST will happen */
491 rb_insert(holes, &hole->node, compare_hole);
493 /* simple merge with previous hole */
494 if (rb_prev(&hole->node))
495 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/* ">=" also merges holes that are merely adjacent, not just overlapping. */
497 if (prev && prev->start + prev->len >= hole->start) {
498 hole->len = hole->start + hole->len - prev->start;
499 hole->start = prev->start;
500 rb_erase(&prev->node, holes);
505 /* iterate merge with next holes */
507 if (!rb_next(&hole->node))
509 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
511 if (hole->start + hole->len >= next->start) {
/* Only grow when the next hole ends beyond the current end. */
512 if (hole->start + hole->len <= next->start + next->len)
513 hole->len = next->start + next->len -
515 rb_erase(&next->node, holes);
/*
 * Search callback for rb_search() on a hole tree.
 *
 * @node: tree node being visited (a struct file_extent_hole).
 * @data: search key, also a struct file_extent_hole.
 *
 * The key's start offset is compared with the visited hole: it is either
 * before the hole, or inside the half-open range
 * [hole->start, hole->start + hole->len).
 */
524 static int compare_hole_range(struct rb_node *node, void *data)
526 struct file_extent_hole *hole;
/* First use of `hole`: the caller-supplied key. */
529 hole = (struct file_extent_hole *)data;
/* Second use of `hole`: the node currently visited. */
532 hole = rb_entry(node, struct file_extent_hole, node);
533 if (start < hole->start)
535 if (start >= hole->start && start < hole->start + hole->len)
541 * Delete a hole in the record
543 * This will do the hole split and is much more restrictive than add.
/*
 * del_file_extent_hole() - remove the range [start, start + len) from the
 * hole tree @holes.
 *
 * The hole containing `start` is located with compare_hole_range(). A range
 * that extends past the end of that hole is detected as a special case.
 * Otherwise the hole is erased, and whatever remains of it before and after
 * the removed range is re-inserted with add_file_extent_hole().
 */
545 static int del_file_extent_hole(struct rb_root *holes,
548 struct file_extent_hole *hole;
/* Stack-allocated key handed to rb_search(). */
549 struct file_extent_hole tmp;
554 struct rb_node *node;
561 node = rb_search(holes, &tmp, compare_hole_range, NULL);
564 hole = rb_entry(node, struct file_extent_hole, node);
565 if (start + len > hole->start + hole->len)
569 * Now there will be no overlap, delete the hole and re-add the
570 * split(s) if they exists.
/* Part of the hole that lies before the removed range. */
572 if (start > hole->start) {
573 prev_start = hole->start;
574 prev_len = start - hole->start;
/* Part of the hole that lies after the removed range. */
577 if (hole->start + hole->len > start + len) {
578 next_start = start + len;
579 next_len = hole->start + hole->len - start - len;
582 rb_erase(node, holes);
585 ret = add_file_extent_hole(holes, prev_start, prev_len);
590 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Add every hole of the source tree to @dst, in ascending order, by calling
 * add_file_extent_hole() for each one. Because that helper merges touching
 * holes, @dst may end up with fewer nodes than were copied. The source tree
 * is only read.
 */
597 static int copy_file_extent_holes(struct rb_root *dst,
600 struct file_extent_hole *hole;
601 struct rb_node *node;
604 node = rb_first(src);
606 hole = rb_entry(node, struct file_extent_hole, node);
607 ret = add_file_extent_hole(dst, hole->start, hole->len);
610 node = rb_next(node);
/*
 * Empty the hole tree @holes: repeatedly take the first node, erase it from
 * the tree and move on to the new first node.
 */
615 static void free_file_extent_holes(struct rb_root *holes)
617 struct rb_node *node;
618 struct file_extent_hole *hole;
620 node = rb_first(holes);
622 hole = rb_entry(node, struct file_extent_hole, node);
623 rb_erase(node, holes);
/* Restart from the front; the erased node must not be dereferenced again. */
625 node = rb_first(holes);
/* Forward declaration; the definition comes later in the file. */
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Note that @root takes part in transaction @trans.
 *
 * Nothing happens if the root was already recorded for this transaction id.
 * Otherwise the root is flagged as dirty-tracked, its last_trans is updated,
 * and its current node becomes the commit root, with an extra reference
 * taken on that extent buffer.
 */
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632 struct btrfs_root *root)
634 if (root->last_trans != trans->transid) {
635 root->track_dirty = 1;
636 root->last_trans = trans->transid;
637 root->commit_root = root->node;
/* commit_root now also points at the node, so take a reference for it. */
638 extent_buffer_get(root->node);
/*
 * Map the file type bits of an inode mode to a BTRFS_FT_* directory entry
 * type using a lookup table indexed by (imode & S_IFMT) >> S_SHIFT.
 *
 * File types without an explicit entry map to 0, because the table is a
 * static array and unlisted elements are zero-initialized.
 *
 * NOTE(review): the table has S_IFMT >> S_SHIFT elements, but the index can
 * be as large as S_IFMT >> S_SHIFT itself when every type bit of @imode is
 * set. On a corrupted mode this looks like a read one past the end of the
 * table - confirm, and consider sizing the array (S_IFMT >> S_SHIFT) + 1.
 */
642 static u8 imode_to_type(u32 imode)
645 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
647 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
648 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
649 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
650 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
651 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
652 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
655 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * rbtree ordering callback for struct device_record: the two records are
 * compared by their devid.
 */
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
661 struct device_record *rec1;
662 struct device_record *rec2;
664 rec1 = rb_entry(node1, struct device_record, node);
665 rec2 = rb_entry(node2, struct device_record, node);
666 if (rec1->devid > rec2->devid)
668 else if (rec1->devid < rec2->devid)
/*
 * clone_inode_rec() - make a private deep copy of an inode record.
 *
 * The scalar members are copied with memcpy(); the three containers
 * (backref list, orphan extent list, hole tree) are then re-initialized and
 * filled with freshly allocated copies of the originals' entries.
 *
 * Returns the new record, or ERR_PTR(-ENOMEM) when the record itself cannot
 * be allocated. The remaining statements undo a partially built copy.
 */
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
676 struct inode_record *rec;
677 struct inode_backref *backref;
678 struct inode_backref *orig;
679 struct inode_backref *tmp;
680 struct orphan_data_extent *src_orphan;
681 struct orphan_data_extent *dst_orphan;
686 rec = malloc(sizeof(*rec));
688 return ERR_PTR(-ENOMEM);
689 memcpy(rec, orig_rec, sizeof(*rec));
/*
 * The memcpy() above copied the original's container heads verbatim; reset
 * them so the clone does not share entries with the original.
 */
691 INIT_LIST_HEAD(&rec->backrefs);
692 INIT_LIST_HEAD(&rec->orphan_extents);
693 rec->holes = RB_ROOT;
695 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* A backref is followed in memory by its name and a trailing NUL byte. */
696 size = sizeof(*orig) + orig->namelen + 1;
697 backref = malloc(size);
702 memcpy(backref, orig, size);
703 list_add_tail(&backref->list, &rec->backrefs);
705 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706 dst_orphan = malloc(sizeof(*dst_orphan));
711 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
714 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* Cleanup: tear down whatever was already copied into the clone. */
721 rb = rb_first(&rec->holes);
723 struct file_extent_hole *hole;
725 hole = rb_entry(rb, struct file_extent_hole, node);
731 if (!list_empty(&rec->backrefs))
732 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733 list_del(&orig->list);
/*
 * NOTE(review): orphan_extents holds struct orphan_data_extent entries, but
 * this loop walks it with struct inode_backref cursors. It only lines up
 * because `list` is the first member of both structs; dedicated
 * orphan_data_extent cursors would be safer.
 */
737 if (!list_empty(&rec->orphan_extents))
738 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739 list_del(&orig->list);
/*
 * Print the orphan data extents on @orphan_extents to stdout: one heading
 * line naming the tree, then one line per extent. Nothing is printed for an
 * empty list.
 */
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
751 struct orphan_data_extent *orphan;
753 if (list_empty(orphan_extents))
755 printf("The following data extent is lost in tree %llu:\n",
757 list_for_each_entry(orphan, orphan_extents, list) {
758 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * print_inode_error() - describe all errors recorded for one inode.
 *
 * @root: tree the inode belongs to.
 * @rec:  inode record whose `errors` bit mask is decoded.
 *
 * Writes a single line to stderr: the root and inode numbers, the raw error
 * mask in hex, then one ", <description>" fragment per set I_ERR_* bit.
 * Orphan extents and file extent holes are listed afterwards when the
 * corresponding error bits are set.
 */
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
766 u64 root_objectid = root->root_key.objectid;
767 int errors = rec->errors;
771 /* reloc root errors, we print its corresponding fs root objectid*/
772 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
/* For a reloc root the key offset supplies the objectid that is printed. */
773 root_objectid = root->root_key.offset;
774 fprintf(stderr, "reloc");
776 fprintf(stderr, "root %llu inode %llu errors %x",
777 (unsigned long long) root_objectid,
778 (unsigned long long) rec->ino, rec->errors);
780 if (errors & I_ERR_NO_INODE_ITEM)
781 fprintf(stderr, ", no inode item");
782 if (errors & I_ERR_NO_ORPHAN_ITEM)
783 fprintf(stderr, ", no orphan item");
784 if (errors & I_ERR_DUP_INODE_ITEM)
785 fprintf(stderr, ", dup inode item");
786 if (errors & I_ERR_DUP_DIR_INDEX)
787 fprintf(stderr, ", dup dir index");
788 if (errors & I_ERR_ODD_DIR_ITEM)
789 fprintf(stderr, ", odd dir item");
790 if (errors & I_ERR_ODD_FILE_EXTENT)
791 fprintf(stderr, ", odd file extent");
792 if (errors & I_ERR_BAD_FILE_EXTENT)
793 fprintf(stderr, ", bad file extent");
794 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795 fprintf(stderr, ", file extent overlap");
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797 fprintf(stderr, ", file extent discount");
798 if (errors & I_ERR_DIR_ISIZE_WRONG)
799 fprintf(stderr, ", dir isize wrong");
800 if (errors & I_ERR_FILE_NBYTES_WRONG)
801 fprintf(stderr, ", nbytes wrong");
802 if (errors & I_ERR_ODD_CSUM_ITEM)
803 fprintf(stderr, ", odd csum item");
804 if (errors & I_ERR_SOME_CSUM_MISSING)
805 fprintf(stderr, ", some csum missing");
806 if (errors & I_ERR_LINK_COUNT_WRONG)
807 fprintf(stderr, ", link count wrong");
808 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809 fprintf(stderr, ", orphan file extent");
810 fprintf(stderr, "\n");
811 /* Print the orphan extents if needed */
/* Note: this listing goes to stdout, unlike the rest of the report. */
812 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
815 /* Print the holes if needed */
816 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817 struct file_extent_hole *hole;
818 struct rb_node *node;
821 node = rb_first(&rec->holes);
822 fprintf(stderr, "Found file extent holes:\n");
825 hole = rb_entry(node, struct file_extent_hole, node);
826 fprintf(stderr, "\tstart: %llu, len: %llu\n",
827 hole->start, hole->len);
828 node = rb_next(node);
/* Alternative report: a single hole starting at offset 0. */
831 fprintf(stderr, "\tstart: 0, len: %llu\n",
833 root->fs_info->sectorsize));
/*
 * print_ref_error() - decode a REF_ERR_* bit mask.
 *
 * @errors: bitwise OR of REF_ERR_* values.
 *
 * Writes one ", <description>" fragment to stderr for every set bit, in
 * ascending bit order, and always ends the line with '\n'. The caller is
 * expected to have printed the start of the line already.
 */
837 static void print_ref_error(int errors)
839 if (errors & REF_ERR_NO_DIR_ITEM)
840 fprintf(stderr, ", no dir item");
841 if (errors & REF_ERR_NO_DIR_INDEX)
842 fprintf(stderr, ", no dir index");
843 if (errors & REF_ERR_NO_INODE_REF)
844 fprintf(stderr, ", no inode ref");
845 if (errors & REF_ERR_DUP_DIR_ITEM)
846 fprintf(stderr, ", dup dir item");
847 if (errors & REF_ERR_DUP_DIR_INDEX)
848 fprintf(stderr, ", dup dir index");
849 if (errors & REF_ERR_DUP_INODE_REF)
850 fprintf(stderr, ", dup inode ref");
851 if (errors & REF_ERR_INDEX_UNMATCH)
852 fprintf(stderr, ", index mismatch");
853 if (errors & REF_ERR_FILETYPE_UNMATCH)
854 fprintf(stderr, ", filetype mismatch");
855 if (errors & REF_ERR_NAME_TOO_LONG)
856 fprintf(stderr, ", name too long");
857 if (errors & REF_ERR_NO_ROOT_REF)
858 fprintf(stderr, ", no root ref");
859 if (errors & REF_ERR_NO_ROOT_BACKREF)
860 fprintf(stderr, ", no root backref");
861 if (errors & REF_ERR_DUP_ROOT_REF)
862 fprintf(stderr, ", dup root ref");
863 if (errors & REF_ERR_DUP_ROOT_BACKREF)
864 fprintf(stderr, ", dup root backref");
865 fprintf(stderr, "\n");
/*
 * get_inode_rec() - look up, and possibly create, the record for an inode.
 *
 * The inode number is the key into @inode_cache (range start = ino,
 * size = 1). When an existing record is about to be modified while it has
 * more than one reference, the cache entry is switched to a private clone
 * made by clone_inode_rec(). A newly created record is zero-filled, gets
 * extent_start set to (u64)-1 and empty containers, and is inserted into
 * the cache.
 *
 * Returns the record, or an ERR_PTR(): -ENOMEM on allocation failure,
 * -EEXIST when the cache insertion collides.
 */
868 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
871 struct ptr_node *node;
872 struct cache_extent *cache;
873 struct inode_record *rec = NULL;
876 cache = lookup_cache_extent(inode_cache, ino, 1);
878 node = container_of(cache, struct ptr_node, cache);
/* Records with more than one reference are cloned before being modified. */
880 if (mod && rec->refs > 1) {
881 node->data = clone_inode_rec(rec);
882 if (IS_ERR(node->data))
888 rec = calloc(1, sizeof(*rec));
890 return ERR_PTR(-ENOMEM);
/* (u64)-1 means "no file extent seen yet"; merge_inode_recs() tests for it. */
892 rec->extent_start = (u64)-1;
894 INIT_LIST_HEAD(&rec->backrefs);
895 INIT_LIST_HEAD(&rec->orphan_extents);
896 rec->holes = RB_ROOT;
898 node = malloc(sizeof(*node));
901 return ERR_PTR(-ENOMEM);
903 node->cache.start = ino;
904 node->cache.size = 1;
907 if (ino == BTRFS_FREE_INO_OBJECTID)
910 ret = insert_cache_extent(inode_cache, &node->cache);
912 return ERR_PTR(-EEXIST);
/*
 * Empty @orphan_extents: take the first entry, unlink it, and repeat until
 * the list is empty.
 */
917 static void free_orphan_data_extents(struct list_head *orphan_extents)
919 struct orphan_data_extent *orphan;
921 while (!list_empty(orphan_extents)) {
922 orphan = list_entry(orphan_extents->next,
923 struct orphan_data_extent, list);
924 list_del(&orphan->list);
/*
 * Tear down an inode record: unlink all of its back references, then
 * release its orphan extent list and its hole tree through the dedicated
 * helpers.
 */
929 static void free_inode_rec(struct inode_record *rec)
931 struct inode_backref *backref;
936 while (!list_empty(&rec->backrefs)) {
937 backref = to_inode_backref(rec->backrefs.next);
938 list_del(&backref->list);
941 free_orphan_data_extents(&rec->orphan_extents);
942 free_file_extent_holes(&rec->holes);
/*
 * Decide whether an inode record holds nothing worth keeping: no errors, it
 * has been fully checked, its inode item was found, the stored link count
 * equals the number of links found, and no back references are left.
 */
946 static int can_free_inode_rec(struct inode_record *rec)
948 if (!rec->errors && rec->checked && rec->found_inode_item &&
949 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * maybe_free_inode_rec() - finish validating a record and drop it if clean.
 *
 * Step 1: nothing is done until the inode item has been seen.
 * Step 2: back references whose dir item and dir index were both found are
 *         checked against the inode's file type; fully consistent ones are
 *         removed from the list.
 * Step 3: once the record is marked checked and is not being merged, the
 *         size, extent and checksum consistency rules are applied and the
 *         matching I_ERR_* bits are set.
 * Step 4: a record that can_free_inode_rec() approves is removed from
 *         @inode_cache and freed.
 */
954 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
955 struct inode_record *rec)
957 struct cache_extent *cache;
958 struct inode_backref *tmp, *backref;
959 struct ptr_node *node;
962 if (!rec->found_inode_item)
965 filetype = imode_to_type(rec->imode);
966 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
967 if (backref->found_dir_item && backref->found_dir_index) {
968 if (backref->filetype != filetype)
969 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
/* Error-free and complete: this reference needs no further tracking. */
970 if (!backref->errors && backref->found_inode_ref &&
971 rec->nlink == rec->found_link) {
972 list_del(&backref->list);
978 if (!rec->checked || rec->merging)
981 if (S_ISDIR(rec->imode)) {
982 if (rec->found_size != rec->isize)
983 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
984 if (rec->found_file_extent)
985 rec->errors |= I_ERR_ODD_FILE_EXTENT;
986 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
987 if (rec->found_dir_item)
988 rec->errors |= I_ERR_ODD_DIR_ITEM;
989 if (rec->found_size != rec->nbytes)
990 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/*
 * Extents must cover the whole file unless the global no_holes flag is set
 * or the inode has no links.
 */
991 if (rec->nlink > 0 && !no_holes &&
992 (rec->extent_end < rec->isize ||
993 first_extent_gap(&rec->holes) < rec->isize))
994 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
997 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
998 if (rec->found_csum_item && rec->nodatasum)
999 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1000 if (rec->some_csum_missing && !rec->nodatasum)
1001 rec->errors |= I_ERR_SOME_CSUM_MISSING;
/* A record reaching this point must not be shared with another cache. */
1004 BUG_ON(rec->refs != 1);
1005 if (can_free_inode_rec(rec)) {
1006 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1007 node = container_of(cache, struct ptr_node, cache);
1008 BUG_ON(node->data != rec);
1009 remove_cache_extent(inode_cache, &node->cache);
1011 free_inode_rec(rec);
/*
 * Search @root for an orphan item (objectid BTRFS_ORPHAN_OBJECTID, type
 * BTRFS_ORPHAN_ITEM_KEY). The search is read-only: no transaction handle is
 * passed, and both the insert-length and copy-on-write arguments are 0. The
 * path is released before returning.
 * NOTE(review): presumably the key offset is @ino - confirm.
 */
1015 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1017 struct btrfs_path path;
1018 struct btrfs_key key;
1021 key.objectid = BTRFS_ORPHAN_OBJECTID;
1022 key.type = BTRFS_ORPHAN_ITEM_KEY;
1025 btrfs_init_path(&path);
1026 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1027 btrfs_release_path(&path);
/*
 * process_inode_item() - record the contents of an INODE_ITEM.
 *
 * @eb, @slot:  leaf and slot holding the item.
 * @key:        item key; its objectid must match the current inode record.
 * @active_node: shared node whose `current` record is updated.
 *
 * A second inode item for the same record is flagged as
 * I_ERR_DUP_INODE_ITEM. Otherwise nlink, size, nbytes and mode are copied
 * into the record. An inode with zero links is provisionally flagged
 * I_ERR_NO_ORPHAN_ITEM.
 */
1033 static int process_inode_item(struct extent_buffer *eb,
1034 int slot, struct btrfs_key *key,
1035 struct shared_node *active_node)
1037 struct inode_record *rec;
1038 struct btrfs_inode_item *item;
1040 rec = active_node->current;
1041 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1042 if (rec->found_inode_item) {
1043 rec->errors |= I_ERR_DUP_INODE_ITEM;
1046 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1047 rec->nlink = btrfs_inode_nlink(eb, item);
1048 rec->isize = btrfs_inode_size(eb, item);
1049 rec->nbytes = btrfs_inode_nbytes(eb, item);
1050 rec->imode = btrfs_inode_mode(eb, item);
1051 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1053 rec->found_inode_item = 1;
1054 if (rec->nlink == 0)
1055 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
/* The record may already be complete; let it be validated and freed. */
1056 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * get_inode_backref() - find or create the back reference (dir, name) of
 * an inode record.
 *
 * An existing entry matches when directory, name length and name bytes are
 * all equal. The search has a special case for records of
 * BTRFS_MULTIPLE_OBJECTIDS. A new entry is allocated with room for the name
 * plus a terminating NUL, zero-filled, and appended to the record's list.
 */
1060 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1062 int namelen, u64 dir)
1064 struct inode_backref *backref;
1066 list_for_each_entry(backref, &rec->backrefs, list) {
1067 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1069 if (backref->dir != dir || backref->namelen != namelen)
1071 if (memcmp(name, backref->name, namelen))
1076 backref = malloc(sizeof(*backref) + namelen + 1);
/* Only the fixed-size header is cleared; the name is filled in below. */
1079 memset(backref, 0, sizeof(*backref));
1081 backref->namelen = namelen;
1082 memcpy(backref->name, name, namelen);
1083 backref->name[namelen] = '\0';
1084 list_add_tail(&backref->list, &rec->backrefs);
/*
 * add_inode_backref() - record that an item references inode @ino.
 *
 * @inode_cache: cache holding the inode records.
 * @ino:         referenced inode.
 * @dir, @name, @namelen: directory and name that identify the reference.
 * @index:       directory index carried by the item.
 * @filetype:    file type carried by dir items.
 * @itemtype:    BTRFS_DIR_INDEX_KEY, BTRFS_DIR_ITEM_KEY, BTRFS_INODE_REF_KEY
 *               or BTRFS_INODE_EXTREF_KEY.
 * @errors:      REF_ERR_* bits to OR into the reference.
 *
 * Depending on @itemtype, the matching found_* bit is set. Duplicate items
 * are flagged, as are index or file type mismatches with what an earlier
 * item recorded. The inode record is then given a chance to be validated
 * and freed.
 */
1088 static int add_inode_backref(struct cache_tree *inode_cache,
1089 u64 ino, u64 dir, u64 index,
1090 const char *name, int namelen,
1091 u8 filetype, u8 itemtype, int errors)
1093 struct inode_record *rec;
1094 struct inode_backref *backref;
/* mod = 1: the record is about to be modified. */
1096 rec = get_inode_rec(inode_cache, ino, 1);
1097 BUG_ON(IS_ERR(rec));
1098 backref = get_inode_backref(rec, name, namelen, dir);
1101 backref->errors |= errors;
1102 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1103 if (backref->found_dir_index)
1104 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1105 if (backref->found_inode_ref && backref->index != index)
1106 backref->errors |= REF_ERR_INDEX_UNMATCH;
1107 if (backref->found_dir_item && backref->filetype != filetype)
1108 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1110 backref->index = index;
1111 backref->filetype = filetype;
1112 backref->found_dir_index = 1;
1113 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1115 if (backref->found_dir_item)
1116 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1117 if (backref->found_dir_index && backref->filetype != filetype)
1118 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1120 backref->filetype = filetype;
1121 backref->found_dir_item = 1;
1122 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1123 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1124 if (backref->found_inode_ref)
1125 backref->errors |= REF_ERR_DUP_INODE_REF;
1126 if (backref->found_dir_index && backref->index != index)
1127 backref->errors |= REF_ERR_INDEX_UNMATCH;
1129 backref->index = index;
/* Remember whether this was a plain ref or an extref; merging replays it. */
1131 backref->ref_type = itemtype;
1132 backref->found_inode_ref = 1;
1137 maybe_free_inode_rec(inode_cache, rec);
/*
 * merge_inode_recs() - fold everything known about @src into @dst.
 *
 * Back references are replayed through add_inode_backref() against
 * @dst_cache, once per kind of item that was found for them. State bits,
 * holes, link and size counters, the extent range and the error mask are
 * then combined. The inode item fields are copied only when @dst has none
 * yet; a second inode item is flagged as I_ERR_DUP_INODE_ITEM.
 */
1141 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1142 struct cache_tree *dst_cache)
1144 struct inode_backref *backref;
1149 list_for_each_entry(backref, &src->backrefs, list) {
1150 if (backref->found_dir_index) {
1151 add_inode_backref(dst_cache, dst->ino, backref->dir,
1152 backref->index, backref->name,
1153 backref->namelen, backref->filetype,
1154 BTRFS_DIR_INDEX_KEY, backref->errors);
1156 if (backref->found_dir_item) {
1158 add_inode_backref(dst_cache, dst->ino,
1159 backref->dir, 0, backref->name,
1160 backref->namelen, backref->filetype,
1161 BTRFS_DIR_ITEM_KEY, backref->errors);
1163 if (backref->found_inode_ref) {
1164 add_inode_backref(dst_cache, dst->ino,
1165 backref->dir, backref->index,
1166 backref->name, backref->namelen, 0,
1167 backref->ref_type, backref->errors);
1171 if (src->found_dir_item)
1172 dst->found_dir_item = 1;
1173 if (src->found_file_extent)
1174 dst->found_file_extent = 1;
1175 if (src->found_csum_item)
1176 dst->found_csum_item = 1;
1177 if (src->some_csum_missing)
1178 dst->some_csum_missing = 1;
/* Holes are only copied when the source's first gap comes earlier. */
1179 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1180 ret = copy_file_extent_holes(&dst->holes, &src->holes);
/* dir_count is subtracted from the source's link count before adding it. */
1185 BUG_ON(src->found_link < dir_count);
1186 dst->found_link += src->found_link - dir_count;
1187 dst->found_size += src->found_size;
/* (u64)-1 in extent_start means that side has not seen any file extent. */
1188 if (src->extent_start != (u64)-1) {
1189 if (dst->extent_start == (u64)-1) {
1190 dst->extent_start = src->extent_start;
1191 dst->extent_end = src->extent_end;
1193 if (dst->extent_end > src->extent_start)
1194 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1195 else if (dst->extent_end < src->extent_start) {
/* A gap between the two ranges becomes a hole in the destination. */
1196 ret = add_file_extent_hole(&dst->holes,
1198 src->extent_start - dst->extent_end);
1200 if (dst->extent_end < src->extent_end)
1201 dst->extent_end = src->extent_end;
1205 dst->errors |= src->errors;
1206 if (src->found_inode_item) {
1207 if (!dst->found_inode_item) {
1208 dst->nlink = src->nlink;
1209 dst->isize = src->isize;
1210 dst->nbytes = src->nbytes;
1211 dst->imode = src->imode;
1212 dst->nodatasum = src->nodatasum;
1213 dst->found_inode_item = 1;
1215 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * splice_shared_node() - move the inode records of @src_node to @dst_node.
 *
 * One reference is dropped from @src_node. Its root_cache is then walked
 * and afterwards its inode_cache, each being moved into the same-named
 * cache of @dst_node. When the destination already has a record for an
 * inode (-EEXIST), the two are merged with merge_inode_recs(), the
 * surviving record is marked checked and offered to maybe_free_inode_rec(),
 * and the source record is freed.
 *
 * Finally, if the source had a current inode and the destination either has
 * none or one with a smaller inode number, the destination's current record
 * is closed (marked checked) and replaced by the record for the source's
 * current inode.
 */
1223 static int splice_shared_node(struct shared_node *src_node,
1224 struct shared_node *dst_node)
1226 struct cache_extent *cache;
1227 struct ptr_node *node, *ins;
1228 struct cache_tree *src, *dst;
1229 struct inode_record *rec, *conflict;
1230 u64 current_ino = 0;
1234 if (--src_node->refs == 0)
1236 if (src_node->current)
1237 current_ino = src_node->current->ino;
/* First pass handles root_cache; the second pass switches to inode_cache. */
1239 src = &src_node->root_cache;
1240 dst = &dst_node->root_cache;
1242 cache = search_cache_extent(src, 0);
1244 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current node is possibly removed from the tree. */
1246 cache = next_cache_extent(cache);
1249 remove_cache_extent(src, &node->cache);
1252 ins = malloc(sizeof(*ins));
1254 ins->cache.start = node->cache.start;
1255 ins->cache.size = node->cache.size;
1259 ret = insert_cache_extent(dst, &ins->cache);
1260 if (ret == -EEXIST) {
1261 conflict = get_inode_rec(dst, rec->ino, 1);
1262 BUG_ON(IS_ERR(conflict));
1263 merge_inode_recs(rec, conflict, dst);
1265 conflict->checked = 1;
/* Do not leave `current` pointing at a record that may be freed below. */
1266 if (dst_node->current == conflict)
1267 dst_node->current = NULL;
1269 maybe_free_inode_rec(dst, conflict);
1270 free_inode_rec(rec);
1277 if (src == &src_node->root_cache) {
1278 src = &src_node->inode_cache;
1279 dst = &dst_node->inode_cache;
1283 if (current_ino > 0 && (!dst_node->current ||
1284 current_ino > dst_node->current->ino)) {
1285 if (dst_node->current) {
1286 dst_node->current->checked = 1;
1287 maybe_free_inode_rec(dst, dst_node->current);
1289 dst_node->current = get_inode_rec(dst, current_ino, 1);
1290 BUG_ON(IS_ERR(dst_node->current));
/*
 * Destructor callback for one inode cache entry: recover the struct
 * ptr_node from its cache_extent and free the inode record it refers to.
 * It is the per-entry callback passed to FREE_EXTENT_CACHE_BASED_TREE().
 */
1295 static void free_inode_ptr(struct cache_extent *cache)
1297 struct ptr_node *node;
1298 struct inode_record *rec;
1300 node = container_of(cache, struct ptr_node, cache);
1302 free_inode_rec(rec);
/*
 * Instantiates a tree-freeing helper built around free_inode_ptr().
 * NOTE(review): presumably this is what defines free_inode_recs_tree(),
 * which enter_shared_node() calls - confirm against the macro definition.
 */
1306 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared node recorded for the block at `bytenr` in the
 * @shared cache (lookup range of size 1) and convert the cache entry to its
 * containing struct shared_node.
 */
1308 static struct shared_node *find_shared_node(struct cache_tree *shared,
1311 struct cache_extent *cache;
1312 struct shared_node *node;
1314 cache = lookup_cache_extent(shared, bytenr, 1);
1316 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zero-filled shared node for the block at @bytenr, initialize
 * its two inode caches, and insert it into @shared keyed by
 * (bytenr, size 1).
 * NOTE(review): presumably @refs is stored as the node's initial reference
 * count - confirm.
 */
1322 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1325 struct shared_node *node;
1327 node = calloc(1, sizeof(*node));
1330 node->cache.start = bytenr;
1331 node->cache.size = 1;
1332 cache_tree_init(&node->root_cache);
1333 cache_tree_init(&node->inode_cache);
1336 ret = insert_cache_extent(shared, &node->cache);
/*
 * enter_shared_node() - called when the walk reaches a shared tree block.
 *
 * @root:   tree being walked.
 * @bytenr: location of the shared block.
 * @refs:   reference count of the block.
 * @wc:     walk state.
 * @level:  tree level of the block.
 *
 * On first encounter, a shared node is created for the block and becomes
 * the active node for @level. On a later encounter the outcome depends on
 * the tree. If the walk is at the root level of a tree whose root item has
 * zero references, one reference is dropped and the node's caches are freed
 * once none remain. Otherwise the node's records are spliced into the
 * currently active node.
 */
1341 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1342 struct walk_control *wc, int level)
1344 struct shared_node *node;
1345 struct shared_node *dest;
/* Special case: the level is already the active one. */
1348 if (level == wc->active_node)
1351 BUG_ON(wc->active_node <= level);
1352 node = find_shared_node(&wc->shared, bytenr);
1354 ret = add_shared_node(&wc->shared, bytenr, refs);
1356 node = find_shared_node(&wc->shared, bytenr);
1357 wc->nodes[level] = node;
1358 wc->active_node = level;
1362 if (wc->root_level == wc->active_node &&
1363 btrfs_root_refs(&root->root_item) == 0) {
1364 if (--node->refs == 0) {
1365 free_inode_recs_tree(&node->root_cache);
1366 free_inode_recs_tree(&node->inode_cache);
1367 remove_cache_extent(&wc->shared, &node->cache);
1373 dest = wc->nodes[wc->active_node];
1374 splice_shared_node(node, dest);
/* splice_shared_node() dropped a reference; retire the node if it was last. */
1375 if (node->refs == 0) {
1376 remove_cache_extent(&wc->shared, &node->cache);
/*
 * leave_shared_node() - called when the walk leaves the shared block at
 * @level.
 *
 * The root level is handled as a special case. Otherwise the levels above
 * @level are scanned to choose the new active level; the previously active
 * node is detached from its slot. Its records are spliced into the new
 * active node unless the walk is at the root level of a tree whose root
 * item has no references.
 */
1382 static int leave_shared_node(struct btrfs_root *root,
1383 struct walk_control *wc, int level)
1385 struct shared_node *node;
1386 struct shared_node *dest;
1389 if (level == wc->root_level)
1392 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
/* The scan above must have stopped at a valid level. */
1396 BUG_ON(i >= BTRFS_MAX_LEVEL);
1398 node = wc->nodes[wc->active_node];
1399 wc->nodes[wc->active_node] = NULL;
1400 wc->active_node = i;
1402 dest = wc->nodes[wc->active_node];
1403 if (wc->active_node < wc->root_level ||
1404 btrfs_root_refs(&root->root_item) > 0) {
1405 BUG_ON(node->refs <= 1);
1406 splice_shared_node(node, dest);
1408 BUG_ON(node->refs < 2);
1417 * 1 - if the root with id child_root_id is a child of root parent_root_id
1418 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1419 * has other root(s) as parent(s)
1420 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Determines how child_root_id relates to parent_root_id using the tree
 * root: first a direct (parent_root_id, ROOT_REF, child_root_id) lookup,
 * then a scan over the child's ROOT_BACKREF items.
 */
1422 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1425 struct btrfs_path path;
1426 struct btrfs_key key;
1427 struct extent_buffer *leaf;
1431 btrfs_init_path(&path);
/* Step 1: search for the forward reference parent -> child. */
1433 key.objectid = parent_root_id;
1434 key.type = BTRFS_ROOT_REF_KEY;
1435 key.offset = child_root_id;
1436 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1440 btrfs_release_path(&path);
/* Step 2: walk the child's ROOT_BACKREF items looking for this parent. */
1444 key.objectid = child_root_id;
1445 key.type = BTRFS_ROOT_BACKREF_KEY;
1447 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1453 leaf = path.nodes[0];
1454 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1455 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1458 leaf = path.nodes[0];
1461 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1462 if (key.objectid != child_root_id ||
1463 key.type != BTRFS_ROOT_BACKREF_KEY)
1468 if (key.offset == parent_root_id) {
1469 btrfs_release_path(&path);
1476 btrfs_release_path(&path);
/* No backref names parent_root_id: 0 if another parent was seen, else 2. */
1479 return has_parent ? 0 : 2;
/*
 * Records every name stored in the DIR_ITEM/DIR_INDEX item at @slot of @eb.
 *
 * The current inode record of @active_node is flagged as having a dir item
 * and its found_size grows by each name length. Each name is copied into a
 * BTRFS_NAME_LEN sized buffer and registered as a backref, keyed by the
 * type of the entry's location key:
 *   - BTRFS_INODE_ITEM_KEY -> inode_cache
 *   - BTRFS_ROOT_ITEM_KEY  -> root_cache
 *   - anything else        -> reported on stderr and filed under
 *                             BTRFS_MULTIPLE_OBJECTIDS in inode_cache
 * Names that overrun the item or exceed BTRFS_NAME_LEN are flagged with
 * REF_ERR_NAME_TOO_LONG.
 */
1482 static int process_dir_item(struct extent_buffer *eb,
1483 int slot, struct btrfs_key *key,
1484 struct shared_node *active_node)
1494 struct btrfs_dir_item *di;
1495 struct inode_record *rec;
1496 struct cache_tree *root_cache;
1497 struct cache_tree *inode_cache;
1498 struct btrfs_key location;
1499 char namebuf[BTRFS_NAME_LEN];
1501 root_cache = &active_node->root_cache;
1502 inode_cache = &active_node->inode_cache;
1503 rec = active_node->current;
1504 rec->found_dir_item = 1;
1506 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1507 total = btrfs_item_size_nr(eb, slot);
1508 while (cur < total) {
1510 btrfs_dir_item_key_to_cpu(eb, di, &location);
1511 name_len = btrfs_dir_name_len(eb, di);
1512 data_len = btrfs_dir_data_len(eb, di);
1513 filetype = btrfs_dir_type(eb, di);
1515 rec->found_size += name_len;
1516 if (cur + sizeof(*di) + name_len > total ||
1517 name_len > BTRFS_NAME_LEN) {
1518 error = REF_ERR_NAME_TOO_LONG;
1520 if (cur + sizeof(*di) > total)
1522 len = min_t(u32, total - cur - sizeof(*di),
1529 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1531 if (location.type == BTRFS_INODE_ITEM_KEY) {
1532 add_inode_backref(inode_cache, location.objectid,
1533 key->objectid, key->offset, namebuf,
1534 len, filetype, key->type, error);
1535 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1536 add_inode_backref(root_cache, location.objectid,
1537 key->objectid, key->offset,
1538 namebuf, len, filetype,
1541 fprintf(stderr, "invalid location in dir item %u\n",
1543 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1544 key->objectid, key->offset, namebuf,
1545 len, filetype, key->type, error);
/* Step over header, name and data to reach the next packed entry. */
1548 len = sizeof(*di) + name_len + data_len;
1549 di = (struct btrfs_dir_item *)((char *)di + len);
/* More than one entry under a single DIR_INDEX key is an error. */
1552 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1553 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Records every name packed into the INODE_REF item at @slot of @eb as a
 * backref in @active_node's inode_cache. The backref is filed for inode
 * key->objectid with key->offset as the directory, plus the index stored in
 * the ref. A name that would cross the item boundary or exceed
 * BTRFS_NAME_LEN is truncated to what can still be read and flagged with
 * REF_ERR_NAME_TOO_LONG.
 */
1558 static int process_inode_ref(struct extent_buffer *eb,
1559 int slot, struct btrfs_key *key,
1560 struct shared_node *active_node)
1568 struct cache_tree *inode_cache;
1569 struct btrfs_inode_ref *ref;
1570 char namebuf[BTRFS_NAME_LEN];
1572 inode_cache = &active_node->inode_cache;
1574 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1575 total = btrfs_item_size_nr(eb, slot);
1576 while (cur < total) {
1577 name_len = btrfs_inode_ref_name_len(eb, ref);
1578 index = btrfs_inode_ref_index(eb, ref);
1580 /* inode_ref + namelen should not cross item boundary */
1581 if (cur + sizeof(*ref) + name_len > total ||
1582 name_len > BTRFS_NAME_LEN) {
1583 if (total < cur + sizeof(*ref))
1586 /* Still try to read out the remaining part */
1587 len = min_t(u32, total - cur - sizeof(*ref),
1589 error = REF_ERR_NAME_TOO_LONG;
1595 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1596 add_inode_backref(inode_cache, key->objectid, key->offset,
1597 index, namebuf, len, 0, key->type, error);
/* Advance past this ref header and its name to the next packed ref. */
1599 len = sizeof(*ref) + name_len;
1600 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Extended-ref variant of process_inode_ref(): records every name packed
 * into the INODE_EXTREF item at @slot of @eb as a backref in @active_node's
 * inode_cache. The parent directory comes from the extref itself rather
 * than from the key offset. Names longer than BTRFS_NAME_LEN are cut to
 * BTRFS_NAME_LEN and flagged with REF_ERR_NAME_TOO_LONG.
 * NOTE(review): unlike process_inode_ref(), no check that the name stays
 * inside the item is visible in this listing.
 */
1606 static int process_inode_extref(struct extent_buffer *eb,
1607 int slot, struct btrfs_key *key,
1608 struct shared_node *active_node)
1617 struct cache_tree *inode_cache;
1618 struct btrfs_inode_extref *extref;
1619 char namebuf[BTRFS_NAME_LEN];
1621 inode_cache = &active_node->inode_cache;
1623 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1624 total = btrfs_item_size_nr(eb, slot);
1625 while (cur < total) {
1626 name_len = btrfs_inode_extref_name_len(eb, extref);
1627 index = btrfs_inode_extref_index(eb, extref);
1628 parent = btrfs_inode_extref_parent(eb, extref);
1629 if (name_len <= BTRFS_NAME_LEN) {
1633 len = BTRFS_NAME_LEN;
1634 error = REF_ERR_NAME_TOO_LONG;
1636 read_extent_buffer(eb, namebuf,
1637 (unsigned long)(extref + 1), len);
1638 add_inode_backref(inode_cache, key->objectid, parent,
1639 index, namebuf, len, 0, key->type, error);
/* Advance past this extref header and its name to the next one. */
1641 len = sizeof(*extref) + name_len;
1642 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scans the csum tree for EXTENT_CSUM items overlapping the byte range
 * [@start, @start + @len). If the initial search lands after an item, the
 * previous slot is inspected first, since a csum item starting before
 * @start may still cover it.
 * NOTE(review): how the overlap is accumulated into @found is not visible
 * in this listing.
 */
1649 static int count_csum_range(struct btrfs_root *root, u64 start,
1650 u64 len, u64 *found)
1652 struct btrfs_key key;
1653 struct btrfs_path path;
1654 struct extent_buffer *leaf;
1659 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1661 btrfs_init_path(&path);
1663 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1665 key.type = BTRFS_EXTENT_CSUM_KEY;
1667 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1671 if (ret > 0 && path.slots[0] > 0) {
1672 leaf = path.nodes[0];
1673 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1674 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1675 key.type == BTRFS_EXTENT_CSUM_KEY)
1680 leaf = path.nodes[0];
1681 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1682 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1687 leaf = path.nodes[0];
1690 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1691 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1692 key.type != BTRFS_EXTENT_CSUM_KEY)
1695 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1696 if (key.offset >= start + len)
1699 if (key.offset > start)
1702 size = btrfs_item_size_nr(leaf, path.slots[0]);
/* One checksum of csum_size bytes per sector: item covers up to csum_end. */
1703 csum_end = key.offset + (size / csum_size) *
1704 root->fs_info->sectorsize;
1705 if (csum_end > start) {
1706 size = min(csum_end - start, len);
1715 btrfs_release_path(&path);
/*
 * Validates the EXTENT_DATA item at @slot of @eb against the current inode
 * record of @active_node and updates that record:
 *   - tracks extent_start/extent_end, flagging overlaps and recording gaps
 *     between consecutive extents as holes;
 *   - sanity-checks inline, regular and preallocated extents, setting
 *     I_ERR_BAD_FILE_EXTENT on inconsistencies, and adds to found_size;
 *   - for extents with a disk bytenr outside the data reloc tree, counts
 *     the checksums covering the extent and records whether csums exist,
 *     are partly missing, or are unexpectedly present for a prealloc
 *     extent.
 */
1721 static int process_file_extent(struct btrfs_root *root,
1722 struct extent_buffer *eb,
1723 int slot, struct btrfs_key *key,
1724 struct shared_node *active_node)
1726 struct inode_record *rec;
1727 struct btrfs_file_extent_item *fi;
1729 u64 disk_bytenr = 0;
1730 u64 extent_offset = 0;
1731 u64 mask = root->fs_info->sectorsize - 1;
1735 rec = active_node->current;
1736 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1737 rec->found_file_extent = 1;
1739 if (rec->extent_start == (u64)-1) {
1740 rec->extent_start = key->offset;
1741 rec->extent_end = key->offset;
1744 if (rec->extent_end > key->offset)
1745 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1746 else if (rec->extent_end < key->offset) {
1747 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1748 key->offset - rec->extent_end);
1753 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1754 extent_type = btrfs_file_extent_type(eb, fi);
1756 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1757 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1759 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760 rec->found_size += num_bytes;
1761 num_bytes = (num_bytes + mask) & ~mask;
1762 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1763 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1764 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1765 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1766 extent_offset = btrfs_file_extent_offset(eb, fi);
1767 if (num_bytes == 0 || (num_bytes & mask))
1768 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1769 if (num_bytes + extent_offset >
1770 btrfs_file_extent_ram_bytes(eb, fi))
1771 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1772 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1773 (btrfs_file_extent_compression(eb, fi) ||
1774 btrfs_file_extent_encryption(eb, fi) ||
1775 btrfs_file_extent_other_encoding(eb, fi)))
1776 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1777 if (disk_bytenr > 0)
1778 rec->found_size += num_bytes;
1780 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1782 rec->extent_end = key->offset + num_bytes;
1785 * The data reloc tree will copy full extents into its inode and then
1786 * copy the corresponding csums. Because the extent it copied could be
1787 * a preallocated extent that hasn't been written to yet there may be no
1788 * csums to copy, ergo we won't have csums for our file extent. This is
1789 * ok so just don't bother checking csums if the inode belongs to the
1792 if (disk_bytenr > 0 &&
1793 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1795 if (btrfs_file_extent_compression(eb, fi))
1796 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1798 disk_bytenr += extent_offset;
1800 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1803 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1805 rec->found_csum_item = 1;
1806 if (found < num_bytes)
1807 rec->some_csum_missing = 1;
1808 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1810 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Walks every item of leaf @eb and dispatches it to the matching
 * process_*() helper for the active shared node of @wc.
 *
 * Items are processed in key order. Whenever an item's objectid is larger
 * than the current inode record's ino, the previous record is marked
 * checked and offered to maybe_free_inode_rec(), and a record for the new
 * objectid is fetched from the inode cache. Free-space objects and orphan
 * items are special-cased before that.
 */
1816 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1817 struct walk_control *wc)
1819 struct btrfs_key key;
1823 struct cache_tree *inode_cache;
1824 struct shared_node *active_node;
1826 if (wc->root_level == wc->active_node &&
1827 btrfs_root_refs(&root->root_item) == 0)
1830 active_node = wc->nodes[wc->active_node];
1831 inode_cache = &active_node->inode_cache;
1832 nritems = btrfs_header_nritems(eb);
1833 for (i = 0; i < nritems; i++) {
1834 btrfs_item_key_to_cpu(eb, &key, i);
1836 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1838 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1841 if (active_node->current == NULL ||
1842 active_node->current->ino < key.objectid) {
1843 if (active_node->current) {
1844 active_node->current->checked = 1;
1845 maybe_free_inode_rec(inode_cache,
1846 active_node->current);
1848 active_node->current = get_inode_rec(inode_cache,
1850 BUG_ON(IS_ERR(active_node->current));
1853 case BTRFS_DIR_ITEM_KEY:
1854 case BTRFS_DIR_INDEX_KEY:
1855 ret = process_dir_item(eb, i, &key, active_node);
1857 case BTRFS_INODE_REF_KEY:
1858 ret = process_inode_ref(eb, i, &key, active_node);
1860 case BTRFS_INODE_EXTREF_KEY:
1861 ret = process_inode_extref(eb, i, &key, active_node);
1863 case BTRFS_INODE_ITEM_KEY:
1864 ret = process_inode_item(eb, i, &key, active_node);
1866 case BTRFS_EXTENT_DATA_KEY:
1867 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level cache, indexed by tree level: the block last looked up on that
 * level, its reference count, and whether that block must be checked from
 * the current root (filled in by update_nodes_refs()).
 */
1878 u64 bytenr[BTRFS_MAX_LEVEL];
1879 u64 refs[BTRFS_MAX_LEVEL];
1880 int need_check[BTRFS_MAX_LEVEL];
/*
 * Forward declarations needed by process_one_leaf_v2(), which calls both
 * functions before their definitions.
 */
1883 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1884 struct node_refs *nrefs, u64 level);
1885 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1886 unsigned int ext_ref);
1889 * Returns >0 Found error, not fatal, should continue
1890 * Returns <0 Fatal error, must exit the whole check
1891 * Returns 0 No errors found
/*
 * Checks the inodes stored in the leaf at path->nodes[0].
 *
 * The slot is first advanced to the first INODE_ITEM or to the first item
 * whose objectid differs from the leaf's first one. check_inode_item() is
 * then run and its positive error bits are collected in err. If the path
 * has moved to a different leaf, the cached per-level reference info in
 * @nrefs is refreshed from the root level downwards, so shared nodes that
 * need no check from this root can be skipped.
 */
1893 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1894 struct node_refs *nrefs, int *level, int ext_ref)
1896 struct extent_buffer *cur = path->nodes[0];
1897 struct btrfs_key key;
1901 int root_level = btrfs_header_level(root->node);
1903 int ret = 0; /* Final return value */
1904 int err = 0; /* Positive error bitmap */
1906 cur_bytenr = cur->start;
1908 /* skip to first inode item or the first inode number change */
1909 nritems = btrfs_header_nritems(cur);
1910 for (i = 0; i < nritems; i++) {
1911 btrfs_item_key_to_cpu(cur, &key, i);
1913 first_ino = key.objectid;
1914 if (key.type == BTRFS_INODE_ITEM_KEY ||
1915 (first_ino && first_ino != key.objectid))
1919 path->slots[0] = nritems;
1925 err |= check_inode_item(root, path, ext_ref);
1927 if (err & LAST_ITEM)
1930 /* still have inode items in this leaf */
1931 if (cur->start == cur_bytenr)
1935 * we have switched to another leaf, above nodes may
1936 * have changed, here walk down the path, if a node
1937 * or leaf is shared, check whether we can skip this
1940 for (i = root_level; i >= 0; i--) {
1941 if (path->nodes[i]->start == nrefs->bytenr[i])
1944 ret = update_nodes_refs(root,
1945 path->nodes[i]->start,
1950 if (!nrefs->need_check[i]) {
1956 for (i = 0; i < *level; i++) {
1957 free_extent_buffer(path->nodes[i]);
1958 path->nodes[i] = NULL;
/*
 * Issues readahead for the children of @node, from @slot through the last
 * item, so that the following tree walk finds them cached.
 * NOTE(review): a level-based early exit follows the level lookup but is
 * not visible in this listing.
 */
1967 static void reada_walk_down(struct btrfs_root *root,
1968 struct extent_buffer *node, int slot)
1977 level = btrfs_header_level(node);
1981 nritems = btrfs_header_nritems(node);
1982 blocksize = root->fs_info->nodesize;
1983 for (i = slot; i < nritems; i++) {
1984 bytenr = btrfs_node_blockptr(node, i);
1985 ptr_gen = btrfs_node_ptr_generation(node, i);
1986 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1991 * Check the child node/leaf by the following condition:
1992 * 1. the first item key of the node/leaf should be the same as the one
1994 * 2. block in parent node should match the child node/leaf.
1995 * 3. generation of parent node and child's header should be consistent.
1997 * Or the child node/leaf pointed to by the key in parent is not valid.
1999 * We hope to check leaf owner too, but since subvol may share leaves,
2000 * which makes leaf owner check not so strong, key check should be
2001 * sufficient enough for that case.
/*
 * Cross-checks @child against the pointer stored at @slot of @parent and
 * reports every mismatch on stderr: first key, block address and
 * generation. In each message "wanted" is the value recorded in the parent
 * pointer and "have" is the value found in the child itself.
 */
2003 static int check_child_node(struct extent_buffer *parent, int slot,
2004 struct extent_buffer *child)
2006 struct btrfs_key parent_key;
2007 struct btrfs_key child_key;
2010 btrfs_node_key_to_cpu(parent, &parent_key, slot);
2011 if (btrfs_header_level(child) == 0)
2012 btrfs_item_key_to_cpu(child, &child_key, 0);
2014 btrfs_node_key_to_cpu(child, &child_key, 0);
2016 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2019 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2020 parent_key.objectid, parent_key.type, parent_key.offset,
2021 child_key.objectid, child_key.type, child_key.offset);
2023 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2025 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2026 btrfs_node_blockptr(parent, slot),
2027 btrfs_header_bytenr(child));
2029 if (btrfs_node_ptr_generation(parent, slot) !=
2030 btrfs_header_generation(child)) {
2032 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2033 btrfs_node_ptr_generation(parent, slot),
2034 btrfs_header_generation(child));
2040 * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
2041 * in every fs or file tree check. Here we find all of its root ids, and only check
2042 * it in the fs or file tree which has the smallest root id.
/*
 * Decides whether the block whose referencing roots are listed in @roots
 * has to be checked while walking @root. A single referencing root is
 * special-cased; otherwise @root's objectid is compared with the first
 * entry of the ulist's rbtree.
 */
2044 static int need_check(struct btrfs_root *root, struct ulist *roots)
2046 struct rb_node *node;
2047 struct ulist_node *u;
2049 if (roots->nnodes == 1)
2052 node = rb_first(&roots->root);
2053 u = rb_entry(node, struct ulist_node, rb_node);
2055 * current root id is not smallest, we skip it and let it be checked
2056 * in the fs or file tree which has the smallest root id.
2058 if (root->objectid != u->val)
2065 * for a tree node or leaf, we record its reference count, so later if we still
2066 * process this node or leaf, don't need to compute its reference count again.
/*
 * Refreshes the cached entry of @nrefs for @level when it does not already
 * describe @bytenr: looks up the block's reference count, stores bytenr
 * and refs, and sets need_check[level] either from need_check() on the
 * block's referencing roots or to 1.
 * NOTE(review): the condition selecting between those two need_check
 * assignments is not visible in this listing.
 */
2068 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2069 struct node_refs *nrefs, u64 level)
2073 struct ulist *roots;
2075 if (nrefs->bytenr[level] != bytenr) {
2076 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2077 level, 1, &refs, NULL);
2081 nrefs->bytenr[level] = bytenr;
2082 nrefs->refs[level] = refs;
2084 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2089 check = need_check(root, roots);
2091 nrefs->need_check[level] = check;
2093 nrefs->need_check[level] = 1;
/*
 * Descends from path->nodes[*level] towards the leaves, processing each
 * leaf with process_one_leaf().
 *
 * Reference counts are taken from @nrefs when the cached bytenr matches and
 * looked up (then cached) otherwise. Blocks are handed to
 * enter_shared_node() so shared subtrees can be accounted for. Each child
 * is read (with readahead for its siblings), cross-checked against its
 * parent pointer with check_child_node() and validated with
 * btrfs_check_leaf()/btrfs_check_node() before the walk steps into it.
 * Unreadable children are recorded via btrfs_add_corrupt_extent_record().
 */
2100 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2101 struct walk_control *wc, int *level,
2102 struct node_refs *nrefs)
2104 enum btrfs_tree_block_status status;
2107 struct extent_buffer *next;
2108 struct extent_buffer *cur;
2113 WARN_ON(*level < 0);
2114 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2116 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2117 refs = nrefs->refs[*level];
2120 ret = btrfs_lookup_extent_info(NULL, root,
2121 path->nodes[*level]->start,
2122 *level, 1, &refs, NULL);
2127 nrefs->bytenr[*level] = path->nodes[*level]->start;
2128 nrefs->refs[*level] = refs;
2132 ret = enter_shared_node(root, path->nodes[*level]->start,
2140 while (*level >= 0) {
2141 WARN_ON(*level < 0);
2142 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2143 cur = path->nodes[*level];
2145 if (btrfs_header_level(cur) != *level)
2148 if (path->slots[*level] >= btrfs_header_nritems(cur))
2151 ret = process_one_leaf(root, cur, wc);
2156 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2157 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2158 blocksize = root->fs_info->nodesize;
2160 if (bytenr == nrefs->bytenr[*level - 1]) {
2161 refs = nrefs->refs[*level - 1];
2163 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2164 *level - 1, 1, &refs, NULL);
2168 nrefs->bytenr[*level - 1] = bytenr;
2169 nrefs->refs[*level - 1] = refs;
2174 ret = enter_shared_node(root, bytenr, refs,
2177 path->slots[*level]++;
2182 next = btrfs_find_tree_block(root, bytenr, blocksize);
2183 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2184 free_extent_buffer(next);
2185 reada_walk_down(root, cur, path->slots[*level]);
2186 next = read_tree_block(root->fs_info, bytenr, blocksize,
2188 if (!extent_buffer_uptodate(next)) {
2189 struct btrfs_key node_key;
2191 btrfs_node_key_to_cpu(path->nodes[*level],
2193 path->slots[*level]);
2194 btrfs_add_corrupt_extent_record(root->fs_info,
2196 path->nodes[*level]->start,
2197 root->fs_info->nodesize,
2204 ret = check_child_node(cur, path->slots[*level], next);
2206 free_extent_buffer(next);
2211 if (btrfs_is_leaf(next))
2212 status = btrfs_check_leaf(root, NULL, next);
2214 status = btrfs_check_node(root, NULL, next);
2215 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2216 free_extent_buffer(next);
2221 *level = *level - 1;
2222 free_extent_buffer(path->nodes[*level]);
2223 path->nodes[*level] = next;
2224 path->slots[*level] = 0;
2227 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Forward declaration of check_inode_item(); an identical declaration also
 * appears earlier in the file, before process_one_leaf_v2().
 */
2231 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2232 unsigned int ext_ref);
2235 * Returns >0 Found error, should continue
2236 * Returns <0 Fatal error, must exit the whole check
2237 * Returns 0 No errors found
/*
 * Variant of walk_down_tree() that works with @nrefs instead of a
 * walk_control. It descends from path->nodes[*level], validating each
 * visited block with btrfs_check_leaf()/btrfs_check_node() and processing
 * leaves with process_one_leaf_v2().
 *
 * Children whose need_check flag in @nrefs is clear are skipped by
 * advancing the slot. Other children are read (with readahead for their
 * siblings), cross-checked with check_child_node() and validated before the
 * walk steps into them. Unreadable children are recorded via
 * btrfs_add_corrupt_extent_record().
 */
2239 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2240 int *level, struct node_refs *nrefs, int ext_ref)
2242 enum btrfs_tree_block_status status;
2245 struct extent_buffer *next;
2246 struct extent_buffer *cur;
2250 WARN_ON(*level < 0);
2251 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2253 ret = update_nodes_refs(root, path->nodes[*level]->start,
2258 while (*level >= 0) {
2259 WARN_ON(*level < 0);
2260 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2261 cur = path->nodes[*level];
2263 if (btrfs_header_level(cur) != *level)
2266 if (path->slots[*level] >= btrfs_header_nritems(cur))
2268 /* Don't forget to check leaf/node validation */
2270 ret = btrfs_check_leaf(root, NULL, cur);
2271 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2275 ret = process_one_leaf_v2(root, path, nrefs,
2279 ret = btrfs_check_node(root, NULL, cur);
2280 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2285 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2286 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2287 blocksize = root->fs_info->nodesize;
2289 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
2292 if (!nrefs->need_check[*level - 1]) {
2293 path->slots[*level]++;
2297 next = btrfs_find_tree_block(root, bytenr, blocksize);
2298 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2299 free_extent_buffer(next);
2300 reada_walk_down(root, cur, path->slots[*level]);
2301 next = read_tree_block(root->fs_info, bytenr, blocksize,
2303 if (!extent_buffer_uptodate(next)) {
2304 struct btrfs_key node_key;
2306 btrfs_node_key_to_cpu(path->nodes[*level],
2308 path->slots[*level]);
2309 btrfs_add_corrupt_extent_record(root->fs_info,
2311 path->nodes[*level]->start,
2312 root->fs_info->nodesize,
2319 ret = check_child_node(cur, path->slots[*level], next);
2323 if (btrfs_is_leaf(next))
2324 status = btrfs_check_leaf(root, NULL, next);
2326 status = btrfs_check_node(root, NULL, next);
2327 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2328 free_extent_buffer(next);
2333 *level = *level - 1;
2334 free_extent_buffer(path->nodes[*level]);
2335 path->nodes[*level] = next;
2336 path->slots[*level] = 0;
/*
 * Climbs from *level towards the root looking for a node that still has an
 * unvisited slot (slot + 1 < nritems). Exhausted levels have their extent
 * buffer released and their path entry cleared; when such a level is the
 * active shared-node level of @wc, leave_shared_node() is called for it.
 * NOTE(review): the slot advance and the return statements are not visible
 * in this listing.
 */
2341 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2342 struct walk_control *wc, int *level)
2345 struct extent_buffer *leaf;
2347 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2348 leaf = path->nodes[i];
2349 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2354 free_extent_buffer(path->nodes[*level]);
2355 path->nodes[*level] = NULL;
2356 BUG_ON(*level > wc->active_node);
2357 if (*level == wc->active_node)
2358 leave_shared_node(root, wc, *level);
/*
 * Same upward walk as walk_up_tree(), without shared-node bookkeeping:
 * levels with no unvisited slot left have their extent buffer released and
 * their path entry cleared.
 */
2365 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2369 struct extent_buffer *leaf;
2371 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2372 leaf = path->nodes[i];
2373 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2378 free_extent_buffer(path->nodes[*level]);
2379 path->nodes[*level] = NULL;
/*
 * Sanity-checks the record of a root directory. The checks look at: an
 * inode item without errors, nlink of 1 with no found links, and a first
 * backref that is an inode ref named ".." at index 0 and has neither a dir
 * index nor a dir item.
 * NOTE(review): the branch targets and return value are not visible in
 * this listing.
 */
2386 static int check_root_dir(struct inode_record *rec)
2388 struct inode_backref *backref;
2391 if (!rec->found_inode_item || rec->errors)
2393 if (rec->nlink != 1 || rec->found_link != 0)
2395 if (list_empty(&rec->backrefs))
2397 backref = to_inode_backref(rec->backrefs.next);
2398 if (!backref->found_inode_ref)
2400 if (backref->index != 0 || backref->namelen != 2 ||
2401 memcmp(backref->name, "..", 2))
2403 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrites the size field of directory inode rec->ino with rec->found_size,
 * marks the leaf dirty and clears I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * The search uses offset (u64)-1 for the INODE_ITEM key, so it lands after
 * the wanted item; the item actually used is then verified to belong to
 * rec->ino.
 * NOTE(review): the step back to the previous slot and the error returns
 * are not visible in this listing.
 */
2410 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2411 struct btrfs_root *root, struct btrfs_path *path,
2412 struct inode_record *rec)
2414 struct btrfs_inode_item *ei;
2415 struct btrfs_key key;
2418 key.objectid = rec->ino;
2419 key.type = BTRFS_INODE_ITEM_KEY;
2420 key.offset = (u64)-1;
2422 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2426 if (!path->slots[0]) {
2433 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2434 if (key.objectid != rec->ino) {
2439 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2440 struct btrfs_inode_item);
2441 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2442 btrfs_mark_buffer_dirty(path->nodes[0]);
2443 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2444 printf("reset isize for dir %llu root %llu\n", rec->ino,
2445 root->root_key.objectid);
2447 btrfs_release_path(path);
/*
 * Adds an orphan item for rec->ino to @root, releases @path and clears
 * I_ERR_NO_ORPHAN_ITEM on the record.
 * NOTE(review): whether the flag is cleared only on success is not visible
 * in this listing.
 */
2451 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2452 struct btrfs_root *root,
2453 struct btrfs_path *path,
2454 struct inode_record *rec)
2458 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2459 btrfs_release_path(path);
2461 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrites the nbytes field of inode rec->ino with rec->found_size, marks
 * the leaf dirty, clears I_ERR_FILE_NBYTES_WRONG on the record and reports
 * the fix on stdout.
 */
2465 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2466 struct btrfs_root *root,
2467 struct btrfs_path *path,
2468 struct inode_record *rec)
2470 struct btrfs_inode_item *ei;
2471 struct btrfs_key key;
2474 key.objectid = rec->ino;
2475 key.type = BTRFS_INODE_ITEM_KEY;
2478 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2485 /* Since ret == 0, no need to check anything */
2486 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2487 struct btrfs_inode_item);
2488 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2489 btrfs_mark_buffer_dirty(path->nodes[0]);
2490 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2491 printf("reset nbytes for ino %llu root %llu\n",
2492 rec->ino, root->root_key.objectid);
2494 btrfs_release_path(path);
/*
 * Re-creates the DIR_INDEX item described by @backref for inode @rec.
 *
 * In its own transaction it inserts an item keyed
 * (backref->dir, DIR_INDEX, backref->index), fills in the location key
 * (rec->ino, INODE_ITEM, 0), the type derived from rec->imode and the name,
 * then commits. Afterwards the backref is marked as having a dir index, and
 * the parent directory's record has the name length added to found_size and
 * its I_ERR_DIR_ISIZE_WRONG flag re-evaluated against isize.
 */
2498 static int add_missing_dir_index(struct btrfs_root *root,
2499 struct cache_tree *inode_cache,
2500 struct inode_record *rec,
2501 struct inode_backref *backref)
2503 struct btrfs_path path;
2504 struct btrfs_trans_handle *trans;
2505 struct btrfs_dir_item *dir_item;
2506 struct extent_buffer *leaf;
2507 struct btrfs_key key;
2508 struct btrfs_disk_key disk_key;
2509 struct inode_record *dir_rec;
2510 unsigned long name_ptr;
2511 u32 data_size = sizeof(*dir_item) + backref->namelen;
2514 trans = btrfs_start_transaction(root, 1);
2516 return PTR_ERR(trans);
2518 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2519 (unsigned long long)rec->ino);
2521 btrfs_init_path(&path);
2522 key.objectid = backref->dir;
2523 key.type = BTRFS_DIR_INDEX_KEY;
2524 key.offset = backref->index;
2525 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2528 leaf = path.nodes[0];
2529 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2531 disk_key.objectid = cpu_to_le64(rec->ino);
2532 disk_key.type = BTRFS_INODE_ITEM_KEY;
2533 disk_key.offset = 0;
2535 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2536 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2537 btrfs_set_dir_data_len(leaf, dir_item, 0);
2538 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2539 name_ptr = (unsigned long)(dir_item + 1);
2540 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2541 btrfs_mark_buffer_dirty(leaf);
2542 btrfs_release_path(&path);
2543 btrfs_commit_transaction(trans, root);
2545 backref->found_dir_index = 1;
2546 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2547 BUG_ON(IS_ERR(dir_rec));
2550 dir_rec->found_size += backref->namelen;
2551 if (dir_rec->found_size == dir_rec->isize &&
2552 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2553 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2554 if (dir_rec->found_size != dir_rec->isize)
2555 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Removes the DIR_INDEX entry described by @backref from @root inside its
 * own transaction. The entry is looked up by directory, name and index;
 * removal goes through either btrfs_del_item() or
 * btrfs_delete_one_dir_name(). The transaction is committed on both the
 * visible exit paths.
 * NOTE(review): the conditions selecting between the two removal calls and
 * the early-exit path are not visible in this listing.
 */
2560 static int delete_dir_index(struct btrfs_root *root,
2561 struct inode_backref *backref)
2563 struct btrfs_trans_handle *trans;
2564 struct btrfs_dir_item *di;
2565 struct btrfs_path path;
2568 trans = btrfs_start_transaction(root, 1);
2570 return PTR_ERR(trans);
2572 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2573 (unsigned long long)backref->dir,
2574 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2575 (unsigned long long)root->objectid);
2577 btrfs_init_path(&path);
2578 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2579 backref->name, backref->namelen,
2580 backref->index, -1);
2583 btrfs_release_path(&path);
2584 btrfs_commit_transaction(trans, root);
2591 ret = btrfs_del_item(trans, root, &path);
2593 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2595 btrfs_release_path(&path);
2596 btrfs_commit_transaction(trans, root);
/*
 * Re-creates a missing inode item for @rec in @root inside its own
 * transaction.
 *
 * The new item is built on the stack: generation is the transaction id,
 * nbytes is rec->found_size, and atime/ctime/mtime are set to the current
 * time with otime zeroed. A record with a dir item becomes a directory
 * (S_IFDIR | 0755, size = found_size), even when file extents were seen
 * too; anything else becomes a regular file (S_IFREG | 0755,
 * size = extent_end).
 * NOTE(review): the condition choosing between nlink 1 and rec->found_link
 * is not visible in this listing.
 */
2600 static int create_inode_item(struct btrfs_root *root,
2601 struct inode_record *rec,
2604 struct btrfs_trans_handle *trans;
2605 struct btrfs_inode_item inode_item;
2606 time_t now = time(NULL);
2609 trans = btrfs_start_transaction(root, 1);
2610 if (IS_ERR(trans)) {
2611 ret = PTR_ERR(trans);
2615 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2616 "be incomplete, please check permissions and content after "
2617 "the fsck completes.\n", (unsigned long long)root->objectid,
2618 (unsigned long long)rec->ino);
2620 memset(&inode_item, 0, sizeof(inode_item));
2621 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2623 btrfs_set_stack_inode_nlink(&inode_item, 1);
2625 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2626 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2627 if (rec->found_dir_item) {
2628 if (rec->found_file_extent)
2629 fprintf(stderr, "root %llu inode %llu has both a dir "
2630 "item and extents, unsure if it is a dir or a "
2631 "regular file so setting it as a directory\n",
2632 (unsigned long long)root->objectid,
2633 (unsigned long long)rec->ino);
2634 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2635 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2636 } else if (!rec->found_dir_item) {
2637 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2638 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2640 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2641 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2642 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2643 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2644 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2645 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2646 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2647 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2649 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2651 btrfs_commit_transaction(trans, root);
/*
 * Walks all backrefs of @rec and repairs inconsistent ones. The `delete`
 * flag selects the pass: the delete pass removes bad dir index items, and
 * the non-delete pass adds what is missing.
 *
 * Visible repairs:
 *   - root dir without inode item: create_inode_item()
 *   - dir index without inode ref, or with an index mismatch:
 *     delete_dir_index() and drop the backref
 *   - dir item + inode ref but no dir index: add_missing_dir_index()
 *   - inode ref only: insert the dir index/item pair, after a name-conflict
 *     check
 *   - complete, matching backref but no inode item: create_inode_item()
 * Backrefs that are complete and error-free are removed from the list.
 *
 * Returns the error code if one occurred, otherwise the `repaired` counter.
 */
2655 static int repair_inode_backrefs(struct btrfs_root *root,
2656 struct inode_record *rec,
2657 struct cache_tree *inode_cache,
2660 struct inode_backref *tmp, *backref;
2661 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2665 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2666 if (!delete && rec->ino == root_dirid) {
2667 if (!rec->found_inode_item) {
2668 ret = create_inode_item(root, rec, 1);
2675 /* Index 0 for root dir's are special, don't mess with it */
2676 if (rec->ino == root_dirid && backref->index == 0)
2680 ((backref->found_dir_index && !backref->found_inode_ref) ||
2681 (backref->found_dir_index && backref->found_inode_ref &&
2682 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2683 ret = delete_dir_index(root, backref);
2687 list_del(&backref->list);
2692 if (!delete && !backref->found_dir_index &&
2693 backref->found_dir_item && backref->found_inode_ref) {
2694 ret = add_missing_dir_index(root, inode_cache, rec,
2699 if (backref->found_dir_item &&
2700 backref->found_dir_index) {
2701 if (!backref->errors &&
2702 backref->found_inode_ref) {
2703 list_del(&backref->list);
2710 if (!delete && (!backref->found_dir_index &&
2711 !backref->found_dir_item &&
2712 backref->found_inode_ref)) {
2713 struct btrfs_trans_handle *trans;
2714 struct btrfs_key location;
2716 ret = check_dir_conflict(root, backref->name,
2722 * let nlink fixing routine to handle it,
2723 * which can do it better.
2728 location.objectid = rec->ino;
2729 location.type = BTRFS_INODE_ITEM_KEY;
2730 location.offset = 0;
2732 trans = btrfs_start_transaction(root, 1);
2733 if (IS_ERR(trans)) {
2734 ret = PTR_ERR(trans);
2737 fprintf(stderr, "adding missing dir index/item pair "
2739 (unsigned long long)rec->ino);
2740 ret = btrfs_insert_dir_item(trans, root, backref->name,
2742 backref->dir, &location,
2743 imode_to_type(rec->imode),
2746 btrfs_commit_transaction(trans, root);
2750 if (!delete && (backref->found_inode_ref &&
2751 backref->found_dir_index &&
2752 backref->found_dir_item &&
2753 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2754 !rec->found_inode_item)) {
2755 ret = create_inode_item(root, rec, 0);
2762 return ret ? ret : repaired;
2766 * To determine the file type for nlink/inode_item repair
2768 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2769 * Return -ENOENT if file type is not found.
2771 static int find_file_type(struct inode_record *rec, u8 *type)
2773 struct inode_backref *backref;
2775 /* For inode item recovered case */
2776 if (rec->found_inode_item) {
2777 *type = imode_to_type(rec->imode);
2781 list_for_each_entry(backref, &rec->backrefs, list) {
2782 if (backref->found_dir_index || backref->found_dir_item) {
2783 *type = backref->filetype;
2791 * To determine the file name for nlink repair
2793 * Return 0 if file name is found, set name and namelen.
2794 * Return -ENOENT if file name is not found.
2796 static int find_file_name(struct inode_record *rec,
2797 char *name, int *namelen)
2799 struct inode_backref *backref;
2801 list_for_each_entry(backref, &rec->backrefs, list) {
2802 if (backref->found_dir_index || backref->found_dir_item ||
2803 backref->found_inode_ref) {
2804 memcpy(name, backref->name, backref->namelen);
2805 *namelen = backref->namelen;
2812 /* Reset the nlink of the inode to the correct one */
2813 static int reset_nlink(struct btrfs_trans_handle *trans,
2814 struct btrfs_root *root,
2815 struct btrfs_path *path,
2816 struct inode_record *rec)
2818 struct inode_backref *backref;
2819 struct inode_backref *tmp;
2820 struct btrfs_key key;
2821 struct btrfs_inode_item *inode_item;
2824 /* We don't believe this either, reset it and iterate backref */
2825 rec->found_link = 0;
2827 /* Remove all backref including the valid ones */
2828 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2829 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2830 backref->index, backref->name,
2831 backref->namelen, 0);
2835 /* remove invalid backref, so it won't be added back */
2836 if (!(backref->found_dir_index &&
2837 backref->found_dir_item &&
2838 backref->found_inode_ref)) {
2839 list_del(&backref->list);
2846 /* Set nlink to 0 */
2847 key.objectid = rec->ino;
2848 key.type = BTRFS_INODE_ITEM_KEY;
2850 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2857 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2858 struct btrfs_inode_item);
2859 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2860 btrfs_mark_buffer_dirty(path->nodes[0]);
2861 btrfs_release_path(path);
2864 * Add back valid inode_ref/dir_item/dir_index,
2865 * add_link() will handle the nlink inc, so new nlink must be correct
2867 list_for_each_entry(backref, &rec->backrefs, list) {
2868 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2869 backref->name, backref->namelen,
2870 backref->filetype, &backref->index, 1);
2875 btrfs_release_path(path);
2879 static int get_highest_inode(struct btrfs_trans_handle *trans,
2880 struct btrfs_root *root,
2881 struct btrfs_path *path,
2884 struct btrfs_key key, found_key;
2887 btrfs_init_path(path);
2888 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2890 key.type = BTRFS_INODE_ITEM_KEY;
2891 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2893 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2894 path->slots[0] - 1);
2895 *highest_ino = found_key.objectid;
2898 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2900 btrfs_release_path(path);
2904 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2905 struct btrfs_root *root,
2906 struct btrfs_path *path,
2907 struct inode_record *rec)
2909 char *dir_name = "lost+found";
2910 char namebuf[BTRFS_NAME_LEN] = {0};
2915 int name_recovered = 0;
2916 int type_recovered = 0;
2920 * Get file name and type first before these invalid inode ref
2921 * are deleted by remove_all_invalid_backref()
2923 name_recovered = !find_file_name(rec, namebuf, &namelen);
2924 type_recovered = !find_file_type(rec, &type);
2926 if (!name_recovered) {
2927 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2928 rec->ino, rec->ino);
2929 namelen = count_digits(rec->ino);
2930 sprintf(namebuf, "%llu", rec->ino);
2933 if (!type_recovered) {
2934 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2936 type = BTRFS_FT_REG_FILE;
2940 ret = reset_nlink(trans, root, path, rec);
2943 "Failed to reset nlink for inode %llu: %s\n",
2944 rec->ino, strerror(-ret));
2948 if (rec->found_link == 0) {
2949 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2953 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2954 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2957 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2958 dir_name, strerror(-ret));
2961 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2962 namebuf, namelen, type, NULL, 1);
2964 * Add ".INO" suffix several times to handle case where
2965 * "FILENAME.INO" is already taken by another file.
2967 while (ret == -EEXIST) {
2969 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2971 if (namelen + count_digits(rec->ino) + 1 >
2976 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2978 namelen += count_digits(rec->ino) + 1;
2979 ret = btrfs_add_link(trans, root, rec->ino,
2980 lost_found_ino, namebuf,
2981 namelen, type, NULL, 1);
2985 "Failed to link the inode %llu to %s dir: %s\n",
2986 rec->ino, dir_name, strerror(-ret));
2990 * Just increase the found_link, don't actually add the
2991 * backref. This will make things easier and this inode
2992 * record will be freed after the repair is done.
2993 * So fsck will not report problem about this inode.
2996 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2997 namelen, namebuf, dir_name);
2999 printf("Fixed the nlink of inode %llu\n", rec->ino);
3002 * Clear the flag anyway, or we will loop forever for the same inode
3003 * as it will not be removed from the bad inode list and the dead loop
3006 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3007 btrfs_release_path(path);
3012 * Check if there is any normal(reg or prealloc) file extent for given
3014 * This is used to determine the file type when neither its dir_index/item or
3015 * inode_item exists.
3017 * This will *NOT* report error, if any error happens, just consider it does
3018 * not have any normal file extent.
3020 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3022 struct btrfs_path path;
3023 struct btrfs_key key;
3024 struct btrfs_key found_key;
3025 struct btrfs_file_extent_item *fi;
3029 btrfs_init_path(&path);
3031 key.type = BTRFS_EXTENT_DATA_KEY;
3034 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3039 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3040 ret = btrfs_next_leaf(root, &path);
3047 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3049 if (found_key.objectid != ino ||
3050 found_key.type != BTRFS_EXTENT_DATA_KEY)
3052 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3053 struct btrfs_file_extent_item);
3054 type = btrfs_file_extent_type(path.nodes[0], fi);
3055 if (type != BTRFS_FILE_EXTENT_INLINE) {
3061 btrfs_release_path(&path);
3065 static u32 btrfs_type_to_imode(u8 type)
3067 static u32 imode_by_btrfs_type[] = {
3068 [BTRFS_FT_REG_FILE] = S_IFREG,
3069 [BTRFS_FT_DIR] = S_IFDIR,
3070 [BTRFS_FT_CHRDEV] = S_IFCHR,
3071 [BTRFS_FT_BLKDEV] = S_IFBLK,
3072 [BTRFS_FT_FIFO] = S_IFIFO,
3073 [BTRFS_FT_SOCK] = S_IFSOCK,
3074 [BTRFS_FT_SYMLINK] = S_IFLNK,
3077 return imode_by_btrfs_type[(type)];
3080 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3081 struct btrfs_root *root,
3082 struct btrfs_path *path,
3083 struct inode_record *rec)
3087 int type_recovered = 0;
3090 printf("Trying to rebuild inode:%llu\n", rec->ino);
3092 type_recovered = !find_file_type(rec, &filetype);
3095 * Try to determine inode type if type not found.
3097 * For found regular file extent, it must be FILE.
3098 * For found dir_item/index, it must be DIR.
3100 * For undetermined one, use FILE as fallback.
3103 * 1. If found backref(inode_index/item is already handled) to it,
3105 * Need new inode-inode ref structure to allow search for that.
3107 if (!type_recovered) {
3108 if (rec->found_file_extent &&
3109 find_normal_file_extent(root, rec->ino)) {
3111 filetype = BTRFS_FT_REG_FILE;
3112 } else if (rec->found_dir_item) {
3114 filetype = BTRFS_FT_DIR;
3115 } else if (!list_empty(&rec->orphan_extents)) {
3117 filetype = BTRFS_FT_REG_FILE;
3119 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3122 filetype = BTRFS_FT_REG_FILE;
3126 ret = btrfs_new_inode(trans, root, rec->ino,
3127 mode | btrfs_type_to_imode(filetype));
3132 * Here inode rebuild is done, we only rebuild the inode item,
3133 * don't repair the nlink(like move to lost+found).
3134 * That is the job of nlink repair.
3136 * We just fill the record and return
3138 rec->found_dir_item = 1;
3139 rec->imode = mode | btrfs_type_to_imode(filetype);
3141 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3142 /* Ensure the inode_nlinks repair function will be called */
3143 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3148 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3149 struct btrfs_root *root,
3150 struct btrfs_path *path,
3151 struct inode_record *rec)
3153 struct orphan_data_extent *orphan;
3154 struct orphan_data_extent *tmp;
3157 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3159 * Check for conflicting file extents
3161 * Here we don't know whether the extents is compressed or not,
3162 * so we can only assume it not compressed nor data offset,
3163 * and use its disk_len as extent length.
3165 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3166 orphan->offset, orphan->disk_len, 0);
3167 btrfs_release_path(path);
3172 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3173 orphan->disk_bytenr, orphan->disk_len);
3174 ret = btrfs_free_extent(trans,
3175 root->fs_info->extent_root,
3176 orphan->disk_bytenr, orphan->disk_len,
3177 0, root->objectid, orphan->objectid,
3182 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3183 orphan->offset, orphan->disk_bytenr,
3184 orphan->disk_len, orphan->disk_len);
3188 /* Update file size info */
3189 rec->found_size += orphan->disk_len;
3190 if (rec->found_size == rec->nbytes)
3191 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3193 /* Update the file extent hole info too */
3194 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3198 if (RB_EMPTY_ROOT(&rec->holes))
3199 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3201 list_del(&orphan->list);
3204 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3209 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3210 struct btrfs_root *root,
3211 struct btrfs_path *path,
3212 struct inode_record *rec)
3214 struct rb_node *node;
3215 struct file_extent_hole *hole;
3219 node = rb_first(&rec->holes);
3223 hole = rb_entry(node, struct file_extent_hole, node);
3224 ret = btrfs_punch_hole(trans, root, rec->ino,
3225 hole->start, hole->len);
3228 ret = del_file_extent_hole(&rec->holes, hole->start,
3232 if (RB_EMPTY_ROOT(&rec->holes))
3233 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3234 node = rb_first(&rec->holes);
3236 /* special case for a file losing all its file extent */
3238 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3239 round_up(rec->isize,
3240 root->fs_info->sectorsize));
3244 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3245 rec->ino, root->objectid);
3250 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3252 struct btrfs_trans_handle *trans;
3253 struct btrfs_path path;
3256 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3257 I_ERR_NO_ORPHAN_ITEM |
3258 I_ERR_LINK_COUNT_WRONG |
3259 I_ERR_NO_INODE_ITEM |
3260 I_ERR_FILE_EXTENT_ORPHAN |
3261 I_ERR_FILE_EXTENT_DISCOUNT|
3262 I_ERR_FILE_NBYTES_WRONG)))
3266 * For nlink repair, it may create a dir and add link, so
3267 * 2 for parent(256)'s dir_index and dir_item
3268 * 2 for lost+found dir's inode_item and inode_ref
3269 * 1 for the new inode_ref of the file
3270 * 2 for lost+found dir's dir_index and dir_item for the file
3272 trans = btrfs_start_transaction(root, 7);
3274 return PTR_ERR(trans);
3276 btrfs_init_path(&path);
3277 if (rec->errors & I_ERR_NO_INODE_ITEM)
3278 ret = repair_inode_no_item(trans, root, &path, rec);
3279 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3280 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3281 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3282 ret = repair_inode_discount_extent(trans, root, &path, rec);
3283 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3284 ret = repair_inode_isize(trans, root, &path, rec);
3285 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3286 ret = repair_inode_orphan_item(trans, root, &path, rec);
3287 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3288 ret = repair_inode_nlinks(trans, root, &path, rec);
3289 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3290 ret = repair_inode_nbytes(trans, root, &path, rec);
3291 btrfs_commit_transaction(trans, root);
3292 btrfs_release_path(&path);
3296 static int check_inode_recs(struct btrfs_root *root,
3297 struct cache_tree *inode_cache)
3299 struct cache_extent *cache;
3300 struct ptr_node *node;
3301 struct inode_record *rec;
3302 struct inode_backref *backref;
3307 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3309 if (btrfs_root_refs(&root->root_item) == 0) {
3310 if (!cache_tree_empty(inode_cache))
3311 fprintf(stderr, "warning line %d\n", __LINE__);
3316 * We need to repair backrefs first because we could change some of the
3317 * errors in the inode recs.
3319 * We also need to go through and delete invalid backrefs first and then
3320 * add the correct ones second. We do this because we may get EEXIST
3321 * when adding back the correct index because we hadn't yet deleted the
3324 * For example, if we were missing a dir index then the directories
3325 * isize would be wrong, so if we fixed the isize to what we thought it
3326 * would be and then fixed the backref we'd still have a invalid fs, so
3327 * we need to add back the dir index and then check to see if the isize
3332 if (stage == 3 && !err)
3335 cache = search_cache_extent(inode_cache, 0);
3336 while (repair && cache) {
3337 node = container_of(cache, struct ptr_node, cache);
3339 cache = next_cache_extent(cache);
3341 /* Need to free everything up and rescan */
3343 remove_cache_extent(inode_cache, &node->cache);
3345 free_inode_rec(rec);
3349 if (list_empty(&rec->backrefs))
3352 ret = repair_inode_backrefs(root, rec, inode_cache,
3366 rec = get_inode_rec(inode_cache, root_dirid, 0);
3367 BUG_ON(IS_ERR(rec));
3369 ret = check_root_dir(rec);
3371 fprintf(stderr, "root %llu root dir %llu error\n",
3372 (unsigned long long)root->root_key.objectid,
3373 (unsigned long long)root_dirid);
3374 print_inode_error(root, rec);
3379 struct btrfs_trans_handle *trans;
3381 trans = btrfs_start_transaction(root, 1);
3382 if (IS_ERR(trans)) {
3383 err = PTR_ERR(trans);
3388 "root %llu missing its root dir, recreating\n",
3389 (unsigned long long)root->objectid);
3391 ret = btrfs_make_root_dir(trans, root, root_dirid);
3394 btrfs_commit_transaction(trans, root);
3398 fprintf(stderr, "root %llu root dir %llu not found\n",
3399 (unsigned long long)root->root_key.objectid,
3400 (unsigned long long)root_dirid);
3404 cache = search_cache_extent(inode_cache, 0);
3407 node = container_of(cache, struct ptr_node, cache);
3409 remove_cache_extent(inode_cache, &node->cache);
3411 if (rec->ino == root_dirid ||
3412 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3413 free_inode_rec(rec);
3417 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3418 ret = check_orphan_item(root, rec->ino);
3420 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3421 if (can_free_inode_rec(rec)) {
3422 free_inode_rec(rec);
3427 if (!rec->found_inode_item)
3428 rec->errors |= I_ERR_NO_INODE_ITEM;
3429 if (rec->found_link != rec->nlink)
3430 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3432 ret = try_repair_inode(root, rec);
3433 if (ret == 0 && can_free_inode_rec(rec)) {
3434 free_inode_rec(rec);
3440 if (!(repair && ret == 0))
3442 print_inode_error(root, rec);
3443 list_for_each_entry(backref, &rec->backrefs, list) {
3444 if (!backref->found_dir_item)
3445 backref->errors |= REF_ERR_NO_DIR_ITEM;
3446 if (!backref->found_dir_index)
3447 backref->errors |= REF_ERR_NO_DIR_INDEX;
3448 if (!backref->found_inode_ref)
3449 backref->errors |= REF_ERR_NO_INODE_REF;
3450 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3451 " namelen %u name %s filetype %d errors %x",
3452 (unsigned long long)backref->dir,
3453 (unsigned long long)backref->index,
3454 backref->namelen, backref->name,
3455 backref->filetype, backref->errors);
3456 print_ref_error(backref->errors);
3458 free_inode_rec(rec);
3460 return (error > 0) ? -1 : 0;
3463 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3466 struct cache_extent *cache;
3467 struct root_record *rec = NULL;
3470 cache = lookup_cache_extent(root_cache, objectid, 1);
3472 rec = container_of(cache, struct root_record, cache);
3474 rec = calloc(1, sizeof(*rec));
3476 return ERR_PTR(-ENOMEM);
3477 rec->objectid = objectid;
3478 INIT_LIST_HEAD(&rec->backrefs);
3479 rec->cache.start = objectid;
3480 rec->cache.size = 1;
3482 ret = insert_cache_extent(root_cache, &rec->cache);
3484 return ERR_PTR(-EEXIST);
3489 static struct root_backref *get_root_backref(struct root_record *rec,
3490 u64 ref_root, u64 dir, u64 index,
3491 const char *name, int namelen)
3493 struct root_backref *backref;
3495 list_for_each_entry(backref, &rec->backrefs, list) {
3496 if (backref->ref_root != ref_root || backref->dir != dir ||
3497 backref->namelen != namelen)
3499 if (memcmp(name, backref->name, namelen))
3504 backref = calloc(1, sizeof(*backref) + namelen + 1);
3507 backref->ref_root = ref_root;
3509 backref->index = index;
3510 backref->namelen = namelen;
3511 memcpy(backref->name, name, namelen);
3512 backref->name[namelen] = '\0';
3513 list_add_tail(&backref->list, &rec->backrefs);
3517 static void free_root_record(struct cache_extent *cache)
3519 struct root_record *rec;
3520 struct root_backref *backref;
3522 rec = container_of(cache, struct root_record, cache);
3523 while (!list_empty(&rec->backrefs)) {
3524 backref = to_root_backref(rec->backrefs.next);
3525 list_del(&backref->list);
3532 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3534 static int add_root_backref(struct cache_tree *root_cache,
3535 u64 root_id, u64 ref_root, u64 dir, u64 index,
3536 const char *name, int namelen,
3537 int item_type, int errors)
3539 struct root_record *rec;
3540 struct root_backref *backref;
3542 rec = get_root_rec(root_cache, root_id);
3543 BUG_ON(IS_ERR(rec));
3544 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3547 backref->errors |= errors;
3549 if (item_type != BTRFS_DIR_ITEM_KEY) {
3550 if (backref->found_dir_index || backref->found_back_ref ||
3551 backref->found_forward_ref) {
3552 if (backref->index != index)
3553 backref->errors |= REF_ERR_INDEX_UNMATCH;
3555 backref->index = index;
3559 if (item_type == BTRFS_DIR_ITEM_KEY) {
3560 if (backref->found_forward_ref)
3562 backref->found_dir_item = 1;
3563 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3564 backref->found_dir_index = 1;
3565 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3566 if (backref->found_forward_ref)
3567 backref->errors |= REF_ERR_DUP_ROOT_REF;
3568 else if (backref->found_dir_item)
3570 backref->found_forward_ref = 1;
3571 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3572 if (backref->found_back_ref)
3573 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3574 backref->found_back_ref = 1;
3579 if (backref->found_forward_ref && backref->found_dir_item)
3580 backref->reachable = 1;
3584 static int merge_root_recs(struct btrfs_root *root,
3585 struct cache_tree *src_cache,
3586 struct cache_tree *dst_cache)
3588 struct cache_extent *cache;
3589 struct ptr_node *node;
3590 struct inode_record *rec;
3591 struct inode_backref *backref;
3594 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3595 free_inode_recs_tree(src_cache);
3600 cache = search_cache_extent(src_cache, 0);
3603 node = container_of(cache, struct ptr_node, cache);
3605 remove_cache_extent(src_cache, &node->cache);
3608 ret = is_child_root(root, root->objectid, rec->ino);
3614 list_for_each_entry(backref, &rec->backrefs, list) {
3615 BUG_ON(backref->found_inode_ref);
3616 if (backref->found_dir_item)
3617 add_root_backref(dst_cache, rec->ino,
3618 root->root_key.objectid, backref->dir,
3619 backref->index, backref->name,
3620 backref->namelen, BTRFS_DIR_ITEM_KEY,
3622 if (backref->found_dir_index)
3623 add_root_backref(dst_cache, rec->ino,
3624 root->root_key.objectid, backref->dir,
3625 backref->index, backref->name,
3626 backref->namelen, BTRFS_DIR_INDEX_KEY,
3630 free_inode_rec(rec);
3637 static int check_root_refs(struct btrfs_root *root,
3638 struct cache_tree *root_cache)
3640 struct root_record *rec;
3641 struct root_record *ref_root;
3642 struct root_backref *backref;
3643 struct cache_extent *cache;
3649 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3650 BUG_ON(IS_ERR(rec));
3653 /* fixme: this can not detect circular references */
3656 cache = search_cache_extent(root_cache, 0);
3660 rec = container_of(cache, struct root_record, cache);
3661 cache = next_cache_extent(cache);
3663 if (rec->found_ref == 0)
3666 list_for_each_entry(backref, &rec->backrefs, list) {
3667 if (!backref->reachable)
3670 ref_root = get_root_rec(root_cache,
3672 BUG_ON(IS_ERR(ref_root));
3673 if (ref_root->found_ref > 0)
3676 backref->reachable = 0;
3678 if (rec->found_ref == 0)
3684 cache = search_cache_extent(root_cache, 0);
3688 rec = container_of(cache, struct root_record, cache);
3689 cache = next_cache_extent(cache);
3691 if (rec->found_ref == 0 &&
3692 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3693 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3694 ret = check_orphan_item(root->fs_info->tree_root,
3700 * If we don't have a root item then we likely just have
3701 * a dir item in a snapshot for this root but no actual
3702 * ref key or anything so it's meaningless.
3704 if (!rec->found_root_item)
3707 fprintf(stderr, "fs tree %llu not referenced\n",
3708 (unsigned long long)rec->objectid);
3712 if (rec->found_ref > 0 && !rec->found_root_item)
3714 list_for_each_entry(backref, &rec->backrefs, list) {
3715 if (!backref->found_dir_item)
3716 backref->errors |= REF_ERR_NO_DIR_ITEM;
3717 if (!backref->found_dir_index)
3718 backref->errors |= REF_ERR_NO_DIR_INDEX;
3719 if (!backref->found_back_ref)
3720 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3721 if (!backref->found_forward_ref)
3722 backref->errors |= REF_ERR_NO_ROOT_REF;
3723 if (backref->reachable && backref->errors)
3730 fprintf(stderr, "fs tree %llu refs %u %s\n",
3731 (unsigned long long)rec->objectid, rec->found_ref,
3732 rec->found_root_item ? "" : "not found");
3734 list_for_each_entry(backref, &rec->backrefs, list) {
3735 if (!backref->reachable)
3737 if (!backref->errors && rec->found_root_item)
3739 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3740 " index %llu namelen %u name %s errors %x\n",
3741 (unsigned long long)backref->ref_root,
3742 (unsigned long long)backref->dir,
3743 (unsigned long long)backref->index,
3744 backref->namelen, backref->name,
3746 print_ref_error(backref->errors);
3749 return errors > 0 ? 1 : 0;
3752 static int process_root_ref(struct extent_buffer *eb, int slot,
3753 struct btrfs_key *key,
3754 struct cache_tree *root_cache)
3760 struct btrfs_root_ref *ref;
3761 char namebuf[BTRFS_NAME_LEN];
3764 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3766 dirid = btrfs_root_ref_dirid(eb, ref);
3767 index = btrfs_root_ref_sequence(eb, ref);
3768 name_len = btrfs_root_ref_name_len(eb, ref);
3770 if (name_len <= BTRFS_NAME_LEN) {
3774 len = BTRFS_NAME_LEN;
3775 error = REF_ERR_NAME_TOO_LONG;
3777 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3779 if (key->type == BTRFS_ROOT_REF_KEY) {
3780 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3781 index, namebuf, len, key->type, error);
3783 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3784 index, namebuf, len, key->type, error);
3789 static void free_corrupt_block(struct cache_extent *cache)
3791 struct btrfs_corrupt_block *corrupt;
3793 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3797 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3800 * Repair the btree of the given root.
3802 * The fix is to remove the node key in corrupt_blocks cache_tree.
3803 * and rebalance the tree.
3804 * After the fix, the btree should be writeable.
3806 static int repair_btree(struct btrfs_root *root,
3807 struct cache_tree *corrupt_blocks)
3809 struct btrfs_trans_handle *trans;
3810 struct btrfs_path path;
3811 struct btrfs_corrupt_block *corrupt;
3812 struct cache_extent *cache;
3813 struct btrfs_key key;
3818 if (cache_tree_empty(corrupt_blocks))
3821 trans = btrfs_start_transaction(root, 1);
3822 if (IS_ERR(trans)) {
3823 ret = PTR_ERR(trans);
3824 fprintf(stderr, "Error starting transaction: %s\n",
3828 btrfs_init_path(&path);
3829 cache = first_cache_extent(corrupt_blocks);
3831 corrupt = container_of(cache, struct btrfs_corrupt_block,
3833 level = corrupt->level;
3834 path.lowest_level = level;
3835 key.objectid = corrupt->key.objectid;
3836 key.type = corrupt->key.type;
3837 key.offset = corrupt->key.offset;
3840 * Here we don't want to do any tree balance, since it may
3841 * cause a balance with corrupted brother leaf/node,
3842 * so ins_len set to 0 here.
3843 * Balance will be done after all corrupt node/leaf is deleted.
3845 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3848 offset = btrfs_node_blockptr(path.nodes[level],
3851 /* Remove the ptr */
3852 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3856 * Remove the corresponding extent
3857 * return value is not concerned.
3859 btrfs_release_path(&path);
3860 ret = btrfs_free_extent(trans, root, offset,
3861 root->fs_info->nodesize, 0,
3862 root->root_key.objectid, level - 1, 0);
3863 cache = next_cache_extent(cache);
3866 /* Balance the btree using btrfs_search_slot() */
3867 cache = first_cache_extent(corrupt_blocks);
3869 corrupt = container_of(cache, struct btrfs_corrupt_block,
3871 memcpy(&key, &corrupt->key, sizeof(key));
3872 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3875 /* return will always >0 since it won't find the item */
3877 btrfs_release_path(&path);
3878 cache = next_cache_extent(cache);
3881 btrfs_commit_transaction(trans, root);
3882 btrfs_release_path(&path);
3886 static int check_fs_root(struct btrfs_root *root,
3887 struct cache_tree *root_cache,
3888 struct walk_control *wc)
3894 struct btrfs_path path;
3895 struct shared_node root_node;
3896 struct root_record *rec;
3897 struct btrfs_root_item *root_item = &root->root_item;
3898 struct cache_tree corrupt_blocks;
3899 struct orphan_data_extent *orphan;
3900 struct orphan_data_extent *tmp;
3901 enum btrfs_tree_block_status status;
3902 struct node_refs nrefs;
3905 * Reuse the corrupt_block cache tree to record corrupted tree block
3907 * Unlike the usage in extent tree check, here we do it in a per
3908 * fs/subvol tree base.
3910 cache_tree_init(&corrupt_blocks);
3911 root->fs_info->corrupt_blocks = &corrupt_blocks;
3913 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3914 rec = get_root_rec(root_cache, root->root_key.objectid);
3915 BUG_ON(IS_ERR(rec));
3916 if (btrfs_root_refs(root_item) > 0)
3917 rec->found_root_item = 1;
3920 btrfs_init_path(&path);
3921 memset(&root_node, 0, sizeof(root_node));
3922 cache_tree_init(&root_node.root_cache);
3923 cache_tree_init(&root_node.inode_cache);
3924 memset(&nrefs, 0, sizeof(nrefs));
3926 /* Move the orphan extent record to corresponding inode_record */
3927 list_for_each_entry_safe(orphan, tmp,
3928 &root->orphan_data_extents, list) {
3929 struct inode_record *inode;
3931 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3933 BUG_ON(IS_ERR(inode));
3934 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3935 list_move(&orphan->list, &inode->orphan_extents);
3938 level = btrfs_header_level(root->node);
3939 memset(wc->nodes, 0, sizeof(wc->nodes));
3940 wc->nodes[level] = &root_node;
3941 wc->active_node = level;
3942 wc->root_level = level;
3944 /* We may not have checked the root block, lets do that now */
3945 if (btrfs_is_leaf(root->node))
3946 status = btrfs_check_leaf(root, NULL, root->node);
3948 status = btrfs_check_node(root, NULL, root->node);
3949 if (status != BTRFS_TREE_BLOCK_CLEAN)
3952 if (btrfs_root_refs(root_item) > 0 ||
3953 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3954 path.nodes[level] = root->node;
3955 extent_buffer_get(root->node);
3956 path.slots[level] = 0;
3958 struct btrfs_key key;
3959 struct btrfs_disk_key found_key;
3961 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3962 level = root_item->drop_level;
3963 path.lowest_level = level;
3964 if (level > btrfs_header_level(root->node) ||
3965 level >= BTRFS_MAX_LEVEL) {
3966 error("ignoring invalid drop level: %u", level);
3969 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3972 btrfs_node_key(path.nodes[level], &found_key,
3974 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3975 sizeof(found_key)));
3979 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3985 wret = walk_up_tree(root, &path, wc, &level);
3992 btrfs_release_path(&path);
3994 if (!cache_tree_empty(&corrupt_blocks)) {
3995 struct cache_extent *cache;
3996 struct btrfs_corrupt_block *corrupt;
3998 printf("The following tree block(s) is corrupted in tree %llu:\n",
3999 root->root_key.objectid);
4000 cache = first_cache_extent(&corrupt_blocks);
4002 corrupt = container_of(cache,
4003 struct btrfs_corrupt_block,
4005 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4006 cache->start, corrupt->level,
4007 corrupt->key.objectid, corrupt->key.type,
4008 corrupt->key.offset);
4009 cache = next_cache_extent(cache);
4012 printf("Try to repair the btree for root %llu\n",
4013 root->root_key.objectid);
4014 ret = repair_btree(root, &corrupt_blocks);
4016 fprintf(stderr, "Failed to repair btree: %s\n",
4019 printf("Btree for root %llu is fixed\n",
4020 root->root_key.objectid);
4024 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4028 if (root_node.current) {
4029 root_node.current->checked = 1;
4030 maybe_free_inode_rec(&root_node.inode_cache,
4034 err = check_inode_recs(root, &root_node.inode_cache);
4038 free_corrupt_blocks_tree(&corrupt_blocks);
4039 root->fs_info->corrupt_blocks = NULL;
4040 free_orphan_data_extents(&root->orphan_data_extents);
4044 static int fs_root_objectid(u64 objectid)
4046 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4047 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4049 return is_fstree(objectid);
/*
 * Walk the root tree and run check_fs_root() on every ROOT_ITEM accepted by
 * fs_root_objectid(); ROOT_REF/ROOT_BACKREF items met on the way are handed
 * to process_root_ref().
 *
 * @root:       a root of the filesystem; the visible code only uses
 *              root->fs_info
 * @root_cache: root records, passed on to check_fs_root() and cleared with
 *              free_root_recs_tree() on the two bail-out paths below
 */
4052 static int check_fs_roots(struct btrfs_root *root,
4053 struct cache_tree *root_cache)
4055 struct btrfs_path path;
4056 struct btrfs_key key;
4057 struct walk_control wc;
4058 struct extent_buffer *leaf, *tree_node;
4059 struct btrfs_root *tmp_root;
4060 struct btrfs_root *tree_root = root->fs_info->tree_root;
/* Switch the progress indicator to the fs-roots phase. */
4064 if (ctx.progress_enabled) {
4065 ctx.tp = TASK_FS_ROOTS;
4066 task_start(ctx.info);
4070 * Just in case we made any changes to the extent tree that weren't
4071 * reflected into the free space cache yet.
4074 reset_cached_block_groups(root->fs_info);
/* Start from a zeroed walk_control with an empty shared-node cache. */
4075 memset(&wc, 0, sizeof(wc));
4076 cache_tree_init(&wc.shared);
4077 btrfs_init_path(&path);
4082 key.type = BTRFS_ROOT_ITEM_KEY;
4083 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the current root-tree node so a later change can be detected. */
4088 tree_node = tree_root->node;
/*
 * The root tree's node differs from the remembered one: drop the collected
 * root records and release the path (what follows is not visible here).
 */
4090 if (tree_node != tree_root->node) {
4091 free_root_recs_tree(root_cache);
4092 btrfs_release_path(&path);
4095 leaf = path.nodes[0];
/* Slot is past the last item of this leaf: advance to the next leaf. */
4096 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4097 ret = btrfs_next_leaf(tree_root, &path);
4103 leaf = path.nodes[0];
4105 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4106 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4107 fs_root_objectid(key.objectid)) {
/*
 * The tree-reloc root is read through btrfs_read_fs_root_no_cache() and is
 * released with btrfs_free_fs_root() once it has been checked; every other
 * root goes through btrfs_read_fs_root() with key.offset set to (u64)-1.
 */
4108 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4109 tmp_root = btrfs_read_fs_root_no_cache(
4110 root->fs_info, &key);
4112 key.offset = (u64)-1;
4113 tmp_root = btrfs_read_fs_root(
4114 root->fs_info, &key);
4116 if (IS_ERR(tmp_root)) {
4120 ret = check_fs_root(tmp_root, root_cache, &wc);
/* -EAGAIN from check_fs_root(): discard the root records and the path. */
4121 if (ret == -EAGAIN) {
4122 free_root_recs_tree(root_cache);
4123 btrfs_release_path(&path);
4128 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4129 btrfs_free_fs_root(tmp_root);
4130 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4131 key.type == BTRFS_ROOT_BACKREF_KEY) {
4132 process_root_ref(leaf, path.slots[0], &key,
4139 btrfs_release_path(&path);
/* The shared cache is expected to be empty once freed; warn if it is not. */
4141 free_extent_cache_tree(&wc.shared);
4142 if (!cache_tree_empty(&wc.shared))
4143 fprintf(stderr, "warning line %d\n", __LINE__);
4145 task_stop(ctx.info);
4151 * Find the DIR_ITEM/DIR_INDEX for the given key and check that it matches
4152 * the specified INODE_REF/INODE_EXTREF.
4154 * @root: the root of the fs/file tree
4155 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4156 * @key: the key of the DIR_ITEM/DIR_INDEX
4157 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4158 * distinguish the root dir from a normal dir/file
4159 * @name: the name in the INODE_REF/INODE_EXTREF
4160 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4161 * @mode: the st_mode of INODE_ITEM
4163 * Return 0 if no error occurred.
4164 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4165 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4167 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4168 * not match for normal dir/file.
/*
 * Look up @key (a DIR_ITEM or DIR_INDEX key) in @root and compare the dir
 * entries stored in that item against the inode ref described by @ref_key,
 * @name/@namelen and @mode.  An entry is compared on its location key, its
 * file type and its name.
 */
4170 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4171 struct btrfs_key *key, u64 index, char *name,
4172 u32 namelen, u32 mode)
4174 struct btrfs_path path;
4175 struct extent_buffer *node;
4176 struct btrfs_dir_item *di;
4177 struct btrfs_key location;
4178 char namebuf[BTRFS_NAME_LEN] = {0};
4188 btrfs_init_path(&path);
4189 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4191 ret = DIR_ITEM_MISSING;
4195 /* Process root dir and goto out */
4198 ret = ROOT_DIR_ERROR;
4200 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4202 ref_key->type == BTRFS_INODE_REF_KEY ?
4204 ref_key->objectid, ref_key->offset,
4205 key->type == BTRFS_DIR_ITEM_KEY ?
4206 "DIR_ITEM" : "DIR_INDEX");
4214 /* Process normal file/dir */
4216 ret = DIR_ITEM_MISSING;
4218 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4220 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4221 ref_key->objectid, ref_key->offset,
4222 key->type == BTRFS_DIR_ITEM_KEY ?
4223 "DIR_ITEM" : "DIR_INDEX",
4224 key->objectid, key->offset, namelen, name,
4225 imode_to_type(mode));
4229 /* Check whether inode_id/filetype/name match */
4230 node = path.nodes[0];
4231 slot = path.slots[0];
4232 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4233 total = btrfs_item_size_nr(node, slot);
/*
 * The item may hold several dir entries packed back to back; each pass
 * looks at the entry that di points to.
 */
4234 while (cur < total) {
/* Default verdict; if still set after the loop the mismatch is reported. */
4235 ret = DIR_ITEM_MISMATCH;
4236 name_len = btrfs_dir_name_len(node, di);
4237 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM of the inode that owns the ref. */
4239 btrfs_dir_item_key_to_cpu(node, di, &location);
4240 if (location.objectid != ref_key->objectid ||
4241 location.type != BTRFS_INODE_ITEM_KEY ||
4242 location.offset != 0)
4245 filetype = btrfs_dir_type(node, di);
4246 if (imode_to_type(mode) != filetype)
/*
 * Name runs past the end of the item or exceeds BTRFS_NAME_LEN: warn and
 * limit the number of bytes read to what fits.
 */
4249 if (cur + sizeof(*di) + name_len > total ||
4250 name_len > BTRFS_NAME_LEN) {
4251 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4253 key->type == BTRFS_DIR_ITEM_KEY ?
4254 "DIR_ITEM" : "DIR_INDEX",
4255 key->objectid, key->offset, name_len);
4257 if (cur + sizeof(*di) > total)
4259 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow the dir_item header directly. */
4265 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
/* Both the length and the bytes must equal the name taken from the ref. */
4266 if (len != namelen || strncmp(namebuf, name, len))
/* Step over header, name and data to reach the next packed entry. */
4272 len = sizeof(*di) + name_len + data_len;
4273 di = (struct btrfs_dir_item *)((char *)di + len);
4276 if (ret == DIR_ITEM_MISMATCH)
4278 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4280 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4281 ref_key->objectid, ref_key->offset,
4282 key->type == BTRFS_DIR_ITEM_KEY ?
4283 "DIR_ITEM" : "DIR_INDEX",
4284 key->objectid, key->offset, namelen, name,
4285 imode_to_type(mode));
4287 btrfs_release_path(&path);
4292 * Traverse the given INODE_REF and call find_dir_item() to find related
4293 * DIR_ITEM/DIR_INDEX.
4295 * @root: the root of the fs/file tree
4296 * @ref_key: the key of the INODE_REF
4297 * @refs: the count of INODE_REF
4298 * @mode: the st_mode of INODE_ITEM
4300 * Return 0 if no error occurred.
/*
 * Walk every name packed into the INODE_REF item at @node/@slot and, for each
 * one, ask find_dir_item() for the matching DIR_INDEX and DIR_ITEM.  Both
 * lookup keys use ref_key->offset as their objectid.
 */
4302 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4303 struct extent_buffer *node, int slot, u64 *refs,
4306 struct btrfs_key key;
4307 struct btrfs_inode_ref *ref;
4308 char namebuf[BTRFS_NAME_LEN] = {0};
4316 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4317 total = btrfs_item_size_nr(node, slot);
4320 /* Update inode ref count */
4323 index = btrfs_inode_ref_index(node, ref);
4324 name_len = btrfs_inode_ref_name_len(node, ref);
/*
 * Name runs past the end of the item or exceeds BTRFS_NAME_LEN: warn and
 * limit the number of bytes read to what fits.
 */
4325 if (cur + sizeof(*ref) + name_len > total ||
4326 name_len > BTRFS_NAME_LEN) {
4327 warning("root %llu INODE_REF[%llu %llu] name too long",
4328 root->objectid, ref_key->objectid, ref_key->offset);
4330 if (total < cur + sizeof(*ref))
4332 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
/* The name bytes follow the inode_ref header directly. */
4337 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4339 /* Check root dir ref name */
/*
 * NOTE(review): the comparison length is name_len, not len, so a name of
 * length 0 or the one-byte name "." also compares equal to "..".
 */
4340 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4341 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4342 root->objectid, ref_key->objectid, ref_key->offset,
4344 err |= ROOT_DIR_ERROR;
4347 /* Find related DIR_INDEX */
4348 key.objectid = ref_key->offset;
4349 key.type = BTRFS_DIR_INDEX_KEY;
4351 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4354 /* Find related dir_item */
4355 key.objectid = ref_key->offset;
4356 key.type = BTRFS_DIR_ITEM_KEY;
/* DIR_ITEM keys are addressed by the hash of the name. */
4357 key.offset = btrfs_name_hash(namebuf, len);
4358 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over header and name to reach the next packed ref. */
4361 len = sizeof(*ref) + name_len;
4362 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4372 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4373 * DIR_ITEM/DIR_INDEX.
4375 * @root: the root of the fs/file tree
4376 * @ref_key: the key of the INODE_EXTREF
4377 * @refs: the count of INODE_EXTREF
4378 * @mode: the st_mode of INODE_ITEM
4380 * Return 0 if no error occurred.
/*
 * Walk every entry packed into the INODE_EXTREF item at @node/@slot and, for
 * each one, ask find_dir_item() for the matching DIR_INDEX and DIR_ITEM.  The
 * lookup keys use the parent stored in the extref as their objectid.
 */
4382 static int check_inode_extref(struct btrfs_root *root,
4383 struct btrfs_key *ref_key,
4384 struct extent_buffer *node, int slot, u64 *refs,
4387 struct btrfs_key key;
4388 struct btrfs_inode_extref *extref;
4389 char namebuf[BTRFS_NAME_LEN] = {0};
4399 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4400 total = btrfs_item_size_nr(node, slot);
4403 /* update inode ref count */
4405 name_len = btrfs_inode_extref_name_len(node, extref);
4406 index = btrfs_inode_extref_index(node, extref);
4407 parent = btrfs_inode_extref_parent(node, extref);
/*
 * Over-long names are read only up to BTRFS_NAME_LEN bytes, with a warning.
 * NOTE(review): in the visible lines the length is limited against
 * BTRFS_NAME_LEN only, not against the item size as check_inode_ref() does.
 */
4408 if (name_len <= BTRFS_NAME_LEN) {
4411 len = BTRFS_NAME_LEN;
4412 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4413 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes follow the inode_extref header directly. */
4415 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4417 /* Check root dir ref name */
4418 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4419 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4420 root->objectid, ref_key->objectid, ref_key->offset,
4422 err |= ROOT_DIR_ERROR;
4425 /* find related dir_index */
4426 key.objectid = parent;
4427 key.type = BTRFS_DIR_INDEX_KEY;
4429 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4432 /* find related dir_item */
4433 key.objectid = parent;
4434 key.type = BTRFS_DIR_ITEM_KEY;
/* DIR_ITEM keys are addressed by the hash of the name. */
4435 key.offset = btrfs_name_hash(namebuf, len);
4436 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over header and name to reach the next packed extref. */
4439 len = sizeof(*extref) + name_len;
4440 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4450 * Find the INODE_REF/INODE_EXTREF for the given key and check that it
4451 * matches the specified DIR_ITEM/DIR_INDEX.
4453 * @root: the root of the fs/file tree
4454 * @key: the key of the INODE_REF/INODE_EXTREF
4455 * @name: the name in the INODE_REF/INODE_EXTREF
4456 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4457 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4459 * @ext_ref: the EXTENDED_IREF feature
4461 * Return 0 if no error occurred.
4462 * Return >0 for error bitmap
/*
 * Search @root for an inode ref that carries @name/@namelen (and @index,
 * unless @index is (u64)-1).  The INODE_REF item addressed by @key is scanned
 * first; the code after it turns @key into an INODE_EXTREF key and scans that
 * item as well.
 *
 * NOTE: @key is modified in place (type and offset) when the INODE_EXTREF
 * lookup is prepared.
 */
4464 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4465 char *name, int namelen, u64 index,
4466 unsigned int ext_ref)
4468 struct btrfs_path path;
4469 struct btrfs_inode_ref *ref;
4470 struct btrfs_inode_extref *extref;
4471 struct extent_buffer *node;
4472 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4483 btrfs_init_path(&path);
4484 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4486 ret = INODE_REF_MISSING;
4490 node = path.nodes[0];
4491 slot = path.slots[0];
4493 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4494 total = btrfs_item_size_nr(node, slot);
4496 /* Iterate all entry of INODE_REF */
4497 while (cur < total) {
/* Default verdict for this entry. */
4498 ret = INODE_REF_MISSING;
4500 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4501 ref_index = btrfs_inode_ref_index(node, ref);
/* (u64)-1 means the caller does not want the index compared. */
4502 if (index != (u64)-1 && index != ref_index)
/*
 * Name runs past the end of the item or exceeds BTRFS_NAME_LEN: warn and
 * limit the number of bytes read to what fits.
 */
4505 if (cur + sizeof(*ref) + ref_namelen > total ||
4506 ref_namelen > BTRFS_NAME_LEN) {
4507 warning("root %llu INODE %s[%llu %llu] name too long",
4509 key->type == BTRFS_INODE_REF_KEY ?
4511 key->objectid, key->offset);
4513 if (cur + sizeof(*ref) > total)
4515 len = min_t(u32, total - cur - sizeof(*ref),
4521 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
/* Both the length and the bytes must equal the wanted name. */
4524 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step over header and name to reach the next packed ref. */
4530 len = sizeof(*ref) + ref_namelen;
4531 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4536 /* Skip if not support EXTENDED_IREF feature */
4540 btrfs_release_path(&path);
4541 btrfs_init_path(&path);
/*
 * Build the INODE_EXTREF key: the old key offset is kept as dir_id and the
 * new offset is the extref hash of (dir_id, name).
 */
4543 dir_id = key->offset;
4544 key->type = BTRFS_INODE_EXTREF_KEY;
4545 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4547 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4549 ret = INODE_REF_MISSING;
4553 node = path.nodes[0];
4554 slot = path.slots[0];
4556 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4558 total = btrfs_item_size_nr(node, slot);
4560 /* Iterate all entry of INODE_EXTREF */
4561 while (cur < total) {
4562 ret = INODE_REF_MISSING;
4564 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4565 ref_index = btrfs_inode_extref_index(node, extref);
4566 parent = btrfs_inode_extref_parent(node, extref);
4567 if (index != (u64)-1 && index != ref_index)
/* Entries sharing this key may belong to another directory. */
4570 if (parent != dir_id)
4573 if (ref_namelen <= BTRFS_NAME_LEN) {
4576 len = BTRFS_NAME_LEN;
4577 warning("root %llu INODE %s[%llu %llu] name too long",
4579 key->type == BTRFS_INODE_REF_KEY ?
4581 key->objectid, key->offset);
4583 read_extent_buffer(node, ref_namebuf,
4584 (unsigned long)(extref + 1), len);
4586 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step over header and name to reach the next packed extref. */
4593 len = sizeof(*extref) + ref_namelen;
4594 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4599 btrfs_release_path(&path);
4604 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4605 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4607 * @root: the root of the fs/file tree
4608 * @key: the key of the DIR_ITEM/DIR_INDEX
4609 * @size: the st_size of the INODE_ITEM
4610 * @ext_ref: the EXTENDED_IREF feature
4612 * Return 0 if no error occurred.
/*
 * Walk every entry packed into the DIR_ITEM/DIR_INDEX item at @node/@slot.
 * For each entry the name length is added to *size, the INODE_ITEM it points
 * to is looked up and its file type compared, and find_inode_ref() is asked
 * for the matching INODE_REF/INODE_EXTREF.
 */
4614 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4615 struct extent_buffer *node, int slot, u64 *size,
4616 unsigned int ext_ref)
4618 struct btrfs_dir_item *di;
4619 struct btrfs_inode_item *ii;
4620 struct btrfs_path path;
4621 struct btrfs_key location;
4622 char namebuf[BTRFS_NAME_LEN] = {0};
4635 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4636 * ignore index check.
4638 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4640 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4641 total = btrfs_item_size_nr(node, slot);
4643 while (cur < total) {
4644 data_len = btrfs_dir_data_len(node, di);
4646 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4647 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4648 "DIR_ITEM" : "DIR_INDEX",
4649 key->objectid, key->offset, data_len);
4651 name_len = btrfs_dir_name_len(node, di);
/*
 * Name runs past the end of the item or exceeds BTRFS_NAME_LEN: warn and
 * limit the number of bytes read to what fits.
 */
4652 if (cur + sizeof(*di) + name_len > total ||
4653 name_len > BTRFS_NAME_LEN) {
4654 warning("root %llu %s[%llu %llu] name too long",
4656 key->type == BTRFS_DIR_ITEM_KEY ?
4657 "DIR_ITEM" : "DIR_INDEX",
4658 key->objectid, key->offset);
4660 if (cur + sizeof(*di) > total)
4662 len = min_t(u32, total - cur - sizeof(*di),
/* The running total uses the stored name_len, not the clamped len. */
4667 (*size) += name_len;
/* The name bytes follow the dir_item header directly. */
4669 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4670 filetype = btrfs_dir_type(node, di);
4672 btrfs_init_path(&path);
4673 btrfs_dir_item_key_to_cpu(node, di, &location);
4675 /* Ignore related ROOT_ITEM check */
4676 if (location.type == BTRFS_ROOT_ITEM_KEY)
4679 /* Check relative INODE_ITEM(existence/filetype) */
4680 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4682 err |= INODE_ITEM_MISSING;
4683 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4684 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4685 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4686 key->offset, location.objectid, name_len,
4691 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4692 struct btrfs_inode_item);
4693 mode = btrfs_inode_mode(path.nodes[0], ii);
4695 if (imode_to_type(mode) != filetype) {
4696 err |= INODE_ITEM_MISMATCH;
4697 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4698 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4699 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4700 key->offset, name_len, namebuf, filetype);
4703 /* Check relative INODE_REF/INODE_EXTREF */
/* Reuse location as the INODE_REF key: same objectid, offset = this dir. */
4704 location.type = BTRFS_INODE_REF_KEY;
4705 location.offset = key->objectid;
4706 ret = find_inode_ref(root, &location, namebuf, len,
4709 if (ret & INODE_REF_MISSING)
4710 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4711 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4712 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4713 key->offset, name_len, namebuf, filetype);
4716 btrfs_release_path(&path);
/* Step over header, name and data to reach the next packed entry. */
4717 len = sizeof(*di) + name_len + data_len;
4718 di = (struct btrfs_dir_item *)((char *)di + len);
/* Reported when a DIR_INDEX item still has bytes left at this point. */
4721 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4722 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4723 root->objectid, key->objectid, key->offset);
4732 * Check file extent datasum/hole, update the size of the file extents,
4733 * check and update the last offset of the file extent.
4735 * @root: the root of fs/file tree.
4736 * @fkey: the key of the file extent.
4737 * @nodatasum: INODE_NODATASUM feature.
4738 * @size: the sum of all EXTENT_DATA items size for this inode.
4739 * @end: the offset of the last extent.
4741 * Return 0 if no error occurred.
/*
 * Validate one EXTENT_DATA item: inline extents are checked for length,
 * regular/prealloc extents for their type, for csum coverage and for a gap
 * against *end.  *end and *size are advanced by the extent length.
 */
4743 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4744 struct extent_buffer *node, int slot,
4745 unsigned int nodatasum, u64 *size, u64 *end)
4747 struct btrfs_file_extent_item *fi;
4750 u64 extent_num_bytes;
4752 u64 csum_found; /* In byte size, sectorsize aligned */
4753 u64 search_start; /* Logical range start we search for csum */
4754 u64 search_len; /* Logical range len we search for csum */
4755 unsigned int extent_type;
4756 unsigned int is_hole;
4761 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4763 /* Check inline extent */
4764 extent_type = btrfs_file_extent_type(node, fi);
4765 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4766 struct btrfs_item *e = btrfs_item_nr(slot);
4767 u32 item_inline_len;
4769 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4770 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4771 compressed = btrfs_file_extent_compression(node, fi);
4772 if (extent_num_bytes == 0) {
4774 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4775 root->objectid, fkey->objectid, fkey->offset);
4776 err |= FILE_EXTENT_ERROR;
/* The two lengths are only required to agree for uncompressed data. */
4778 if (!compressed && extent_num_bytes != item_inline_len) {
4780 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4781 root->objectid, fkey->objectid, fkey->offset,
4782 extent_num_bytes, item_inline_len);
4783 err |= FILE_EXTENT_ERROR;
4785 *end += extent_num_bytes;
4786 *size += extent_num_bytes;
4790 /* Check extent type */
4791 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4792 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4793 err |= FILE_EXTENT_ERROR;
4794 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4795 root->objectid, fkey->objectid, fkey->offset);
4799 /* Check REG_EXTENT/PREALLOC_EXTENT */
4800 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4801 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4802 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4803 extent_offset = btrfs_file_extent_offset(node, fi);
4804 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is an extent with neither a disk address nor a disk length. */
4805 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4808 * Check EXTENT_DATA csum
4810 * For plain (uncompressed) extent, we should only check the range
4811 * we're referring to, as it's possible that part of prealloc extent
4812 * has been written, and has csum:
4814 * |<--- Original large preallocated extent A ---->|
4815 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4818 * For compressed extent, we should check the whole range.
4821 search_start = disk_bytenr + extent_offset;
4822 search_len = extent_num_bytes;
4824 search_start = disk_bytenr;
4825 search_len = disk_num_bytes;
4827 ret = count_csum_range(root, search_start, search_len, &csum_found);
/*
 * Three exclusive verdicts: csums on a nodatasum inode, missing csums on a
 * regular non-hole extent, or csums on a prealloc extent.
 */
4828 if (csum_found > 0 && nodatasum) {
4829 err |= ODD_CSUM_ITEM;
4830 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4831 root->objectid, fkey->objectid, fkey->offset);
4832 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4833 !is_hole && (ret < 0 || csum_found < search_len)) {
4834 err |= CSUM_ITEM_MISSING;
4835 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4836 root->objectid, fkey->objectid, fkey->offset,
4837 csum_found, search_len);
4838 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4839 err |= ODD_CSUM_ITEM;
4840 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4841 root->objectid, fkey->objectid, fkey->offset, csum_found);
4844 /* Check EXTENT_DATA hole */
/* Unless no_holes is set, this extent must start where the last one ended. */
4845 if (!no_holes && *end != fkey->offset) {
4846 err |= FILE_EXTENT_ERROR;
4847 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4848 root->objectid, fkey->objectid, fkey->offset);
4851 *end += extent_num_bytes;
4853 *size += extent_num_bytes;
4859 * Check INODE_ITEM and related ITEMs (the same inode number)
4860 * 1. check link count
4861 * 2. check inode ref/extref
4862 * 3. check dir item/index
4864 * @ext_ref: the EXTENDED_IREF feature
4866 * Return 0 if no error occurred.
4867 * Return >0 (an error bitmap) on error or when the end of the traversal is hit
/*
 * Check the INODE_ITEM that @path points at, then step through the following
 * items with btrfs_next_item() while they keep the same objectid, dispatching
 * each one by key type.  Finally the nlink/isize/nbytes values read from the
 * inode item are compared with what was gathered from those items.
 */
4869 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4870 unsigned int ext_ref)
4872 struct extent_buffer *node;
4873 struct btrfs_inode_item *ii;
4874 struct btrfs_key key;
4883 u64 extent_size = 0;
4885 unsigned int nodatasum;
4890 node = path->nodes[0];
4891 slot = path->slots[0];
4893 btrfs_item_key_to_cpu(node, &key, slot);
4894 inode_id = key.objectid;
/* The orphan objectid gets no inode checks; only the path is advanced. */
4896 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4897 ret = btrfs_next_item(root, path);
/* Snapshot the inode fields that are verified after the item walk. */
4903 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4904 isize = btrfs_inode_size(node, ii);
4905 nbytes = btrfs_inode_nbytes(node, ii);
4906 mode = btrfs_inode_mode(node, ii);
4907 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4908 nlink = btrfs_inode_nlink(node, ii);
4909 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4912 ret = btrfs_next_item(root, path);
4914 /* out will fill 'err' using current statistics */
4916 } else if (ret > 0) {
4921 node = path->nodes[0];
4922 slot = path->slots[0];
4923 btrfs_item_key_to_cpu(node, &key, slot);
/* A different objectid means the items of this inode are exhausted. */
4924 if (key.objectid != inode_id)
4928 case BTRFS_INODE_REF_KEY:
4929 ret = check_inode_ref(root, &key, node, slot, &refs,
4933 case BTRFS_INODE_EXTREF_KEY:
/* An EXTREF without the feature is only warned about; it is still checked. */
4934 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4935 warning("root %llu EXTREF[%llu %llu] isn't supported",
4936 root->objectid, key.objectid,
4938 ret = check_inode_extref(root, &key, node, slot, &refs,
4942 case BTRFS_DIR_ITEM_KEY:
4943 case BTRFS_DIR_INDEX_KEY:
4945 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4946 root->objectid, inode_id,
4947 imode_to_type(mode), key.objectid,
4950 ret = check_dir_item(root, &key, node, slot, &size,
4954 case BTRFS_EXTENT_DATA_KEY:
4956 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4957 root->objectid, inode_id, key.objectid,
4960 ret = check_file_extent(root, &key, node, slot,
4961 nodatasum, &extent_size,
4965 case BTRFS_XATTR_ITEM_KEY:
4968 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4969 key.objectid, key.type, key.offset);
4974 /* verify INODE_ITEM nlink/isize/nbytes */
4977 err |= LINK_COUNT_ERROR;
4978 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4979 root->objectid, inode_id, nlink);
4983 * Just a warning, as dir inode nbytes is just an
4984 * instructive value.
4986 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
4987 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4988 root->objectid, inode_id,
4989 root->fs_info->nodesize);
/* size is the sum of name lengths accumulated by check_dir_item(). */
4992 if (isize != size) {
4994 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4995 root->objectid, inode_id, isize, size);
/* refs is the count accumulated by the ref/extref checks above. */
4998 if (nlink != refs) {
4999 err |= LINK_COUNT_ERROR;
5000 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5001 root->objectid, inode_id, nlink, refs);
5002 } else if (!nlink) {
5006 if (!nbytes && !no_holes && extent_end < isize) {
5007 err |= NBYTES_ERROR;
5008 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5009 root->objectid, inode_id, isize);
/* extent_size is the byte total accumulated by check_file_extent(). */
5012 if (nbytes != extent_size) {
5013 err |= NBYTES_ERROR;
5014 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5015 root->objectid, inode_id, nbytes, extent_size);
/*
 * Look up the INODE_ITEM with objectid BTRFS_FIRST_FREE_OBJECTID in @root and
 * run check_inode_item() on it; a missing item is reported as
 * INODE_ITEM_MISSING.  @ext_ref is passed through to check_inode_item().
 */
5022 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5024 struct btrfs_path path;
5025 struct btrfs_key key;
5029 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5030 key.type = BTRFS_INODE_ITEM_KEY;
5033 /* For root being dropped, we don't need to check first inode */
5034 if (btrfs_root_refs(&root->root_item) == 0 &&
5035 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5039 btrfs_init_path(&path);
5041 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5046 err |= INODE_ITEM_MISSING;
5047 error("first inode item of root %llu is missing",
/* Fold the bitmap returned for the first inode into the result. */
5051 err |= check_inode_item(root, &path, ext_ref);
5056 btrfs_release_path(&path);
5061 * Iterate all item on the tree and call check_inode_item() to check.
5063 * @root: the root of the tree to be checked.
5064 * @ext_ref: the EXTENDED_IREF feature
5066 * Return 0 if no error found.
5067 * Return <0 for error.
/*
 * Check one fs/subvolume tree: the first inode is checked separately, then
 * the tree is traversed with walk_down_tree_v2()/walk_up_tree_v2().
 */
5069 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5071 struct btrfs_path path;
5072 struct node_refs nrefs;
5073 struct btrfs_root_item *root_item = &root->root_item;
5079 * We need to manually check the first inode item(256)
5080 * As the following traversal function will only start from
5081 * the first inode item in the leaf, if inode item(256) is missing
5082 * we will just skip it forever.
5084 ret = check_fs_first_inode(root, ext_ref);
/* Start the walk with empty node_refs and an empty path. */
5088 memset(&nrefs, 0, sizeof(nrefs));
5089 level = btrfs_header_level(root->node);
5090 btrfs_init_path(&path);
/*
 * Root still has refs, or no drop progress is recorded: start at slot 0 of
 * the root node, calling extent_buffer_get() on it since the path now holds
 * it.  Otherwise the walk is positioned by a search for the drop_progress
 * key with lowest_level set to drop_level.
 */
5092 if (btrfs_root_refs(root_item) > 0 ||
5093 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5094 path.nodes[level] = root->node;
5095 path.slots[level] = 0;
5096 extent_buffer_get(root->node);
5098 struct btrfs_key key;
5100 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5101 level = root_item->drop_level;
5102 path.lowest_level = level;
5103 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5110 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5113 /* if ret is negative, walk shall stop */
5119 ret = walk_up_tree_v2(root, &path, &level);
5121 /* Normal exit, reset ret to err */
5128 btrfs_release_path(&path);
5133 * Find the relative ref for root_ref and root_backref.
5135 * @root: the root of the root tree.
5136 * @ref_key: the key of the root ref.
5138 * Return 0 if no error occurred.
/*
 * For the ROOT_REF or ROOT_BACKREF at @node/@slot, look up the item of the
 * opposite kind in @root and verify that both carry the same dirid, sequence,
 * name length and name.
 */
5140 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5141 struct extent_buffer *node, int slot)
5143 struct btrfs_path path;
5144 struct btrfs_key key;
5145 struct btrfs_root_ref *ref;
5146 struct btrfs_root_ref *backref;
5147 char ref_name[BTRFS_NAME_LEN] = {0};
5148 char backref_name[BTRFS_NAME_LEN] = {0};
5154 u32 backref_namelen;
5159 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5160 ref_dirid = btrfs_root_ref_dirid(node, ref);
5161 ref_seq = btrfs_root_ref_sequence(node, ref);
5162 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Over-long names are read only up to BTRFS_NAME_LEN bytes, with a warning. */
5164 if (ref_namelen <= BTRFS_NAME_LEN) {
5167 len = BTRFS_NAME_LEN;
5168 warning("%s[%llu %llu] ref_name too long",
5169 ref_key->type == BTRFS_ROOT_REF_KEY ?
5170 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
/* The name bytes follow the root_ref header directly. */
5173 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5175 /* Find relative root_ref */
/*
 * The counterpart key swaps objectid and offset and flips the type:
 * (REF + BACKREF - type) turns a ROOT_REF type into ROOT_BACKREF and vice
 * versa, given that ref_key->type is one of the two.
 */
5176 key.objectid = ref_key->offset;
5177 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5178 key.offset = ref_key->objectid;
5180 btrfs_init_path(&path);
5181 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5183 err |= ROOT_REF_MISSING;
5184 error("%s[%llu %llu] couldn't find relative ref",
5185 ref_key->type == BTRFS_ROOT_REF_KEY ?
5186 "ROOT_REF" : "ROOT_BACKREF",
5187 ref_key->objectid, ref_key->offset);
5191 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5192 struct btrfs_root_ref);
5193 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5194 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5195 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5197 if (backref_namelen <= BTRFS_NAME_LEN) {
5198 len = backref_namelen;
5200 len = BTRFS_NAME_LEN;
5201 warning("%s[%llu %llu] ref_name too long",
5202 key.type == BTRFS_ROOT_REF_KEY ?
5203 "ROOT_REF" : "ROOT_BACKREF",
5204 key.objectid, key.offset);
5206 read_extent_buffer(path.nodes[0], backref_name,
5207 (unsigned long)(backref + 1), len);
/* The name bytes are compared over len, the clamped counterpart length. */
5209 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5210 ref_namelen != backref_namelen ||
5211 strncmp(ref_name, backref_name, len)) {
5212 err |= ROOT_REF_MISMATCH;
5213 error("%s[%llu %llu] mismatch relative ref",
5214 ref_key->type == BTRFS_ROOT_REF_KEY ?
5215 "ROOT_REF" : "ROOT_BACKREF",
5216 ref_key->objectid, ref_key->offset);
5219 btrfs_release_path(&path);
5224 * Check all fs/file tree in low_memory mode.
5226 * 1. for fs tree root item, call check_fs_root_v2()
5227 * 2. for fs tree root ref/backref, call check_root_ref()
5229 * Return 0 if no error occurred.
/*
 * Walk the root tree starting at BTRFS_FS_TREE_OBJECTID.  ROOT_ITEMs accepted
 * by fs_root_objectid() are checked with check_fs_root_v2(); ROOT_REF and
 * ROOT_BACKREF items are checked with check_root_ref().
 */
5231 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5233 struct btrfs_root *tree_root = fs_info->tree_root;
5234 struct btrfs_root *cur_root = NULL;
5235 struct btrfs_path path;
5236 struct btrfs_key key;
5237 struct extent_buffer *node;
5238 unsigned int ext_ref;
/* Read once whether the filesystem has the EXTENDED_IREF incompat flag. */
5243 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5245 btrfs_init_path(&path);
5246 key.objectid = BTRFS_FS_TREE_OBJECTID;
5248 key.type = BTRFS_ROOT_ITEM_KEY;
5250 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5254 } else if (ret > 0) {
5260 node = path.nodes[0];
5261 slot = path.slots[0];
5262 btrfs_item_key_to_cpu(node, &key, slot);
/* Keys above BTRFS_LAST_FREE_OBJECTID are singled out by this test. */
5263 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5265 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5266 fs_root_objectid(key.objectid)) {
/*
 * The tree-reloc root is read through btrfs_read_fs_root_no_cache() and is
 * released with btrfs_free_fs_root() once it has been checked; every other
 * root goes through btrfs_read_fs_root() with key.offset set to (u64)-1.
 */
5267 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5268 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5271 key.offset = (u64)-1;
5272 cur_root = btrfs_read_fs_root(fs_info, &key);
5275 if (IS_ERR(cur_root)) {
5276 error("Fail to read fs/subvol tree: %lld",
5282 ret = check_fs_root_v2(cur_root, ext_ref);
5285 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5286 btrfs_free_fs_root(cur_root);
5287 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5288 key.type == BTRFS_ROOT_BACKREF_KEY) {
5289 ret = check_root_ref(tree_root, &key, node, slot);
5293 ret = btrfs_next_item(tree_root, &path);
5303 btrfs_release_path(&path);
/*
 * Go through every backref on rec->backrefs and look for inconsistencies:
 * backrefs not found in the extent tree, tree backrefs never referenced back,
 * data backrefs whose found count, disk bytenr or byte length disagree with
 * the record, and finally a global count that differs from rec->refs.
 *
 * NOTE(review): the statements that set the return value and that test
 * @print_errs are not visible in this excerpt; maybe_free_extent_rec() treats
 * a zero return as "nothing wrong".
 */
5307 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5309 struct list_head *cur = rec->backrefs.next;
5310 struct extent_backref *back;
5311 struct tree_backref *tback;
5312 struct data_backref *dback;
5316 while(cur != &rec->backrefs) {
5317 back = to_extent_backref(cur);
/* Case 1: the backref has no counterpart in the extent tree. */
5319 if (!back->found_extent_tree) {
5323 if (back->is_data) {
5324 dback = to_data_backref(back);
5325 fprintf(stderr, "Backref %llu %s %llu"
5326 " owner %llu offset %llu num_refs %lu"
5327 " not found in extent tree\n",
5328 (unsigned long long)rec->start,
5329 back->full_backref ?
5331 back->full_backref ?
5332 (unsigned long long)dback->parent:
5333 (unsigned long long)dback->root,
5334 (unsigned long long)dback->owner,
5335 (unsigned long long)dback->offset,
5336 (unsigned long)dback->num_refs);
5338 tback = to_tree_backref(back);
5339 fprintf(stderr, "Backref %llu parent %llu"
5340 " root %llu not found in extent tree\n",
5341 (unsigned long long)rec->start,
5342 (unsigned long long)tback->parent,
5343 (unsigned long long)tback->root);
/* Case 2: a tree backref that was never seen as a reference. */
5346 if (!back->is_data && !back->found_ref) {
5350 tback = to_tree_backref(back);
5351 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5352 (unsigned long long)rec->start,
5353 back->full_backref ? "parent" : "root",
5354 back->full_backref ?
5355 (unsigned long long)tback->parent :
5356 (unsigned long long)tback->root, back);
/* Case 3: data backref consistency (count, disk bytenr, length). */
5358 if (back->is_data) {
5359 dback = to_data_backref(back);
5360 if (dback->found_ref != dback->num_refs) {
5364 fprintf(stderr, "Incorrect local backref count"
5365 " on %llu %s %llu owner %llu"
5366 " offset %llu found %u wanted %u back %p\n",
5367 (unsigned long long)rec->start,
5368 back->full_backref ?
5370 back->full_backref ?
5371 (unsigned long long)dback->parent:
5372 (unsigned long long)dback->root,
5373 (unsigned long long)dback->owner,
5374 (unsigned long long)dback->offset,
5375 dback->found_ref, dback->num_refs, back);
5377 if (dback->disk_bytenr != rec->start) {
5381 fprintf(stderr, "Backref disk bytenr does not"
5382 " match extent record, bytenr=%llu, "
5383 "ref bytenr=%llu\n",
5384 (unsigned long long)rec->start,
5385 (unsigned long long)dback->disk_bytenr);
5388 if (dback->bytes != rec->nr) {
5392 fprintf(stderr, "Backref bytes do not match "
5393 "extent backref, bytenr=%llu, ref "
5394 "bytes=%llu, backref bytes=%llu\n",
5395 (unsigned long long)rec->start,
5396 (unsigned long long)rec->nr,
5397 (unsigned long long)dback->bytes);
/* Accumulate the global count; data backrefs add their found_ref. */
5400 if (!back->is_data) {
5403 dback = to_data_backref(back);
5404 found += dback->found_ref;
/* The accumulated count must equal the record's own reference count. */
5407 if (found != rec->refs) {
5411 fprintf(stderr, "Incorrect global backref count "
5412 "on %llu found %llu wanted %llu\n",
5413 (unsigned long long)rec->start,
5414 (unsigned long long)found,
5415 (unsigned long long)rec->refs);
/*
 * Drain rec->backrefs: loop until the list is empty, taking the first entry
 * each time.
 * NOTE(review): the unlink/free statements are not visible in this excerpt.
 */
5421 static int free_all_extent_backrefs(struct extent_record *rec)
5423 struct extent_backref *back;
5424 struct list_head *cur;
5425 while (!list_empty(&rec->backrefs)) {
5426 cur = rec->backrefs.next;
5427 back = to_extent_backref(cur);
/*
 * Tear down @extent_cache: fetch the first cache extent, map it back to the
 * extent_record that embeds it, remove it from the tree and release the
 * record backrefs via free_all_extent_backrefs().
 * NOTE(review): the loop construct and the release of the record itself are
 * on lines not visible here.
 */
5434 static void free_extent_record_cache(struct cache_tree *extent_cache)
5436 struct cache_extent *cache;
5437 struct extent_record *rec;
5440 cache = first_cache_extent(extent_cache);
5443 rec = container_of(cache, struct extent_record, cache);
5444 remove_cache_extent(extent_cache, cache);
5445 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing about it remains to be verified.
 *
 * All of the following must hold: content_checked and owner_ref_checked are
 * set, extent_item_refs equals refs and refs is non-zero, there are no
 * duplicates, all_backpointers_checked(rec, 0) returns 0, and none of
 * bad_full_backref, crossing_stripes or wrong_chunk_type is set.
 *
 * When they do, the record is removed from the cache, its backrefs are
 * released and it is unlinked from whatever list it sits on.
 * NOTE(review): the release of @rec itself and the return value are not
 * visible in this excerpt.
 */
5450 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5451 struct extent_record *rec)
5453 if (rec->content_checked && rec->owner_ref_checked &&
5454 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5455 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5456 !rec->bad_full_backref && !rec->crossing_stripes &&
5457 !rec->wrong_chunk_type) {
5458 remove_cache_extent(extent_cache, &rec->cache);
5459 free_all_extent_backrefs(rec);
5460 list_del_init(&rec->list);
/*
 * Check whether the owner recorded in the header of @buf is backed by a
 * reference to the tree block described by @rec.
 *
 * Step 1: walk rec->backrefs and compare the header owner with the root of
 * each tree backref, testing found_ref and full_backref first.
 * Step 2: read the fs root whose objectid is the header owner and search it
 * for the first key of @buf with path.lowest_level set to level + 1, then
 * compare the block pointer in that parent slot with buf->start.
 *
 * Returns 0 when a match was found and 1 otherwise (final return statement).
 * NOTE(review): the statements guarded by the early tests in the loop, by
 * IS_ERR(ref_root) and by the search result are not visible here.
 */
5466 static int check_owner_ref(struct btrfs_root *root,
5467 struct extent_record *rec,
5468 struct extent_buffer *buf)
5470 struct extent_backref *node;
5471 struct tree_backref *back;
5472 struct btrfs_root *ref_root;
5473 struct btrfs_key key;
5474 struct btrfs_path path;
5475 struct extent_buffer *parent;
5480 list_for_each_entry(node, &rec->backrefs, list) {
5483 if (!node->found_ref)
5485 if (node->full_backref)
5487 back = to_tree_backref(node);
5488 if (btrfs_header_owner(buf) == back->root)
5491 BUG_ON(rec->is_root);
5493 /* try to find the block by search corresponding fs tree */
5494 key.objectid = btrfs_header_owner(buf);
5495 key.type = BTRFS_ROOT_ITEM_KEY;
5496 key.offset = (u64)-1;
5498 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5499 if (IS_ERR(ref_root))
5502 level = btrfs_header_level(buf);
/* The search key is the first key of buf, read as an item or node key. */
5504 btrfs_item_key_to_cpu(buf, &key, 0);
5506 btrfs_node_key_to_cpu(buf, &key, 0);
5508 btrfs_init_path(&path);
/* Stop the search one level above buf so the parent slot can be examined. */
5509 path.lowest_level = level + 1;
5510 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5514 parent = path.nodes[level + 1];
5515 if (parent && buf->start == btrfs_node_blockptr(parent,
5516 path.slots[level + 1]))
5519 btrfs_release_path(&path);
5520 return found ? 0 : 1;
/*
 * Walk the backrefs of @rec, testing each tree backref for full_backref and
 * for a root equal to BTRFS_EXTENT_TREE_OBJECTID.
 * NOTE(review): the values returned for each test, the list advance and any
 * filtering of data backrefs are on lines not visible in this excerpt.
 */
5523 static int is_extent_tree_record(struct extent_record *rec)
5525 struct list_head *cur = rec->backrefs.next;
5526 struct extent_backref *node;
5527 struct tree_backref *back;
5530 while(cur != &rec->backrefs) {
5531 node = to_extent_backref(cur);
5535 back = to_tree_backref(node);
5536 if (node->full_backref)
5538 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Look up the range (start, len) in @extent_cache and, after testing the
 * record with is_extent_tree_record(), register it through
 * btrfs_add_corrupt_extent_record(), keyed by the record parent key
 * converted to CPU byte order.
 * NOTE(review): the declarations of start and len, and the statements taken
 * when the lookup fails or the record test is negative, are not visible.
 */
5545 static int record_bad_block_io(struct btrfs_fs_info *info,
5546 struct cache_tree *extent_cache,
5549 struct extent_record *rec;
5550 struct cache_extent *cache;
5551 struct btrfs_key key;
5553 cache = lookup_cache_extent(extent_cache, start, len);
5557 rec = container_of(cache, struct extent_record, cache);
5558 if (!is_extent_tree_record(rec))
5561 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5562 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 of @buf.
 *
 * Node case (non-zero header level): both btrfs_key_ptr entries are read
 * into temporaries and written back crosswise. Key 0 of the node is also
 * read and passed to btrfs_fixup_low_keys() for level header_level + 1; the
 * condition guarding that call is not visible here.
 *
 * Leaf case: offsets, sizes and keys of both items are captured, the item
 * data is copied into two malloc buffers and written back, then the item
 * offsets and sizes are exchanged and the keys are rewritten through
 * btrfs_set_item_key_unsafe() with path->slots[0] pointed at each slot.
 *
 * NOTE(review): allocation failure handling, buffer release and the return
 * statements are on lines not visible in this excerpt.
 */
5565 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5566 struct extent_buffer *buf, int slot)
5568 if (btrfs_header_level(buf)) {
5569 struct btrfs_key_ptr ptr1, ptr2;
5571 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5572 sizeof(struct btrfs_key_ptr));
5573 read_extent_buffer(buf, &ptr2,
5574 btrfs_node_key_ptr_offset(slot + 1),
5575 sizeof(struct btrfs_key_ptr));
5576 write_extent_buffer(buf, &ptr1,
5577 btrfs_node_key_ptr_offset(slot + 1),
5578 sizeof(struct btrfs_key_ptr));
5579 write_extent_buffer(buf, &ptr2,
5580 btrfs_node_key_ptr_offset(slot),
5581 sizeof(struct btrfs_key_ptr));
5583 struct btrfs_disk_key key;
5584 btrfs_node_key(buf, &key, 0);
5585 btrfs_fixup_low_keys(root, path, &key,
5586 btrfs_header_level(buf) + 1);
5589 struct btrfs_item *item1, *item2;
5590 struct btrfs_key k1, k2;
5591 char *item1_data, *item2_data;
5592 u32 item1_offset, item2_offset, item1_size, item2_size;
5594 item1 = btrfs_item_nr(slot);
5595 item2 = btrfs_item_nr(slot + 1);
5596 btrfs_item_key_to_cpu(buf, &k1, slot);
5597 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5598 item1_offset = btrfs_item_offset(buf, item1);
5599 item2_offset = btrfs_item_offset(buf, item2);
5600 item1_size = btrfs_item_size(buf, item1);
5601 item2_size = btrfs_item_size(buf, item2);
5603 item1_data = malloc(item1_size);
5606 item2_data = malloc(item2_size);
5612 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5613 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): each buffer is written to the other item offset using the
 * other item size; confirm this is intended when the two sizes differ.
 */
5615 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5616 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5620 btrfs_set_item_offset(buf, item1, item2_offset);
5621 btrfs_set_item_offset(buf, item2, item1_offset);
5622 btrfs_set_item_size(buf, item1, item2_size);
5623 btrfs_set_item_size(buf, item2, item1_size);
5625 path->slots[0] = slot;
5626 btrfs_set_item_key_unsafe(root, path, &k2);
5627 path->slots[0] = slot + 1;
5628 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the buffer at path->nodes[path->lowest_level].
 *
 * Each adjacent pair of keys (i, i + 1) is read as node keys or item keys
 * and compared with btrfs_comp_cpu_keys(). The result is tested for < 0
 * before swap_values() is called on the pair, after which the buffer is
 * marked dirty.
 * NOTE(review): the statement guarded by the < 0 test (presumably a
 * continue), the node or leaf selection and the error handling after
 * swap_values() are not visible in this excerpt.
 */
5633 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5635 struct extent_buffer *buf;
5636 struct btrfs_key k1, k2;
5638 int level = path->lowest_level;
5641 buf = path->nodes[level];
5642 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5644 btrfs_node_key_to_cpu(buf, &k1, i);
5645 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5647 btrfs_item_key_to_cpu(buf, &k1, i);
5648 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5650 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5652 ret = swap_values(root, path, buf, i);
5655 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * The key type is first tested against DIR_INDEX, EXTENT_ITEM,
 * METADATA_ITEM, TREE_BLOCK_REF and EXTENT_DATA_REF. The deletion is then
 * logged, the item headers after @slot are moved down by one entry, nritems
 * is decremented and the buffer is marked dirty. The key of item 0 is also
 * read and passed to btrfs_fixup_low_keys() at level 1.
 * NOTE(review): the statement taken for other key types, the condition
 * guarding the fixup call and the return value are not visible here.
 */
5661 static int delete_bogus_item(struct btrfs_root *root,
5662 struct btrfs_path *path,
5663 struct extent_buffer *buf, int slot)
5665 struct btrfs_key key;
5666 int nritems = btrfs_header_nritems(buf);
5668 btrfs_item_key_to_cpu(buf, &key, slot);
5670 /* These are all the keys we can deal with missing. */
5671 if (key.type != BTRFS_DIR_INDEX_KEY &&
5672 key.type != BTRFS_EXTENT_ITEM_KEY &&
5673 key.type != BTRFS_METADATA_ITEM_KEY &&
5674 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5675 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5678 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5679 (unsigned long long)key.objectid, key.type,
5680 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array left by the removed slot. */
5681 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5682 btrfs_item_nr_offset(slot + 1),
5683 sizeof(struct btrfs_item) *
5684 (nritems - slot - 1));
5685 btrfs_set_header_nritems(buf, nritems - 1);
5687 struct btrfs_disk_key disk_key;
5689 btrfs_item_key(buf, &disk_key, 0);
5690 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5692 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0]; BUG_ON fires if
 * path->lowest_level is non-zero.
 *
 * Expected layout, as tested below: item 0 ends at
 * BTRFS_LEAF_DATA_SIZE(root) and every later item ends where the previous
 * item starts. An item that ends beyond its limit is handed to
 * delete_bogus_item(). An item that ends short of it gets
 * shift = limit - item end, and its data is moved by that many bytes before
 * the item offset is updated and the buffer marked dirty.
 * NOTE(review): the handling of the delete_bogus_item() result, the new
 * offset value passed to btrfs_set_item_offset() and the return statements
 * are on lines not visible in this excerpt.
 */
5696 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5698 struct extent_buffer *buf;
5702 /* We should only get this for leaves */
5703 BUG_ON(path->lowest_level);
5704 buf = path->nodes[0];
5706 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5707 unsigned int shift = 0, offset;
5709 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5710 BTRFS_LEAF_DATA_SIZE(root)) {
5711 if (btrfs_item_end_nr(buf, i) >
5712 BTRFS_LEAF_DATA_SIZE(root)) {
5713 ret = delete_bogus_item(root, path, buf, i);
5716 fprintf(stderr, "item is off the end of the "
5717 "leaf, can't fix\n");
5721 shift = BTRFS_LEAF_DATA_SIZE(root) -
5722 btrfs_item_end_nr(buf, i);
5723 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5724 btrfs_item_offset_nr(buf, i - 1)) {
5725 if (btrfs_item_end_nr(buf, i) >
5726 btrfs_item_offset_nr(buf, i - 1)) {
5727 ret = delete_bogus_item(root, path, buf, i);
5730 fprintf(stderr, "items overlap, can't fix\n");
5734 shift = btrfs_item_offset_nr(buf, i - 1) -
5735 btrfs_item_end_nr(buf, i);
5740 printf("Shifting item nr %d by %u bytes in block %llu\n",
5741 i, shift, (unsigned long long)buf->start);
5742 offset = btrfs_item_offset_nr(buf, i);
/* Move the item data from offset to offset + shift inside the leaf data. */
5743 memmove_extent_buffer(buf,
5744 btrfs_leaf_data(buf) + offset + shift,
5745 btrfs_leaf_data(buf) + offset,
5746 btrfs_item_size_nr(buf, i));
5747 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5749 btrfs_mark_buffer_dirty(buf);
5753 * We may have moved things, in which case we want to exit so we don't
5754 * write those changes out. Once we have proper abort functionality in
5755 * progs this can be changed to something nicer.
5762 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5763 * then just return -EIO.
/*
 * Repair driver for a tree block that failed validation with @status.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are tested for. btrfs_find_all_roots() collects the roots referencing
 * buf->start; for each one the fs root is read, a transaction is started,
 * and the tree is searched down to the level of @buf with skip_check_block
 * set. fix_key_order() or fix_item_offset() is then run according to
 * @status, and the transaction is committed.
 * NOTE(review): the final argument 1 of btrfs_search_slot() presumably
 * requests COW; confirm against its declaration. The error branches, the
 * release of the roots ulist and the return statements are not visible.
 */
5765 static int try_to_fix_bad_block(struct btrfs_root *root,
5766 struct extent_buffer *buf,
5767 enum btrfs_tree_block_status status)
5769 struct btrfs_trans_handle *trans;
5770 struct ulist *roots;
5771 struct ulist_node *node;
5772 struct btrfs_root *search_root;
5773 struct btrfs_path path;
5774 struct ulist_iterator iter;
5775 struct btrfs_key root_key, key;
5778 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5779 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5782 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5786 btrfs_init_path(&path);
5787 ULIST_ITER_INIT(&iter);
5788 while ((node = ulist_next(roots, &iter))) {
5789 root_key.objectid = node->val;
5790 root_key.type = BTRFS_ROOT_ITEM_KEY;
5791 root_key.offset = (u64)-1;
5793 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5800 trans = btrfs_start_transaction(search_root, 0);
5801 if (IS_ERR(trans)) {
5802 ret = PTR_ERR(trans);
/* Search for the first key of buf, stopping at the level buf lives on. */
5806 path.lowest_level = btrfs_header_level(buf);
5807 path.skip_check_block = 1;
5808 if (path.lowest_level)
5809 btrfs_node_key_to_cpu(buf, &key, 0);
5811 btrfs_item_key_to_cpu(buf, &key, 0);
5812 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5815 btrfs_commit_transaction(trans, search_root);
5818 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5819 ret = fix_key_order(search_root, &path);
5820 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5821 ret = fix_item_offset(search_root, &path);
5823 btrfs_commit_transaction(trans, search_root);
5826 btrfs_release_path(&path);
5827 btrfs_commit_transaction(trans, search_root);
5830 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record in @extent_cache.
 *
 * The record covering (buf->start, buf->len) is looked up and its generation
 * is taken from the block header. For a non-empty block, info_objectid is
 * set from the first key; info_level is set to the header level. The block
 * is checked with btrfs_check_leaf() or btrfs_check_node() against the
 * record parent key; a block that is not clean is passed to
 * try_to_fix_bad_block(), and "bad block" is printed if that does not yield
 * BTRFS_TREE_BLOCK_CLEAN.
 *
 * Afterwards content_checked is set, owner_ref_checked is set for blocks
 * flagged BTRFS_BLOCK_FLAG_FULL_BACKREF or after check_owner_ref(), and
 * maybe_free_extent_rec() is called.
 * NOTE(review): the lookup failure path, the guards around the repair
 * attempt and all return statements are not visible in this excerpt.
 */
5834 static int check_block(struct btrfs_root *root,
5835 struct cache_tree *extent_cache,
5836 struct extent_buffer *buf, u64 flags)
5838 struct extent_record *rec;
5839 struct cache_extent *cache;
5840 struct btrfs_key key;
5841 enum btrfs_tree_block_status status;
5845 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5848 rec = container_of(cache, struct extent_record, cache);
5849 rec->generation = btrfs_header_generation(buf);
5851 level = btrfs_header_level(buf);
5852 if (btrfs_header_nritems(buf) > 0) {
5855 btrfs_item_key_to_cpu(buf, &key, 0);
5857 btrfs_node_key_to_cpu(buf, &key, 0);
5859 rec->info_objectid = key.objectid;
5861 rec->info_level = level;
5863 if (btrfs_is_leaf(buf))
5864 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5866 status = btrfs_check_node(root, &rec->parent_key, buf);
5868 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5870 status = try_to_fix_bad_block(root, buf, status);
5871 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5873 fprintf(stderr, "bad block %llu\n",
5874 (unsigned long long)buf->start);
5877 * Signal to callers we need to start the scan over
5878 * again since we'll have cowed blocks.
5883 rec->content_checked = 1;
5884 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5885 rec->owner_ref_checked = 1;
5887 ret = check_owner_ref(root, rec, buf);
5889 rec->owner_ref_checked = 1;
5893 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the backrefs of @rec for a tree backref matching @parent or @root.
 *
 * Two comparisons are visible: one tests that full_backref is set before
 * comparing @parent with back->parent, the other tests that full_backref
 * is clear before comparing back->root with @root.
 * NOTE(review): the condition choosing between the two comparisons, the
 * skipping of data backrefs, the list advance and the return statements
 * are on lines not visible in this excerpt.
 */
5897 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5898 u64 parent, u64 root)
5900 struct list_head *cur = rec->backrefs.next;
5901 struct extent_backref *node;
5902 struct tree_backref *back;
5904 while(cur != &rec->backrefs) {
5905 node = to_extent_backref(cur);
5909 back = to_tree_backref(node);
5911 if (!node->full_backref)
5913 if (parent == back->parent)
5916 if (node->full_backref)
5918 if (back->root == root)
/*
 * Allocate a tree_backref with malloc, zero its embedded node and append it
 * to the tail of rec->backrefs.
 *
 * One branch stores @parent and sets full_backref to 1; the other sets
 * full_backref to 0.
 * NOTE(review): the allocation failure check, the branch condition, the
 * assignment of the root field and the return statement are not visible.
 */
5925 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5926 u64 parent, u64 root)
5928 struct tree_backref *ref = malloc(sizeof(*ref));
5932 memset(&ref->node, 0, sizeof(ref->node));
5934 ref->parent = parent;
5935 ref->node.full_backref = 1;
5938 ref->node.full_backref = 0;
5940 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search the backrefs of @rec for a data backref matching the arguments.
 *
 * Two comparisons are visible: one compares @parent with back->parent on
 * entries with full_backref set, the other compares @root, @owner and
 * @offset on entries with full_backref clear. In the second case, when
 * found_ref is set and the entry already has found_ref, a further test
 * checks whether back->bytes or back->disk_bytenr differs from @bytes or
 * @disk_bytenr.
 * NOTE(review): the found_ref parameter declaration, the branch condition,
 * the statement guarded by the mismatch test and the return statements are
 * on lines not visible in this excerpt.
 */
5945 static struct data_backref *find_data_backref(struct extent_record *rec,
5946 u64 parent, u64 root,
5947 u64 owner, u64 offset,
5949 u64 disk_bytenr, u64 bytes)
5951 struct list_head *cur = rec->backrefs.next;
5952 struct extent_backref *node;
5953 struct data_backref *back;
5955 while(cur != &rec->backrefs) {
5956 node = to_extent_backref(cur);
5960 back = to_data_backref(node);
5962 if (!node->full_backref)
5964 if (parent == back->parent)
5967 if (node->full_backref)
5969 if (back->root == root && back->owner == owner &&
5970 back->offset == offset) {
5971 if (found_ref && node->found_ref &&
5972 (back->bytes != bytes ||
5973 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref with malloc, zero its embedded node, mark it as a
 * data backref and append it to the tail of rec->backrefs.
 *
 * One branch stores @parent and sets full_backref to 1; the other stores
 * @offset and sets full_backref to 0. bytes is initialised from max_size,
 * and rec->max_size is raised to max_size when that is larger.
 * NOTE(review): the max_size parameter declaration, the allocation failure
 * check, the branch condition, the remaining field assignments and the
 * return statement are not visible in this excerpt.
 */
5982 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5983 u64 parent, u64 root,
5984 u64 owner, u64 offset,
5987 struct data_backref *ref = malloc(sizeof(*ref));
5991 memset(&ref->node, 0, sizeof(ref->node));
5992 ref->node.is_data = 1;
5995 ref->parent = parent;
5998 ref->node.full_backref = 1;
6002 ref->offset = offset;
6003 ref->node.full_backref = 0;
6005 ref->bytes = max_size;
6008 list_add_tail(&ref->node.list, &rec->backrefs);
6009 if (max_size > rec->max_size)
6010 rec->max_size = max_size;
6014 /* Check if the type of extent matches with its chunk */
/*
 * Set rec->wrong_chunk_type when the extent kind disagrees with the flags
 * of the block group found by btrfs_lookup_first_block_group() for
 * rec->start (looked up through the file-level global_info).
 *
 * - Data extent (rec->metadata clear): the group must have the DATA flag.
 * - Metadata extent: the group must have SYSTEM or METADATA.
 * - Metadata extent with backrefs: the first backref must not be a data
 *   backref; if its root is BTRFS_CHUNK_TREE_OBJECTID the group must be
 *   SYSTEM, otherwise METADATA.
 * NOTE(review): the handling of a failed block group lookup and the early
 * returns are on lines not visible in this excerpt.
 */
6015 static void check_extent_type(struct extent_record *rec)
6017 struct btrfs_block_group_cache *bg_cache;
6019 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6023 /* data extent, check chunk directly*/
6024 if (!rec->metadata) {
6025 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6026 rec->wrong_chunk_type = 1;
6030 /* metadata extent, check the obvious case first */
6031 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6032 BTRFS_BLOCK_GROUP_METADATA))) {
6033 rec->wrong_chunk_type = 1;
6038 * Check SYSTEM extent, as it's also marked as metadata, we can only
6039 * make sure it's a SYSTEM extent by its backref
6041 if (!list_empty(&rec->backrefs)) {
6042 struct extent_backref *node;
6043 struct tree_backref *tback;
6046 node = to_extent_backref(rec->backrefs.next);
6047 if (node->is_data) {
6048 /* tree block shouldn't have data backref */
6049 rec->wrong_chunk_type = 1;
6052 tback = container_of(node, struct tree_backref, node);
6054 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6055 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6057 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6058 if (!(bg_cache->flags & bg_type))
6059 rec->wrong_chunk_type = 1;
6064 * Allocate a new extent record, fill default values from @tmpl and insert into
6065 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6066 * the cache, otherwise it fails.
/*
 * Implementation notes:
 * - BUG_ON fires when tmpl->max_size is 0.
 * - rec->nr is the larger of tmpl->nr and tmpl->max_size, while the cache
 *   extent inserted into @extent_cache uses tmpl->nr as its size.
 * - num_duplicates, bad_full_backref, crossing_stripes and wrong_chunk_type
 *   start at 0 and flag_block_full_backref starts at FLAG_UNSET; the other
 *   fields assigned below are copied from @tmpl.
 * - After insertion rec->nr is added to the file-level bytes_used counter,
 *   crossing_stripes is computed with check_crossing_stripes() and
 *   check_extent_type() is run.
 * NOTE(review): the allocation and insertion failure paths, the condition
 * guarding the crossing-stripes check and the return statement are not
 * visible in this excerpt.
 */
6068 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6069 struct extent_record *tmpl)
6071 struct extent_record *rec;
6074 BUG_ON(tmpl->max_size == 0);
6075 rec = malloc(sizeof(*rec));
6078 rec->start = tmpl->start;
6079 rec->max_size = tmpl->max_size;
6080 rec->nr = max(tmpl->nr, tmpl->max_size);
6081 rec->found_rec = tmpl->found_rec;
6082 rec->content_checked = tmpl->content_checked;
6083 rec->owner_ref_checked = tmpl->owner_ref_checked;
6084 rec->num_duplicates = 0;
6085 rec->metadata = tmpl->metadata;
6086 rec->flag_block_full_backref = FLAG_UNSET;
6087 rec->bad_full_backref = 0;
6088 rec->crossing_stripes = 0;
6089 rec->wrong_chunk_type = 0;
6090 rec->is_root = tmpl->is_root;
6091 rec->refs = tmpl->refs;
6092 rec->extent_item_refs = tmpl->extent_item_refs;
6093 rec->parent_generation = tmpl->parent_generation;
6094 INIT_LIST_HEAD(&rec->backrefs);
6095 INIT_LIST_HEAD(&rec->dups);
6096 INIT_LIST_HEAD(&rec->list);
6097 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6098 rec->cache.start = tmpl->start;
6099 rec->cache.size = tmpl->nr;
6100 ret = insert_cache_extent(extent_cache, &rec->cache);
6105 bytes_used += rec->nr;
6108 rec->crossing_stripes = check_crossing_stripes(global_info,
6109 rec->start, global_info->nodesize);
6110 check_extent_type(rec);
6115 * Look up and modify an extent; some values of @tmpl are interpreted verbatim,
6117 * - refs - if found, increase refs
6118 * - is_root - if found, set
6119 * - content_checked - if found, set
6120 * - owner_ref_checked - if found, set
6122 * If not found, create a new one, initialize and insert.
/*
 * Implementation notes:
 * - The range (tmpl->start, tmpl->nr) is looked up in @extent_cache; the
 *   final call to add_extent_rec_nolookup() creates a new record.
 * - For an existing record, a template with found_rec set whose start
 *   differs from the record, or whose record already has found_rec, is
 *   tracked as a duplicate: the record is queued on the file-level
 *   duplicate_extents list if it is not on a list yet, and a new
 *   extent_record copied from the template is appended to rec->dups.
 * - A non-zero tmpl->extent_item_refs on a non-duplicate is stored in the
 *   record; a message is printed if the record already had a value.
 * - content_checked, owner_ref_checked, parent_key, parent_generation and
 *   max_size are merged from the template, then the crossing-stripes and
 *   chunk type checks run and maybe_free_extent_rec() is called.
 * NOTE(review): several guards, the allocation failure path for the
 * duplicate and all return statements are not visible in this excerpt.
 */
6124 static int add_extent_rec(struct cache_tree *extent_cache,
6125 struct extent_record *tmpl)
6127 struct extent_record *rec;
6128 struct cache_extent *cache;
6132 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6134 rec = container_of(cache, struct extent_record, cache);
6138 rec->nr = max(tmpl->nr, tmpl->max_size);
6141 * We need to make sure to reset nr to whatever the extent
6142 * record says was the real size, this way we can compare it to
6145 if (tmpl->found_rec) {
6146 if (tmpl->start != rec->start || rec->found_rec) {
6147 struct extent_record *tmp;
6150 if (list_empty(&rec->list))
6151 list_add_tail(&rec->list,
6152 &duplicate_extents);
6155 * We have to do this song and dance in case we
6156 * find an extent record that falls inside of
6157 * our current extent record but does not have
6158 * the same objectid.
6160 tmp = malloc(sizeof(*tmp));
6163 tmp->start = tmpl->start;
6164 tmp->max_size = tmpl->max_size;
6167 tmp->metadata = tmpl->metadata;
6168 tmp->extent_item_refs = tmpl->extent_item_refs;
6169 INIT_LIST_HEAD(&tmp->list);
6170 list_add_tail(&tmp->list, &rec->dups);
6171 rec->num_duplicates++;
6178 if (tmpl->extent_item_refs && !dup) {
6179 if (rec->extent_item_refs) {
6180 fprintf(stderr, "block %llu rec "
6181 "extent_item_refs %llu, passed %llu\n",
6182 (unsigned long long)tmpl->start,
6183 (unsigned long long)
6184 rec->extent_item_refs,
6185 (unsigned long long)tmpl->extent_item_refs);
6187 rec->extent_item_refs = tmpl->extent_item_refs;
6191 if (tmpl->content_checked)
6192 rec->content_checked = 1;
6193 if (tmpl->owner_ref_checked)
6194 rec->owner_ref_checked = 1;
6195 memcpy(&rec->parent_key, &tmpl->parent_key,
6196 sizeof(tmpl->parent_key));
6197 if (tmpl->parent_generation)
6198 rec->parent_generation = tmpl->parent_generation;
6199 if (rec->max_size < tmpl->max_size)
6200 rec->max_size = tmpl->max_size;
6203 * A metadata extent can't cross stripe_len boundary, otherwise
6204 * kernel scrub won't be able to handle it.
6205 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6209 rec->crossing_stripes = check_crossing_stripes(
6210 global_info, rec->start,
6211 global_info->nodesize);
6212 check_extent_type(rec);
6213 maybe_free_extent_rec(extent_cache, rec);
6217 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (@parent or @root) for the extent at @bytenr.
 *
 * The extent is looked up in @extent_cache; a template record starting at
 * @bytenr can be inserted through add_extent_rec_nolookup(), followed by a
 * second lookup. The record start is compared with @bytenr. The matching
 * backref is found with find_tree_backref() or created with
 * alloc_tree_backref(). Depending on the source of the reference, either
 * found_ref or found_extent_tree is set on the backref; if the flag was
 * already set, "Extent back ref already exists" is printed first. Finally
 * check_extent_type() and maybe_free_extent_rec() are run.
 * NOTE(review): the branch conditions (including how @found_ref selects the
 * flag), the error paths and the return statements are not visible here.
 */
6222 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6223 u64 parent, u64 root, int found_ref)
6225 struct extent_record *rec;
6226 struct tree_backref *back;
6227 struct cache_extent *cache;
6230 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6232 struct extent_record tmpl;
6234 memset(&tmpl, 0, sizeof(tmpl));
6235 tmpl.start = bytenr;
6240 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6244 /* really a bug in cache_extent implement now */
6245 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6250 rec = container_of(cache, struct extent_record, cache);
6251 if (rec->start != bytenr) {
6253 * Several cause, from unaligned bytenr to over lapping extents
6258 back = find_tree_backref(rec, parent, root);
6260 back = alloc_tree_backref(rec, parent, root);
6266 if (back->node.found_ref) {
6267 fprintf(stderr, "Extent back ref already exists "
6268 "for %llu parent %llu root %llu \n",
6269 (unsigned long long)bytenr,
6270 (unsigned long long)parent,
6271 (unsigned long long)root);
6273 back->node.found_ref = 1;
6275 if (back->node.found_extent_tree) {
6276 fprintf(stderr, "Extent back ref already exists "
6277 "for %llu parent %llu root %llu \n",
6278 (unsigned long long)bytenr,
6279 (unsigned long long)parent,
6280 (unsigned long long)root);
6282 back->node.found_extent_tree = 1;
6284 check_extent_type(rec);
6285 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent is looked up in @extent_cache; a template record with start
 * @bytenr and the given max_size can be inserted first. rec->max_size is
 * raised to @max_size when that is larger. The backref is found with
 * find_data_backref() or created with alloc_data_backref().
 *
 * One branch requires num_refs == 1 (BUG_ON), requires an already found
 * backref to have bytes == max_size (BUG_ON), bumps found_ref, stores bytes
 * and disk_bytenr, and marks the record content and owner ref as checked.
 * The other branch stores @num_refs and sets found_extent_tree, printing
 * "Extent back ref already exists" if it was already set.
 * maybe_free_extent_rec() is called at the end.
 * NOTE(review): the branch conditions, the error paths and the return
 * statements are on lines not visible in this excerpt.
 */
6289 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6290 u64 parent, u64 root, u64 owner, u64 offset,
6291 u32 num_refs, int found_ref, u64 max_size)
6293 struct extent_record *rec;
6294 struct data_backref *back;
6295 struct cache_extent *cache;
6298 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6300 struct extent_record tmpl;
6302 memset(&tmpl, 0, sizeof(tmpl));
6303 tmpl.start = bytenr;
6305 tmpl.max_size = max_size;
6307 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6311 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6316 rec = container_of(cache, struct extent_record, cache);
6317 if (rec->max_size < max_size)
6318 rec->max_size = max_size;
6321 * If found_ref is set then max_size is the real size and must match the
6322 * existing refs. So if we have already found a ref then we need to
6323 * make sure that this ref matches the existing one, otherwise we need
6324 * to add a new backref so we can notice that the backrefs don't match
6325 * and we need to figure out who is telling the truth. This is to
6326 * account for that awful fsync bug I introduced where we'd end up with
6327 * a btrfs_file_extent_item that would have its length include multiple
6328 * prealloc extents or point inside of a prealloc extent.
6330 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6333 back = alloc_data_backref(rec, parent, root, owner, offset,
6339 BUG_ON(num_refs != 1);
6340 if (back->node.found_ref)
6341 BUG_ON(back->bytes != max_size);
6342 back->node.found_ref = 1;
6343 back->found_ref += 1;
6344 back->bytes = max_size;
6345 back->disk_bytenr = bytenr;
6347 rec->content_checked = 1;
6348 rec->owner_ref_checked = 1;
6350 if (back->node.found_extent_tree) {
6351 fprintf(stderr, "Extent back ref already exists "
6352 "for %llu parent %llu root %llu "
6353 "owner %llu offset %llu num_refs %lu\n",
6354 (unsigned long long)bytenr,
6355 (unsigned long long)parent,
6356 (unsigned long long)root,
6357 (unsigned long long)owner,
6358 (unsigned long long)offset,
6359 (unsigned long)num_refs);
6361 back->num_refs = num_refs;
6362 back->node.found_extent_tree = 1;
6364 maybe_free_extent_rec(extent_cache, rec);
/*
 * Add the range (@bytenr, @size) to the @seen tree and to the @pending tree.
 * The result of the @seen insertion is kept in ret; the result of the
 * @pending insertion is ignored.
 * NOTE(review): the test on ret between the two insertions and the return
 * statements are not visible in this excerpt.
 */
6368 static int add_pending(struct cache_tree *pending,
6369 struct cache_tree *seen, u64 bytenr, u32 size)
6372 ret = add_cache_extent(seen, bytenr, size);
6375 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (capacity @bits_nr) with the next block ranges to process.
 *
 * Visible order of sources: the first extent in @reada fills bits[0]; then
 * @nodes is searched starting at @last, backed off by 32768 when larger
 * than that, falling back to a search from 0; then @pending is searched
 * from 0. Consecutive extents are copied while entries remain and
 * ret < bits_nr. When more than 8 slots are left, extents from @pending
 * that follow bits[0] are appended, with a test on gaps larger than 32768.
 * NOTE(review): node_start is unsigned long while @last is u64, so the
 * value can truncate where unsigned long is 32 bits. The conditions between
 * the sources, the statement guarded by the gap test and the return
 * statements are not visible in this excerpt.
 */
6379 static int pick_next_pending(struct cache_tree *pending,
6380 struct cache_tree *reada,
6381 struct cache_tree *nodes,
6382 u64 last, struct block_info *bits, int bits_nr,
6385 unsigned long node_start = last;
6386 struct cache_extent *cache;
6389 cache = search_cache_extent(reada, 0);
6391 bits[0].start = cache->start;
6392 bits[0].size = cache->size;
6397 if (node_start > 32768)
6398 node_start -= 32768;
6400 cache = search_cache_extent(nodes, node_start);
6402 cache = search_cache_extent(nodes, 0);
6405 cache = search_cache_extent(pending, 0);
6410 bits[ret].start = cache->start;
6411 bits[ret].size = cache->size;
6412 cache = next_cache_extent(cache);
6414 } while (cache && ret < bits_nr);
6420 bits[ret].start = cache->start;
6421 bits[ret].size = cache->size;
6422 cache = next_cache_extent(cache);
6424 } while (cache && ret < bits_nr);
6426 if (bits_nr - ret > 8) {
6427 u64 lookup = bits[0].start + bits[0].size;
6428 struct cache_extent *next;
6429 next = search_cache_extent(pending, lookup);
6431 if (next->start - lookup > 32768)
6433 bits[ret].start = next->start;
6434 bits[ret].size = next->size;
6435 lookup = next->start + next->size;
6439 next = next_cache_extent(next);
/*
 * Per-extent callback passed to cache_tree_free_extents(): map @cache back
 * to its chunk_record and unlink the record from its list and dextents
 * lists.
 * NOTE(review): the release of the record is not visible in this excerpt.
 */
6447 static void free_chunk_record(struct cache_extent *cache)
6449 struct chunk_record *rec;
6451 rec = container_of(cache, struct chunk_record, cache);
6452 list_del_init(&rec->list);
6453 list_del_init(&rec->dextents);
/*
 * Release every extent in @chunk_cache by handing the tree to
 * cache_tree_free_extents() with free_chunk_record() as the callback.
 */
6457 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6459 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback used with FREE_RB_BASED_TREE: map the rb_node back to
 * the device_record that embeds it.
 * NOTE(review): the release of the record is not visible in this excerpt.
 */
6462 static void free_device_record(struct rb_node *node)
6464 struct device_record *rec;
6466 rec = container_of(node, struct device_record, node);
/*
 * Macro instantiation taking the name device_cache and the per-node
 * callback free_device_record.
 * NOTE(review): presumably this expands to the tree teardown function for
 * the device cache; confirm against the macro definition, which is not
 * visible in this excerpt.
 */
6470 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the
 * tree->block_groups list.
 * NOTE(review): the test on the insertion result and the return statements
 * are not visible in this excerpt.
 */
6472 int insert_block_group_record(struct block_group_tree *tree,
6473 struct block_group_record *bg_rec)
6477 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6481 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-extent callback passed to cache_tree_free_extents(): map @cache back
 * to its block_group_record and unlink the record from its list.
 * NOTE(review): the release of the record is not visible in this excerpt.
 */
6485 static void free_block_group_record(struct cache_extent *cache)
6487 struct block_group_record *rec;
6489 rec = container_of(cache, struct block_group_record, cache);
6490 list_del_init(&rec->list);
/*
 * Release every extent in the cache tree of @tree by calling
 * cache_tree_free_extents() with free_block_group_record() as the callback.
 */
6494 void free_block_group_tree(struct block_group_tree *tree)
6496 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the cache tree of @tree with insert_cache_extent2()
 * and append it to both orphan lists, no_chunk_orphans and
 * no_device_orphans.
 * NOTE(review): the test on the insertion result and the return statements
 * are not visible in this excerpt.
 */
6499 int insert_device_extent_record(struct device_extent_tree *tree,
6500 struct device_extent_record *de_rec)
6505 * Device extent is a bit different from the other extents, because
6506 * the extents which belong to the different devices may have the
6507 * same start and size, so we need use the special extent cache
6508 * search/insert functions.
6510 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6514 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6515 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-extent callback passed to cache_tree_free_extents(): map @cache back
 * to its device_extent_record and unlink the record from chunk_list and
 * device_list when those list heads are non-empty.
 * NOTE(review): the release of the record is not visible in this excerpt.
 */
6519 static void free_device_extent_record(struct cache_extent *cache)
6521 struct device_extent_record *rec;
6523 rec = container_of(cache, struct device_extent_record, cache);
6524 if (!list_empty(&rec->chunk_list))
6525 list_del_init(&rec->chunk_list);
6526 if (!list_empty(&rec->device_list))
6527 list_del_init(&rec->device_list);
/*
 * Release every extent in the cache tree of @tree by calling
 * cache_tree_free_extents() with free_device_extent_record() as the
 * callback.
 */
6531 void free_device_extent_tree(struct device_extent_tree *tree)
6533 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6536 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Handle a v0 extent ref item at @slot of @leaf (built only when
 * BTRFS_COMPAT_EXTENT_TREE_V0 is defined).
 *
 * A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded with
 * add_tree_backref(); the other call records a data backref carrying the
 * v0 ref count. Both use key.objectid as the extent bytenr and key.offset
 * as the parent.
 * NOTE(review): the remaining add_tree_backref() arguments, the else
 * keyword and the return statement are not visible in this excerpt.
 */
6537 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6538 struct extent_buffer *leaf, int slot)
6540 struct btrfs_extent_ref_v0 *ref0;
6541 struct btrfs_key key;
6544 btrfs_item_key_to_cpu(leaf, &key, slot);
6545 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6546 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6547 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6550 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6551 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is allocated zeroed with calloc, sized by
 * btrfs_chunk_record_size(num_stripes). The cache extent covers
 * (key->offset, chunk length). The key fields, the header generation and
 * the chunk attributes read below are copied in, followed by devid, offset
 * and device UUID for each stripe.
 * NOTE(review): the slot parameter declaration, the statement following the
 * allocation failure message, the last read_extent_buffer() argument and
 * the return statement are not visible in this excerpt.
 */
6557 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6558 struct btrfs_key *key,
6561 struct btrfs_chunk *ptr;
6562 struct chunk_record *rec;
6565 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6566 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6568 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6570 fprintf(stderr, "memory allocation failed\n");
6574 INIT_LIST_HEAD(&rec->list);
6575 INIT_LIST_HEAD(&rec->dextents);
6578 rec->cache.start = key->offset;
6579 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6581 rec->generation = btrfs_header_generation(leaf);
6583 rec->objectid = key->objectid;
6584 rec->type = key->type;
6585 rec->offset = key->offset;
6587 rec->length = rec->cache.size;
6588 rec->owner = btrfs_chunk_owner(leaf, ptr);
6589 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6590 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6591 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6592 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6593 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6594 rec->num_stripes = num_stripes;
6595 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6597 for (i = 0; i < rec->num_stripes; ++i) {
6598 rec->stripes[i].devid =
6599 btrfs_stripe_devid_nr(leaf, ptr, i);
6600 rec->stripes[i].offset =
6601 btrfs_stripe_offset_nr(leaf, ptr, i);
6602 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6603 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * btrfs_check_chunk_valid() is run first; an invalid chunk is reported with
 * "is not valid, ignore it". A chunk_record is then built with
 * btrfs_new_chunk_record() and inserted into the cache; "existed" is
 * printed on the insertion failure path.
 * NOTE(review): the slot parameter declaration, the tests on ret, the
 * handling of a failed insertion and the return statements are not visible
 * in this excerpt.
 */
6610 static int process_chunk_item(struct cache_tree *chunk_cache,
6611 struct btrfs_key *key, struct extent_buffer *eb,
6614 struct chunk_record *rec;
6615 struct btrfs_chunk *chunk;
6618 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6620 * Do extra check for this chunk item,
6622 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6623 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6624 * and owner<->key_type check.
6626 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6629 error("chunk(%llu, %llu) is not valid, ignore it",
6630 key->offset, btrfs_chunk_length(eb, chunk));
6633 rec = btrfs_new_chunk_record(eb, key, slot);
6634 ret = insert_cache_extent(chunk_cache, &rec->cache);
6636 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6637 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the rb-tree @dev_cache using device_record_compare.
 *
 * The record is allocated with malloc and filled from the key, the header
 * generation and the dev item (device id, total bytes, bytes used).
 * rec->devid is first set from key->offset and then overwritten with
 * btrfs_device_id(), so the item value is the one that is kept. "existed"
 * is printed on the insertion failure path.
 * NOTE(review): the statement following the allocation failure message,
 * the test on ret and the return statements are not visible here.
 */
6644 static int process_device_item(struct rb_root *dev_cache,
6645 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6647 struct btrfs_dev_item *ptr;
6648 struct device_record *rec;
6651 ptr = btrfs_item_ptr(eb,
6652 slot, struct btrfs_dev_item);
6654 rec = malloc(sizeof(*rec));
6656 fprintf(stderr, "memory allocation failed\n");
6660 rec->devid = key->offset;
6661 rec->generation = btrfs_header_generation(eb);
6663 rec->objectid = key->objectid;
6664 rec->type = key->type;
6665 rec->offset = key->offset;
6667 rec->devid = btrfs_device_id(eb, ptr);
6668 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6669 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6671 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6673 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a zero-initialised block_group_record from the block group item at
 * @slot of @leaf.
 *
 * The cache extent covers (key->objectid, key->offset). The key fields and
 * the header generation are copied in, flags come from the on-disk item,
 * and the list head is initialised.
 * NOTE(review): the slot parameter declaration, the statement following the
 * allocation failure message and the return statement are not visible.
 */
6680 struct block_group_record *
6681 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6684 struct btrfs_block_group_item *ptr;
6685 struct block_group_record *rec;
6687 rec = calloc(1, sizeof(*rec));
6689 fprintf(stderr, "memory allocation failed\n");
6693 rec->cache.start = key->objectid;
6694 rec->cache.size = key->offset;
6696 rec->generation = btrfs_header_generation(leaf);
6698 rec->objectid = key->objectid;
6699 rec->type = key->type;
6700 rec->offset = key->offset;
6702 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6703 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6705 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record for the item at @slot of @eb and insert it
 * into @block_group_cache; "existed" is printed on the insertion failure
 * path.
 * NOTE(review): the test on ret, the handling of a failed insertion and the
 * return statements are not visible in this excerpt.
 */
6710 static int process_block_group_item(struct block_group_tree *block_group_cache,
6711 struct btrfs_key *key,
6712 struct extent_buffer *eb, int slot)
6714 struct block_group_record *rec;
6717 rec = btrfs_new_block_group_record(eb, key, slot);
6718 ret = insert_block_group_record(block_group_cache, rec);
6720 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6721 rec->objectid, rec->offset);
/*
 * Build a zero-initialised device_extent_record from the dev extent item at
 * @slot of @leaf.
 *
 * The cache extent is keyed by objectid = key->objectid and
 * start = key->offset, with size equal to the dev extent length. The key
 * fields, the header generation and the chunk objectid are copied in, and
 * both list heads are initialised.
 * NOTE(review): the statement following the allocation failure message,
 * the field receiving the chunk offset and the return statement are not
 * visible in this excerpt.
 */
6728 struct device_extent_record *
6729 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6730 struct btrfs_key *key, int slot)
6732 struct device_extent_record *rec;
6733 struct btrfs_dev_extent *ptr;
6735 rec = calloc(1, sizeof(*rec));
6737 fprintf(stderr, "memory allocation failed\n");
6741 rec->cache.objectid = key->objectid;
6742 rec->cache.start = key->offset;
6744 rec->generation = btrfs_header_generation(leaf);
6746 rec->objectid = key->objectid;
6747 rec->type = key->type;
6748 rec->offset = key->offset;
6750 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6751 rec->chunk_objecteid =
6752 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6754 btrfs_dev_extent_chunk_offset(leaf, ptr);
6755 rec->length = btrfs_dev_extent_length(leaf, ptr);
6756 rec->cache.size = rec->length;
6758 INIT_LIST_HEAD(&rec->chunk_list);
6759 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record for the item at @slot of @eb and insert it
 * into @dev_extent_cache; "existed" is printed on the insertion failure
 * path.
 * NOTE(review): the return type line, the slot parameter declaration, the
 * test on ret and the return statements are not visible in this excerpt.
 */
6765 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6766 struct btrfs_key *key, struct extent_buffer *eb,
6769 struct device_extent_record *rec;
6772 rec = btrfs_new_device_extent_record(eb, key, slot);
6773 ret = insert_device_extent_record(dev_extent_cache, rec);
6776 "Device extent[%llu, %llu, %llu] existed.\n",
6777 rec->objectid, rec->offset, rec->length);
/*
 * Record the extent item at @slot of @eb in @extent_cache, then walk the
 * inline references stored behind the item and record each as a backref.
 *
 * @root:         used to reach fs_info (nodesize, sectorsize)
 * @extent_cache: cache receiving the extent record and its backrefs
 * @eb:           leaf that holds the extent item
 * @slot:         slot of the item inside @eb
 */
6784 static int process_extent_item(struct btrfs_root *root,
6785 struct cache_tree *extent_cache,
6786 struct extent_buffer *eb, int slot)
6788 struct btrfs_extent_item *ei;
6789 struct btrfs_extent_inline_ref *iref;
6790 struct btrfs_extent_data_ref *dref;
6791 struct btrfs_shared_data_ref *sref;
6792 struct btrfs_key key;
6793 struct extent_record tmpl;
6798 u32 item_size = btrfs_item_size_nr(eb, slot);
6804 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM keys: the length is the nodesize; otherwise it is key.offset. */
6806 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6808 num_bytes = root->fs_info->nodesize;
6810 num_bytes = key.offset;
/* The extent start (key.objectid) has to be sector aligned. */
6813 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6814 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6815 key.objectid, root->fs_info->sectorsize);
/* Item smaller than the current extent item: handled as the v0 layout. */
6818 if (item_size < sizeof(*ei)) {
6819 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6820 struct btrfs_extent_item_v0 *ei0;
6821 BUG_ON(item_size != sizeof(*ei0));
6822 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6823 refs = btrfs_extent_refs_v0(eb, ei0);
/* v0 path: add the record from a zeroed template and return right away. */
6827 memset(&tmpl, 0, sizeof(tmpl));
6828 tmpl.start = key.objectid;
6829 tmpl.nr = num_bytes;
6830 tmpl.extent_item_refs = refs;
6831 tmpl.metadata = metadata;
6833 tmpl.max_size = num_bytes;
6835 return add_extent_rec(extent_cache, &tmpl);
6838 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6839 refs = btrfs_extent_refs(eb, ei);
6840 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* Size sanity checks: metadata must be one node, data sector aligned. */
6844 if (metadata && num_bytes != root->fs_info->nodesize) {
6845 error("ignore invalid metadata extent, length %llu does not equal to %u",
6846 num_bytes, root->fs_info->nodesize);
6849 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6850 error("ignore invalid data extent, length %llu is not aligned to %u",
6851 num_bytes, root->fs_info->sectorsize);
/* Current layout: add the extent record; its result is not checked here. */
6855 memset(&tmpl, 0, sizeof(tmpl));
6856 tmpl.start = key.objectid;
6857 tmpl.nr = num_bytes;
6858 tmpl.extent_item_refs = refs;
6859 tmpl.metadata = metadata;
6861 tmpl.max_size = num_bytes;
6862 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item.  For a tree block keyed
 * as EXTENT_ITEM a btrfs_tree_block_info sits in between and is skipped.
 */
6864 ptr = (unsigned long)(ei + 1);
6865 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6866 key.type == BTRFS_EXTENT_ITEM_KEY)
6867 ptr += sizeof(struct btrfs_tree_block_info);
/* First byte past the item; presumably the bound of the ref walk -- confirm. */
6869 end = (unsigned long)ei + item_size;
6871 iref = (struct btrfs_extent_inline_ref *)ptr;
6872 type = btrfs_extent_inline_ref_type(eb, iref);
6873 offset = btrfs_extent_inline_ref_offset(eb, iref);
6875 case BTRFS_TREE_BLOCK_REF_KEY:
6876 ret = add_tree_backref(extent_cache, key.objectid,
6880 "add_tree_backref failed (extent items tree block): %s",
6883 case BTRFS_SHARED_BLOCK_REF_KEY:
6884 ret = add_tree_backref(extent_cache, key.objectid,
6888 "add_tree_backref failed (extent items shared block): %s",
6891 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref is read from the location of the inline ref's offset field. */
6892 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6893 add_data_backref(extent_cache, key.objectid, 0,
6894 btrfs_extent_data_ref_root(eb, dref),
6895 btrfs_extent_data_ref_objectid(eb,
6897 btrfs_extent_data_ref_offset(eb, dref),
6898 btrfs_extent_data_ref_count(eb, dref),
6901 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref follows directly after the inline ref header. */
6902 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6903 add_data_backref(extent_cache, key.objectid, offset,
6905 btrfs_shared_data_ref_count(eb, sref),
6909 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6910 key.objectid, key.type, num_bytes);
/* Step to the next inline ref; its size depends on the ref type. */
6913 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that [@offset, @offset + @bytes) is backed by exactly one free space
 * entry of @cache, after cutting out any superblock stripes that fall into
 * the range.  A matching entry is unlinked from the free space ctl.
 *
 * @root:   used to reach the chunk mapping tree
 * @cache:  block group whose free space entries are checked
 * @offset: start of the range
 * @bytes:  length of the range
 */
6920 static int check_cache_range(struct btrfs_root *root,
6921 struct btrfs_block_group_cache *cache,
6922 u64 offset, u64 bytes)
6924 struct btrfs_free_space *entry;
/* Map every superblock mirror back to logical addresses in this block group. */
6930 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6931 bytenr = btrfs_sb_offset(i);
6932 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6933 cache->key.objectid, bytenr, 0,
6934 &logical, &nr, &stripe_len);
/* Stripe ends at or before the range start: no overlap. */
6939 if (logical[nr] + stripe_len <= offset)
/* Stripe starts at or after the range end: no overlap. */
6941 if (offset + bytes <= logical[nr])
/* Stripe starts exactly at the range start: trim it off the front. */
6943 if (logical[nr] == offset) {
6944 if (stripe_len >= bytes) {
6948 bytes -= stripe_len;
6949 offset += stripe_len;
/* Stripe starts before the range: keep only what lies past the stripe. */
6950 } else if (logical[nr] < offset) {
6951 if (logical[nr] + stripe_len >=
6956 bytes = (offset + bytes) -
6957 (logical[nr] + stripe_len);
6958 offset = logical[nr] + stripe_len;
6961 * Could be tricky, the super may land in the
6962 * middle of the area we're checking. First
6963 * check the easiest case, it's at the end.
6965 if (logical[nr] + stripe_len >=
6967 bytes = logical[nr] - offset;
6971 /* Check the left side */
6972 ret = check_cache_range(root, cache,
6974 logical[nr] - offset);
6980 /* Now we continue with the right side */
6981 bytes = (offset + bytes) -
6982 (logical[nr] + stripe_len);
6983 offset = logical[nr] + stripe_len;
/* What is left of the range must match one free space entry exactly. */
6990 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6992 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6993 offset, offset+bytes);
6997 if (entry->offset != offset) {
6998 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7003 if (entry->bytes != bytes) {
7004 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7005 bytes, entry->bytes, offset);
/* Remove the verified entry from the free space ctl. */
7009 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Walk the extent tree across the block group @cache and hand every gap
 * between allocated extents to check_cache_range().  Once the walk is done
 * the free space rb-tree of @cache is tested for leftover entries.
 *
 * @root:  any root of the filesystem; replaced by the extent root below
 * @cache: block group to verify
 */
7014 static int verify_space_cache(struct btrfs_root *root,
7015 struct btrfs_block_group_cache *cache)
7017 struct btrfs_path path;
7018 struct extent_buffer *leaf;
7019 struct btrfs_key key;
7023 root = root->fs_info->extent_root;
/* Start at the block group start, but never below BTRFS_SUPER_INFO_OFFSET. */
7025 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7027 btrfs_init_path(&path);
7028 key.objectid = last;
7030 key.type = BTRFS_EXTENT_ITEM_KEY;
7031 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Ran off the end of the current leaf: move on to the next one. */
7036 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7037 ret = btrfs_next_leaf(root, &path);
7045 leaf = path.nodes[0];
7046 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Key lies at or past the end of this block group. */
7047 if (key.objectid >= cache->key.offset + cache->key.objectid)
/* Only EXTENT_ITEM and METADATA_ITEM keys describe allocated space. */
7049 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7050 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent begins exactly at 'last': no gap, just advance 'last' past it. */
7055 if (last == key.objectid) {
7056 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7057 last = key.objectid + key.offset;
7059 last = key.objectid + root->fs_info->nodesize;
/* Gap [last, key.objectid) has to be covered by the free space cache. */
7064 ret = check_cache_range(root, cache, last,
7065 key.objectid - last);
7068 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7069 last = key.objectid + key.offset;
7071 last = key.objectid + root->fs_info->nodesize;
/* Tail of the block group behind the last extent seen. */
7075 if (last < cache->key.objectid + cache->key.offset)
7076 ret = check_cache_range(root, cache, last,
7077 cache->key.objectid +
7078 cache->key.offset - last);
7081 btrfs_release_path(&path);
/* Entries still linked here were not matched by any checked range. */
7084 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7085 fprintf(stderr, "There are still entries left in the space "
/*
 * Load and verify the free space information of the block groups, starting
 * the lookup just behind the primary superblock.
 *
 * Free space is loaded from the free space tree when the FREE_SPACE_TREE
 * compat_ro feature is set; load_free_space_cache() is the other loader
 * used below.  Returns -EINVAL when 'error' is non-zero, 0 otherwise.
 */
7093 static int check_space_cache(struct btrfs_root *root)
7095 struct btrfs_block_group_cache *cache;
7096 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/* A cache generation other than -1 that differs from the super generation. */
7100 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7101 btrfs_super_generation(root->fs_info->super_copy) !=
7102 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7103 printf("cache and super generation don't match, space cache "
7104 "will be invalidated\n");
7108 if (ctx.progress_enabled) {
7109 ctx.tp = TASK_FREE_SPACE;
7110 task_start(ctx.info);
7114 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* The next lookup starts right behind this block group. */
7118 start = cache->key.objectid + cache->key.offset;
7119 if (!cache->free_space_ctl) {
7120 if (btrfs_init_free_space_ctl(cache,
7121 root->fs_info->sectorsize)) {
7126 btrfs_remove_free_space_cache(cache);
7129 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
/* Superblock stripes are excluded only while the free space tree loads. */
7130 ret = exclude_super_stripes(root, cache);
7132 fprintf(stderr, "could not exclude super stripes: %s\n",
7137 ret = load_free_space_tree(root->fs_info, cache);
7138 free_excluded_extents(root, cache);
7140 fprintf(stderr, "could not load free space tree: %s\n",
7147 ret = load_free_space_cache(root->fs_info, cache);
7152 ret = verify_space_cache(root, cache);
7154 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7155 cache->key.objectid);
7160 task_stop(ctx.info);
7162 return error ? -EINVAL : 0;
/*
 * Read the data range [@bytenr, @bytenr + @num_bytes) and compare the
 * checksum of every sector against the checksums stored in @eb.
 *
 * @root:        used to reach fs_info (sectorsize, csum size, mapping tree)
 * @bytenr:      start of the data range
 * @num_bytes:   length of the range; tested against the sectorsize below
 * @leaf_offset: offset inside @eb of the first stored checksum
 * @eb:          leaf that holds the checksum item
 */
7165 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7166 u64 num_bytes, unsigned long leaf_offset,
7167 struct extent_buffer *eb) {
7170 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7172 unsigned long csum_offset;
7176 u64 data_checked = 0;
/* Length that is not a multiple of the sectorsize. */
7182 if (num_bytes % root->fs_info->sectorsize)
/* One buffer holds the data of the whole range. */
7185 data = malloc(num_bytes);
7189 while (offset < num_bytes) {
7192 read_len = num_bytes - offset;
7193 /* read as much space once a time */
7194 ret = read_extent_data(root, data + offset,
7195 bytenr + offset, &read_len, mirror);
7199 /* verify every 4k data's checksum */
7200 while (data_checked < read_len) {
7202 tmp = offset + data_checked;
/* Checksum one sector of the data that was just read. */
7204 csum = btrfs_csum_data((char *)data + tmp,
7205 csum, root->fs_info->sectorsize);
7206 btrfs_csum_final(csum, (u8 *)&csum);
/* Stored checksum of sector k sits at leaf_offset + k * csum_size. */
7208 csum_offset = leaf_offset +
7209 tmp / root->fs_info->sectorsize * csum_size;
7210 read_extent_buffer(eb, (char *)&csum_expected,
7211 csum_offset, csum_size);
7212 /* try another mirror */
7213 if (csum != csum_expected) {
7214 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7215 mirror, bytenr + tmp,
7216 csum, csum_expected);
7217 num_copies = btrfs_num_copies(
7218 &root->fs_info->mapping_tree,
7220 if (mirror < num_copies - 1) {
7225 data_checked += root->fs_info->sectorsize;
/*
 * Check that the range [@bytenr, @bytenr + @num_bytes) is covered by
 * EXTENT_ITEM records of the extent tree.  The range is trimmed as covering
 * extents are found; a non-empty remainder is reported as having no extents.
 *
 * @root:      used to reach the extent root
 * @bytenr:    start of the range
 * @num_bytes: length of the range (parameter line not visible in this view)
 */
7234 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7237 struct btrfs_path path;
7238 struct extent_buffer *leaf;
7239 struct btrfs_key key;
/* Search with the largest offset for (bytenr, EXTENT_ITEM). */
7242 btrfs_init_path(&path);
7243 key.objectid = bytenr;
7244 key.type = BTRFS_EXTENT_ITEM_KEY;
7245 key.offset = (u64)-1;
7248 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7251 fprintf(stderr, "Error looking up extent record %d\n", ret);
7252 btrfs_release_path(&path);
/* Step back one item: inside the leaf when possible, else to the previous leaf. */
7255 if (path.slots[0] > 0) {
7258 ret = btrfs_prev_leaf(root, &path);
7261 } else if (ret > 0) {
7268 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7271 * Block group items come before extent items if they have the same
7272 * bytenr, so walk back one more just in case. Dear future traveller,
7273 * first congrats on mastering time travel. Now if it's not too much
7274 * trouble could you go back to 2006 and tell Chris to make the
7275 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7276 * EXTENT_ITEM_KEY please?
7278 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7279 if (path.slots[0] > 0) {
7282 ret = btrfs_prev_leaf(root, &path);
7285 } else if (ret > 0) {
7290 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Forward walk over the items; move to the next leaf when this one is used up. */
7294 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7295 ret = btrfs_next_leaf(root, &path);
7297 fprintf(stderr, "Error going to next leaf "
7299 btrfs_release_path(&path);
7305 leaf = path.nodes[0];
7306 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7307 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before the range starts. */
7311 if (key.objectid + key.offset < bytenr) {
/* Extent starts beyond the end of the range. */
7315 if (key.objectid > bytenr + num_bytes)
/* Extent starts at the range start: trim it off the front. */
7318 if (key.objectid == bytenr) {
7319 if (key.offset >= num_bytes) {
7323 num_bytes -= key.offset;
7324 bytenr += key.offset;
/* Extent starts before the range: keep what lies past its end. */
7325 } else if (key.objectid < bytenr) {
7326 if (key.objectid + key.offset >= bytenr + num_bytes) {
7330 num_bytes = (bytenr + num_bytes) -
7331 (key.objectid + key.offset);
7332 bytenr = key.objectid + key.offset;
7334 if (key.objectid + key.offset < bytenr + num_bytes) {
7335 u64 new_start = key.objectid + key.offset;
7336 u64 new_bytes = bytenr + num_bytes - new_start;
7339 * Weird case, the extent is in the middle of
7340 * our range, we'll have to search one side
7341 * and then the other. Not sure if this happens
7342 * in real life, but no harm in coding it up
7343 * anyway just in case.
7345 btrfs_release_path(&path);
7346 ret = check_extent_exists(root, new_start,
7349 fprintf(stderr, "Right section didn't "
7353 num_bytes = key.objectid - bytenr;
7356 num_bytes = key.objectid - bytenr;
/* Bytes left over without an error so far: that part has no extent. */
7363 if (num_bytes && !ret) {
7364 fprintf(stderr, "There are no extents for csum range "
7365 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7369 btrfs_release_path(&path);
/*
 * Walk the EXTENT_CSUM items of the csum tree.  Consecutive items are merged
 * into a run (offset, num_bytes); when a run ends, check_extent_exists()
 * verifies that extent records cover it.  The data itself is only verified
 * against the checksums when check_data_csum is set.
 */
7373 static int check_csums(struct btrfs_root *root)
7375 struct btrfs_path path;
7376 struct extent_buffer *leaf;
7377 struct btrfs_key key;
7378 u64 offset = 0, num_bytes = 0;
7379 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7383 unsigned long leaf_offset;
/* From here on 'root' is the csum tree root. */
7385 root = root->fs_info->csum_root;
7386 if (!extent_buffer_uptodate(root->node)) {
7387 fprintf(stderr, "No valid csum tree found\n");
7391 btrfs_init_path(&path);
7392 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7393 key.type = BTRFS_EXTENT_CSUM_KEY;
7395 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7397 fprintf(stderr, "Error searching csum tree %d\n", ret);
7398 btrfs_release_path(&path);
7402 if (ret > 0 && path.slots[0])
7407 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7408 ret = btrfs_next_leaf(root, &path);
7410 fprintf(stderr, "Error going to next leaf "
7417 leaf = path.nodes[0];
7419 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7420 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each stored checksum stands for one sector of data. */
7425 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7426 csum_size) * root->fs_info->sectorsize;
7427 if (!check_data_csum)
7428 goto skip_csum_check;
7429 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7430 ret = check_extent_csums(root, key.offset, data_len,
7436 offset = key.offset;
/* Item is not contiguous with the current run: check the run, start a new one. */
7437 } else if (key.offset != offset + num_bytes) {
7438 ret = check_extent_exists(root, offset, num_bytes);
7440 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7441 "there is no extent record\n",
7442 offset, offset+num_bytes);
7445 offset = key.offset;
7448 num_bytes += data_len;
7452 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field in the order objectid, type,
 * offset, testing at each level whether @key is the smaller one.
 * The returned values are not visible in this view; presumably non-zero
 * means @key sorts before @drop_key -- TODO confirm.
 */
7456 static int is_dropped_key(struct btrfs_key *key,
7457 struct btrfs_key *drop_key) {
7458 if (key->objectid < drop_key->objectid)
7460 else if (key->objectid == drop_key->objectid) {
/* Same objectid: the type decides. */
7461 if (key->type < drop_key->type)
7463 else if (key->type == drop_key->type) {
/* Same objectid and type: the offset decides. */
7464 if (key->offset < drop_key->offset)
7472 * Here are the rules for FULL_BACKREF.
7474 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7475 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7477 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7478 * if it happened after the relocation occurred since we'll have dropped the
7479 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7480 * have no real way to know for sure.
7482 * We process the blocks one root at a time, and we start from the lowest root
7483 * objectid and go to the highest. So we can just lookup the owner backref for
7484 * the record and if we don't find it then we know it doesn't exist and we have
7487 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7488 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7489 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether BTRFS_BLOCK_FLAG_FULL_BACKREF applies to the tree block
 * @buf and store the answer in *flags.  When the extent record already
 * carries a different flag_block_full_backref value (and it is not
 * FLAG_UNSET), the record is marked bad_full_backref.
 *
 * @extent_cache: cache in which the extent record of @buf is looked up
 * @buf:          tree block being examined
 * @ri:           root item record of the root currently being walked
 */
7491 static int calc_extent_flag(struct cache_tree *extent_cache,
7492 struct extent_buffer *buf,
7493 struct root_item_record *ri,
7496 struct extent_record *rec;
7497 struct cache_extent *cache;
7498 struct tree_backref *tback;
7501 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7502 /* we have added this extent before */
7506 rec = container_of(cache, struct extent_record, cache);
7509 * Except file/reloc tree, we can not have
7512 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* buf is the root node of the tree being walked. */
7517 if (buf->start == ri->bytenr)
7520 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
/* The header owner is the root being walked. */
7523 owner = btrfs_header_owner(buf);
7524 if (owner == ri->objectid)
/* Look for a tree backref with parent 0 for the header owner. */
7527 tback = find_tree_backref(rec, 0, owner);
/* The flag was recorded before with a value other than 0. */
7532 if (rec->flag_block_full_backref != FLAG_UNSET &&
7533 rec->flag_block_full_backref != 0)
7534 rec->bad_full_backref = 1;
/* FULL_BACKREF outcome; a previously recorded value must have been 1. */
7537 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7538 if (rec->flag_block_full_backref != FLAG_UNSET &&
7539 rec->flag_block_full_backref != 1)
7540 rec->bad_full_backref = 1;
/*
 * Print one line to stderr naming a key type that was found in a root it
 * does not belong to, in the form "Invalid key type(<type>) found in
 * root(<root>)".
 *
 * @key_type: the offending key type
 * @rootid:   objectid of the root the key was found in
 */
7544 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7546 fprintf(stderr, "Invalid key type(");
7547 print_key_type(stderr, 0, key_type);
7548 fprintf(stderr, ") found in root(");
7549 print_objectid(stderr, rootid, 0);
7550 fprintf(stderr, ")\n");
7554 * Check if the key is valid with its extent buffer.
7556 * This is an early check in case an invalid key exists in an extent buffer
7557 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of type @key_type may live in the tree @rootid.
 * A mismatch is reported through report_mismatch_key_root().
 *
 * @rootid:   objectid of the tree the item was found in
 * @key_type: key type of the item
 */
7560 static int check_type_with_root(u64 rootid, u8 key_type)
7563 /* Only valid in chunk tree */
7564 case BTRFS_DEV_ITEM_KEY:
7565 case BTRFS_CHUNK_ITEM_KEY:
7566 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7569 /* valid in csum and log tree */
/*
 * NOTE(review): every other label here is a *_KEY type, but this one is a
 * tree objectid.  It looks like BTRFS_EXTENT_CSUM_KEY was intended --
 * confirm before relying on this case.
 */
7570 case BTRFS_CSUM_TREE_OBJECTID:
7571 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
/* Extent, metadata and block group items belong to the extent tree. */
7575 case BTRFS_EXTENT_ITEM_KEY:
7576 case BTRFS_METADATA_ITEM_KEY:
7577 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7578 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
/* Root items belong to the root tree. */
7581 case BTRFS_ROOT_ITEM_KEY:
7582 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
/* Device extents belong to the device tree. */
7585 case BTRFS_DEV_EXTENT_KEY:
7586 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7592 report_mismatch_key_root(key_type, rootid);
/*
 * Pick the next pending tree block, read it and feed its contents into the
 * check caches.
 *
 * For a leaf, every item is dispatched on its key type to the matching
 * process_*() / add_*_backref() helper.  For a node, every child pointer
 * gets an extent record and a tree backref and is queued via add_pending().
 * Global byte counters (total_btree_bytes, btree_space_waste, ...) are
 * updated along the way.
 *
 * @root:              used to reach fs_info and to read blocks
 * @bits:              scratch array filled by pick_next_pending()
 * @pending:           queue that receives leaf-level child blocks
 * @seen:              passed through to add_pending()
 * @reada:             blocks for which readahead has been issued
 * @nodes:             queue that receives node-level child blocks
 * @extent_cache:      extent records and backrefs
 * @chunk_cache:       receives chunk items
 * @dev_cache:         receives device items
 * @block_group_cache: receives block group items
 * @dev_extent_cache:  receives device extent items
 * @ri:                root item record of the tree being walked
 */
7596 static int run_next_block(struct btrfs_root *root,
7597 struct block_info *bits,
7600 struct cache_tree *pending,
7601 struct cache_tree *seen,
7602 struct cache_tree *reada,
7603 struct cache_tree *nodes,
7604 struct cache_tree *extent_cache,
7605 struct cache_tree *chunk_cache,
7606 struct rb_root *dev_cache,
7607 struct block_group_tree *block_group_cache,
7608 struct device_extent_tree *dev_extent_cache,
7609 struct root_item_record *ri)
7611 struct extent_buffer *buf;
7612 struct extent_record *rec = NULL;
7623 struct btrfs_key key;
7624 struct cache_extent *cache;
7627 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7628 bits_nr, &reada_bits);
/* Remember the picked blocks in 'reada' and start readahead on them. */
7633 for(i = 0; i < nritems; i++) {
7634 ret = add_cache_extent(reada, bits[i].start,
7639 /* fixme, get the parent transid */
7640 readahead_tree_block(root, bits[i].start,
/* The first picked block is the one processed in this call. */
7644 *last = bits[0].start;
7645 bytenr = bits[0].start;
7646 size = bits[0].size;
/* Drop the block from the pending, reada and nodes queues. */
7648 cache = lookup_cache_extent(pending, bytenr, size);
7650 remove_cache_extent(pending, cache);
7653 cache = lookup_cache_extent(reada, bytenr, size);
7655 remove_cache_extent(reada, cache);
7658 cache = lookup_cache_extent(nodes, bytenr, size);
7660 remove_cache_extent(nodes, cache);
/* The extent record supplies the generation expected by the parent. */
7663 cache = lookup_cache_extent(extent_cache, bytenr, size);
7665 rec = container_of(cache, struct extent_record, cache);
7666 gen = rec->parent_generation;
7669 /* fixme, get the real parent transid */
7670 buf = read_tree_block(root->fs_info, bytenr, size, gen);
7671 if (!extent_buffer_uptodate(buf)) {
7672 record_bad_block_io(root->fs_info,
7673 extent_cache, bytenr, size);
7677 nritems = btrfs_header_nritems(buf);
/*
 * Unless the extent tree is being rebuilt (init_extent_tree), the extent
 * tree is asked about this block first; calc_extent_flag() is the other
 * source for the flags.
 */
7680 if (!init_extent_tree) {
7681 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7682 btrfs_header_level(buf), 1, NULL,
7685 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7687 fprintf(stderr, "Couldn't calc extent flags\n");
7688 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7693 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7695 fprintf(stderr, "Couldn't calc extent flags\n");
7696 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7700 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7702 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7703 ri->objectid == btrfs_header_owner(buf)) {
7705 * Ok we got to this block from it's original owner and
7706 * we have FULL_BACKREF set. Relocation can leave
7707 * converted blocks over so this is altogether possible,
7708 * however it's not possible if the generation > the
7709 * last snapshot, so check for this case.
7711 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7712 btrfs_header_generation(buf) > ri->last_snapshot) {
7713 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7714 rec->bad_full_backref = 1;
7719 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7720 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7721 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7722 rec->bad_full_backref = 1;
/* Store the final decision in the extent record. */
7726 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7727 rec->flag_block_full_backref = 1;
7731 rec->flag_block_full_backref = 0;
7733 owner = btrfs_header_owner(buf);
7736 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: account the unused space and dispatch every item on its key type. */
7740 if (btrfs_is_leaf(buf)) {
7741 btree_space_waste += btrfs_leaf_free_space(root, buf);
7742 for (i = 0; i < nritems; i++) {
7743 struct btrfs_file_extent_item *fi;
7744 btrfs_item_key_to_cpu(buf, &key, i);
7746 * Check key type against the leaf owner.
7747 * Could filter quite a lot of early error if
7750 if (check_type_with_root(btrfs_header_owner(buf),
7752 fprintf(stderr, "ignoring invalid key\n");
7755 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7756 process_extent_item(root, extent_cache, buf,
7760 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7761 process_extent_item(root, extent_cache, buf,
7765 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7767 btrfs_item_size_nr(buf, i);
7770 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7771 process_chunk_item(chunk_cache, &key, buf, i);
7774 if (key.type == BTRFS_DEV_ITEM_KEY) {
7775 process_device_item(dev_cache, &key, buf, i);
7778 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7779 process_block_group_item(block_group_cache,
7783 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7784 process_device_extent_item(dev_extent_cache,
7789 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7790 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7791 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed (non-inline) backref items: key.objectid is the extent start. */
7798 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7799 ret = add_tree_backref(extent_cache,
7800 key.objectid, 0, key.offset, 0);
7803 "add_tree_backref failed (leaf tree block): %s",
7807 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7808 ret = add_tree_backref(extent_cache,
7809 key.objectid, key.offset, 0, 0);
7812 "add_tree_backref failed (leaf shared block): %s",
7816 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7817 struct btrfs_extent_data_ref *ref;
7818 ref = btrfs_item_ptr(buf, i,
7819 struct btrfs_extent_data_ref);
7820 add_data_backref(extent_cache,
7822 btrfs_extent_data_ref_root(buf, ref),
7823 btrfs_extent_data_ref_objectid(buf,
7825 btrfs_extent_data_ref_offset(buf, ref),
7826 btrfs_extent_data_ref_count(buf, ref),
7827 0, root->fs_info->sectorsize);
7830 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7831 struct btrfs_shared_data_ref *ref;
7832 ref = btrfs_item_ptr(buf, i,
7833 struct btrfs_shared_data_ref);
7834 add_data_backref(extent_cache,
7835 key.objectid, key.offset, 0, 0, 0,
7836 btrfs_shared_data_ref_count(buf, ref),
7837 0, root->fs_info->sectorsize);
/* Orphan items: a bad_item entry is queued on the delete_items list. */
7840 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7841 struct bad_item *bad;
7843 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7847 bad = malloc(sizeof(struct bad_item));
7850 INIT_LIST_HEAD(&bad->list);
7851 memcpy(&bad->key, &key,
7852 sizeof(struct btrfs_key));
7853 bad->root_id = owner;
7854 list_add_tail(&bad->list, &delete_items);
/* From here on the item is looked at as a file extent item. */
7857 if (key.type != BTRFS_EXTENT_DATA_KEY)
7859 fi = btrfs_item_ptr(buf, i,
7860 struct btrfs_file_extent_item);
7861 if (btrfs_file_extent_type(buf, fi) ==
7862 BTRFS_FILE_EXTENT_INLINE)
7864 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7867 data_bytes_allocated +=
7868 btrfs_file_extent_disk_num_bytes(buf, fi);
7869 if (data_bytes_allocated < root->fs_info->sectorsize) {
7872 data_bytes_referenced +=
7873 btrfs_file_extent_num_bytes(buf, fi);
/* Data backref offset is the file offset minus the offset into the extent. */
7874 add_data_backref(extent_cache,
7875 btrfs_file_extent_disk_bytenr(buf, fi),
7876 parent, owner, key.objectid, key.offset -
7877 btrfs_file_extent_offset(buf, fi), 1, 1,
7878 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record and queue every child block pointer. */
7882 struct btrfs_key first_key;
7884 first_key.objectid = 0;
7887 btrfs_item_key_to_cpu(buf, &first_key, 0);
7888 level = btrfs_header_level(buf);
7889 for (i = 0; i < nritems; i++) {
7890 struct extent_record tmpl;
7892 ptr = btrfs_node_blockptr(buf, i);
7893 size = root->fs_info->nodesize;
7894 btrfs_node_key_to_cpu(buf, &key, i);
/* At the drop level, child keys are tested against the root's drop_key. */
7896 if ((level == ri->drop_level)
7897 && is_dropped_key(&key, &ri->drop_key)) {
7902 memset(&tmpl, 0, sizeof(tmpl));
7903 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7904 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7909 tmpl.max_size = size;
7910 ret = add_extent_rec(extent_cache, &tmpl);
7914 ret = add_tree_backref(extent_cache, ptr, parent,
7918 "add_tree_backref failed (non-leaf block): %s",
7924 add_pending(nodes, seen, ptr, size);
7926 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots of the node count as wasted space. */
7929 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7930 nritems) * sizeof(struct btrfs_key_ptr);
/* Per-tree byte accounting for the block just processed. */
7932 total_btree_bytes += buf->len;
7933 if (fs_root_objectid(btrfs_header_owner(buf)))
7934 total_fs_tree_bytes += buf->len;
7935 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7936 total_extent_tree_bytes += buf->len;
/* Reloc-tree block with mixed backref rev but without the RELOC header flag. */
7937 if (!found_old_backref &&
7938 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7939 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7940 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7941 found_old_backref = 1;
7943 free_extent_buffer(buf);
/*
 * Queue the root block @buf for processing and seed @extent_cache with an
 * extent record and a tree backref for it.
 *
 * @buf:          root block of a tree
 * @extent_cache: receives the extent record and the backref
 * @pending:      queue used when @buf is a leaf (level 0)
 * @seen:         passed through to add_pending()
 * @nodes:        queue used when @buf is a node (level > 0)
 */
7947 static int add_root_to_pending(struct extent_buffer *buf,
7948 struct cache_tree *extent_cache,
7949 struct cache_tree *pending,
7950 struct cache_tree *seen,
7951 struct cache_tree *nodes,
7954 struct extent_record tmpl;
7957 if (btrfs_header_level(buf) > 0)
7958 add_pending(nodes, seen, buf->start, buf->len);
7960 add_pending(pending, seen, buf->start, buf->len);
/* The extent record is added from a zeroed template. */
7962 memset(&tmpl, 0, sizeof(tmpl));
7963 tmpl.start = buf->start;
7968 tmpl.max_size = buf->len;
7969 add_extent_rec(extent_cache, &tmpl);
/*
 * Reloc tree roots and blocks older than the mixed backref revision get a
 * backref whose parent is the block itself; the other call passes parent 0
 * and the root objectid.
 */
7971 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7972 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7973 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7976 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7981 /* as we fix the tree, we might be deleting blocks that
7982 * we're tracking for repair. This hook makes sure we
7983 * remove any backrefs for blocks as we are fixing them.
/*
 * Hook run when an extent reference is dropped: bring the tracked extent
 * record in fs_info->fsck_extent_cache and its backref in line with it.
 *
 * @bytenr, @num_bytes: extent whose reference is dropped
 * @parent, @root_objectid, @owner, @offset: identify the backref
 *
 * An @owner of at least BTRFS_FIRST_FREE_OBJECTID marks a data extent.
 */
7985 static int free_extent_hook(struct btrfs_trans_handle *trans,
7986 struct btrfs_root *root,
7987 u64 bytenr, u64 num_bytes, u64 parent,
7988 u64 root_objectid, u64 owner, u64 offset,
7991 struct extent_record *rec;
7992 struct cache_extent *cache;
7994 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7996 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7997 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8001 rec = container_of(cache, struct extent_record, cache);
/* Data backref: counters drop by refs_to_drop. */
8003 struct data_backref *back;
8004 back = find_data_backref(rec, parent, root_objectid, owner,
8005 offset, 1, bytenr, num_bytes);
8008 if (back->node.found_ref) {
8009 back->found_ref -= refs_to_drop;
8011 rec->refs -= refs_to_drop;
8013 if (back->node.found_extent_tree) {
8014 back->num_refs -= refs_to_drop;
8015 if (rec->extent_item_refs)
8016 rec->extent_item_refs -= refs_to_drop;
/* Clear the found flags once the matching counter has reached zero. */
8018 if (back->found_ref == 0)
8019 back->node.found_ref = 0;
8020 if (back->num_refs == 0)
8021 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is unlinked while found_ref is still set;
 * confirm that '!found_ref' was not intended here.
 */
8023 if (!back->node.found_extent_tree && back->node.found_ref) {
8024 list_del(&back->node.list);
/* Tree backref: the found flags are cleared directly. */
8028 struct tree_backref *back;
8029 back = find_tree_backref(rec, parent, root_objectid);
8032 if (back->node.found_ref) {
8035 back->node.found_ref = 0;
8037 if (back->node.found_extent_tree) {
8038 if (rec->extent_item_refs)
8039 rec->extent_item_refs--;
8040 back->node.found_extent_tree = 0;
/* NOTE(review): same unlink condition as the data case above -- confirm. */
8042 if (!back->node.found_extent_tree && back->node.found_ref) {
8043 list_del(&back->node.list);
8047 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent tree items that belong to the extent at bytenr: the
 * extent/metadata item itself and its keyed backref items.  After an extent
 * or metadata item is removed, btrfs_update_block_group() is called for the
 * bytes it covered.
 *
 * @trans: transaction the deletions run in
 * @root:  used to reach the extent root
 * @path:  scratch path; released again on the paths visible below
 */
8052 static int delete_extent_records(struct btrfs_trans_handle *trans,
8053 struct btrfs_root *root,
8054 struct btrfs_path *path,
8057 struct btrfs_key key;
8058 struct btrfs_key found_key;
8059 struct extent_buffer *leaf;
/* Search with the largest offset for this bytenr. */
8064 key.objectid = bytenr;
8066 key.offset = (u64)-1;
8069 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8076 if (path->slots[0] == 0)
8082 leaf = path->nodes[0];
8083 slot = path->slots[0];
8085 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8086 if (found_key.objectid != bytenr)
/*
 * Not an extent-related item: lower the search key to just below the
 * found key, taking care not to wrap type or offset below zero.
 */
8089 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8090 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8091 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8092 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8093 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8094 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8095 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8096 btrfs_release_path(path);
8097 if (found_key.type == 0) {
8098 if (found_key.offset == 0)
8100 key.offset = found_key.offset - 1;
8101 key.type = found_key.type;
8103 key.type = found_key.type - 1;
8104 key.offset = (u64)-1;
8108 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8109 found_key.objectid, found_key.type, found_key.offset);
8111 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8114 btrfs_release_path(path);
/* Length is key.offset for EXTENT_ITEM and the nodesize for METADATA_ITEM. */
8116 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8117 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8118 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8119 found_key.offset : root->fs_info->nodesize;
8121 ret = btrfs_update_block_group(trans, root, bytenr,
8128 btrfs_release_path(path);
8133 * for a single backref, this will allocate a new extent
8134 * and add the backref to it.
/*
 * Write the extent described by @rec to the extent tree and add the
 * reference described by @back to it.
 *
 * The code below first inserts an empty EXTENT_ITEM (with a
 * btrfs_tree_block_info behind it for tree blocks), then adds the backref
 * through btrfs_inc_extent_ref().
 *
 * @trans:     transaction the changes run in
 * @info:      filesystem info; supplies the extent root
 * @path:      scratch path, released after use
 * @rec:       extent record to write
 * @back:      backref to add (data or tree, see back->is_data)
 * @allocated: presumably non-zero when the extent item exists already
 *             -- TODO confirm, the test on it is not visible here
 * @flags:     extra extent flags OR-ed in for tree blocks
 */
8136 static int record_extent(struct btrfs_trans_handle *trans,
8137 struct btrfs_fs_info *info,
8138 struct btrfs_path *path,
8139 struct extent_record *rec,
8140 struct extent_backref *back,
8141 int allocated, u64 flags)
8144 struct btrfs_root *extent_root = info->extent_root;
8145 struct extent_buffer *leaf;
8146 struct btrfs_key ins_key;
8147 struct btrfs_extent_item *ei;
8148 struct data_backref *dback;
8149 struct btrfs_tree_block_info *bi;
8152 rec->max_size = max_t(u64, rec->max_size,
8156 u32 item_size = sizeof(*ei);
8159 item_size += sizeof(*bi);
/* New item is keyed (start, EXTENT_ITEM, max_size). */
8161 ins_key.objectid = rec->start;
8162 ins_key.offset = rec->max_size;
8163 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8165 ret = btrfs_insert_empty_item(trans, extent_root, path,
8166 &ins_key, item_size);
8170 leaf = path->nodes[0];
8171 ei = btrfs_item_ptr(leaf, path->slots[0],
8172 struct btrfs_extent_item);
/* The item starts with zero refs; the refs are added further down. */
8174 btrfs_set_extent_refs(leaf, ei, 0);
8175 btrfs_set_extent_generation(leaf, ei, rec->generation);
8177 if (back->is_data) {
8178 btrfs_set_extent_flags(leaf, ei,
8179 BTRFS_EXTENT_FLAG_DATA);
8181 struct btrfs_disk_key copy_key;;
/* The tree block info sits directly behind the extent item. */
8183 bi = (struct btrfs_tree_block_info *)(ei + 1);
8184 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/*
 * NOTE(review): '©_key' in the calls below looks like a mis-encoded
 * '&copy_key' -- confirm against a clean copy of this file.
 */
8187 btrfs_set_disk_key_objectid(©_key,
8188 rec->info_objectid);
8189 btrfs_set_disk_key_type(©_key, 0);
8190 btrfs_set_disk_key_offset(©_key, 0);
8192 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8193 btrfs_set_tree_block_key(leaf, bi, ©_key);
8195 btrfs_set_extent_flags(leaf, ei,
8196 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
/* Account the new extent in its block group. */
8199 btrfs_mark_buffer_dirty(leaf);
8200 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8201 rec->max_size, 1, 0);
8204 btrfs_release_path(path);
/* Data backref: one btrfs_inc_extent_ref() call per found reference. */
8207 if (back->is_data) {
8211 dback = to_data_backref(back);
8212 if (back->full_backref)
8213 parent = dback->parent;
8217 for (i = 0; i < dback->found_ref; i++) {
8218 /* if parent != 0, we're doing a full backref
8219 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8220 * just makes the backref allocator create a data
8223 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8224 rec->start, rec->max_size,
8228 BTRFS_FIRST_FREE_OBJECTID :
8234 fprintf(stderr, "adding new data backref"
8235 " on %llu %s %llu owner %llu"
8236 " offset %llu found %d\n",
8237 (unsigned long long)rec->start,
8238 back->full_backref ?
8240 back->full_backref ?
8241 (unsigned long long)parent :
8242 (unsigned long long)dback->root,
8243 (unsigned long long)dback->owner,
8244 (unsigned long long)dback->offset,
8248 struct tree_backref *tback;
8250 tback = to_tree_backref(back);
8251 if (back->full_backref)
8252 parent = tback->parent;
8256 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8257 rec->start, rec->max_size,
8258 parent, tback->root, 0, 0);
8259 fprintf(stderr, "adding new tree backref on "
8260 "start %llu len %llu parent %llu root %llu\n",
8261 rec->start, rec->max_size, parent, tback->root);
8264 btrfs_release_path(path);
/*
 * Search @entries for an extent_entry whose bytenr and bytes both equal the
 * requested values.
 *
 * NOTE(review): the return statements are not shown here; presumably the
 * matching entry is returned, or NULL when nothing matches - confirm.
 */
8268 static struct extent_entry *find_entry(struct list_head *entries,
8269 u64 bytenr, u64 bytes)
8271 struct extent_entry *entry = NULL;
8273 list_for_each_entry(entry, entries, list) {
8274 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the entry of @entries that the backrefs agree on most.
 *
 * Entries whose broken count equals their total count are not trusted.
 * While walking the list, the count of the current entry is compared with
 * the count of the previous entry and of the best candidate so far; equal
 * counts mean no winner can be declared yet and the search continues.
 *
 * NOTE(review): presumably returns the winning entry, or NULL when no
 * unambiguous winner exists - the return statements are not shown here.
 */
8281 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8283 struct extent_entry *entry, *best = NULL, *prev = NULL;
8285 list_for_each_entry(entry, entries, list) {
8287 * If there are as many broken entries as entries then we know
8288 * not to trust this particular entry.
8290 if (entry->broken == entry->count)
8294 * Special case, when there are only two entries and 'best' is
8304 * If our current entry == best then we can't be sure our best
8305 * is really the best, so we need to keep searching.
8307 if (best && best->count == entry->count) {
8313 /* Prev == entry, not good enough, have to keep searching */
8314 if (!prev->broken && prev->count == entry->count)
8318 best = (prev->count > entry->count) ? prev : entry;
8319 else if (best->count < entry->count)
/*
 * Rewrite the file extent item behind @dback so that it describes @entry.
 *
 * The fs root named by dback->root is read, then the tree is searched from
 * (dback->owner, EXTENT_DATA, dback->offset) forward for the file extent
 * whose disk bytenr and disk num bytes match @dback.  That key is looked up
 * again inside a transaction, this time cowing down to the item, and the
 * item is updated:
 *  - disk_bytenr becomes entry->bytenr; when the backref is not marked
 *    broken the file extent offset is rewritten as well,
 *  - disk_num_bytes becomes entry->bytes,
 *  - ram_bytes becomes entry->bytes for uncompressed extents only; for
 *    compressed ones a warning is printed instead.
 * A compressed extent whose start differs from entry->bytenr, and a ref
 * that reaches outside the entry range, are reported rather than fixed.
 *
 * Returns ret when it is non-zero, otherwise the result of committing the
 * transaction.
 */
8327 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8328 struct data_backref *dback, struct extent_entry *entry)
8330 struct btrfs_trans_handle *trans;
8331 struct btrfs_root *root;
8332 struct btrfs_file_extent_item *fi;
8333 struct extent_buffer *leaf;
8334 struct btrfs_key key;
8338 key.objectid = dback->root;
8339 key.type = BTRFS_ROOT_ITEM_KEY;
8340 key.offset = (u64)-1;
8341 root = btrfs_read_fs_root(info, &key);
8343 fprintf(stderr, "Couldn't find root for our ref\n");
8348 * The backref points to the original offset of the extent if it was
8349 * split, so we need to search down to the offset we have and then walk
8350 * forward until we find the backref we're looking for.
8352 key.objectid = dback->owner;
8353 key.type = BTRFS_EXTENT_DATA_KEY;
8354 key.offset = dback->offset;
8355 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8357 fprintf(stderr, "Error looking up ref %d\n", ret);
8362 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8363 ret = btrfs_next_leaf(root, path);
8365 fprintf(stderr, "Couldn't find our ref, next\n");
8369 leaf = path->nodes[0];
8370 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8371 if (key.objectid != dback->owner ||
8372 key.type != BTRFS_EXTENT_DATA_KEY) {
8373 fprintf(stderr, "Couldn't find our ref, search\n");
8376 fi = btrfs_item_ptr(leaf, path->slots[0],
8377 struct btrfs_file_extent_item);
8378 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8379 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8381 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8386 btrfs_release_path(path);
8388 trans = btrfs_start_transaction(root, 1);
8390 return PTR_ERR(trans);
8393 * Ok we have the key of the file extent we want to fix, now we can cow
8394 * down to the thing and fix it.
8396 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8398 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8399 key.objectid, key.type, key.offset, ret);
8403 fprintf(stderr, "Well that's odd, we just found this key "
8404 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8409 leaf = path->nodes[0];
8410 fi = btrfs_item_ptr(leaf, path->slots[0],
8411 struct btrfs_file_extent_item);
8413 if (btrfs_file_extent_compression(leaf, fi) &&
8414 dback->disk_bytenr != entry->bytenr) {
8415 fprintf(stderr, "Ref doesn't match the record start and is "
8416 "compressed, please take a btrfs-image of this file "
8417 "system and send it to a btrfs developer so they can "
8418 "complete this functionality for bytenr %Lu\n",
8419 dback->disk_bytenr);
8424 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8425 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8426 } else if (dback->disk_bytenr > entry->bytenr) {
8427 u64 off_diff, offset;
8429 off_diff = dback->disk_bytenr - entry->bytenr;
8430 offset = btrfs_file_extent_offset(leaf, fi);
8431 if (dback->disk_bytenr + offset +
8432 btrfs_file_extent_num_bytes(leaf, fi) >
8433 entry->bytenr + entry->bytes) {
8434 fprintf(stderr, "Ref is past the entry end, please "
8435 "take a btrfs-image of this file system and "
8436 "send it to a btrfs developer, ref %Lu\n",
8437 dback->disk_bytenr);
8442 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8443 btrfs_set_file_extent_offset(leaf, fi, offset);
8444 } else if (dback->disk_bytenr < entry->bytenr) {
8447 offset = btrfs_file_extent_offset(leaf, fi);
8448 if (dback->disk_bytenr + offset < entry->bytenr) {
8449 fprintf(stderr, "Ref is before the entry start, please"
8450 " take a btrfs-image of this file system and "
8451 "send it to a btrfs developer, ref %Lu\n",
8452 dback->disk_bytenr);
8457 offset += dback->disk_bytenr;
8458 offset -= entry->bytenr;
8459 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8460 btrfs_set_file_extent_offset(leaf, fi, offset);
8463 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8466 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8467 * only do this if we aren't using compression, otherwise it's a
8470 if (!btrfs_file_extent_compression(leaf, fi))
8471 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8473 printf("ram bytes may be wrong?\n");
8474 btrfs_mark_buffer_dirty(leaf);
8476 err = btrfs_commit_transaction(trans, root);
8477 btrfs_release_path(path);
8478 return ret ? ret : err;
/*
 * Make the data backrefs of @rec agree on the extent's bytenr and length.
 *
 * Non-full data backrefs that have a real ref (found_ref != 0) are grouped
 * into extent_entry buckets keyed by (disk_bytenr, bytes).  When they
 * disagree with each other, with the record, or are marked broken,
 * find_most_right_entry() is asked which bucket to trust; the extent record
 * itself (rec->start, rec->nr) is then looked up among the buckets and, if
 * absent while broken backrefs and a found record exist, added as a new
 * bucket before asking again.  Each backref that differs from the chosen
 * bucket is rewritten with repair_ref().  A chosen bucket whose bytenr
 * differs from rec->start is reported and not repaired.  The bucket list is
 * emptied before returning.
 *
 * NOTE(review): the return statements are not shown here; the comment near
 * the end says the cache has to be dropped and rescanned once refs were
 * modified - confirm how that is signalled to the caller.
 */
8481 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8482 struct extent_record *rec)
8484 struct extent_backref *back;
8485 struct data_backref *dback;
8486 struct extent_entry *entry, *best = NULL;
8489 int broken_entries = 0;
8494 * Metadata is easy and the backrefs should always agree on bytenr and
8495 * size, if not we've got bigger issues.
8500 list_for_each_entry(back, &rec->backrefs, list) {
8501 if (back->full_backref || !back->is_data)
8504 dback = to_data_backref(back);
8507 * We only pay attention to backrefs that we found a real
8510 if (dback->found_ref == 0)
8514 * For now we only catch when the bytes don't match, not the
8515 * bytenr. We can easily do this at the same time, but I want
8516 * to have a fs image to test on before we just add repair
8517 * functionality willy-nilly so we know we won't screw up the
8521 entry = find_entry(&entries, dback->disk_bytenr,
8524 entry = malloc(sizeof(struct extent_entry));
8529 memset(entry, 0, sizeof(*entry));
8530 entry->bytenr = dback->disk_bytenr;
8531 entry->bytes = dback->bytes;
8532 list_add_tail(&entry->list, &entries);
8537 * If we only have on entry we may think the entries agree when
8538 * in reality they don't so we have to do some extra checking.
8540 if (dback->disk_bytenr != rec->start ||
8541 dback->bytes != rec->nr || back->broken)
8552 /* Yay all the backrefs agree, carry on good sir */
8553 if (nr_entries <= 1 && !mismatch)
8556 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8557 "%Lu\n", rec->start);
8560 * First we want to see if the backrefs can agree amongst themselves who
8561 * is right, so figure out which one of the entries has the highest
8564 best = find_most_right_entry(&entries);
8567 * Ok so we may have an even split between what the backrefs think, so
8568 * this is where we use the extent ref to see what it thinks.
8571 entry = find_entry(&entries, rec->start, rec->nr);
8572 if (!entry && (!broken_entries || !rec->found_rec)) {
8573 fprintf(stderr, "Backrefs don't agree with each other "
8574 "and extent record doesn't agree with anybody,"
8575 " so we can't fix bytenr %Lu bytes %Lu\n",
8576 rec->start, rec->nr);
8579 } else if (!entry) {
8581 * Ok our backrefs were broken, we'll assume this is the
8582 * correct value and add an entry for this range.
8584 entry = malloc(sizeof(struct extent_entry));
8589 memset(entry, 0, sizeof(*entry));
8590 entry->bytenr = rec->start;
8591 entry->bytes = rec->nr;
8592 list_add_tail(&entry->list, &entries);
8596 best = find_most_right_entry(&entries);
8598 fprintf(stderr, "Backrefs and extent record evenly "
8599 "split on who is right, this is going to "
8600 "require user input to fix bytenr %Lu bytes "
8601 "%Lu\n", rec->start, rec->nr);
8608 * I don't think this can happen currently as we'll abort() if we catch
8609 * this case higher up, but in case somebody removes that we still can't
8610 * deal with it properly here yet, so just bail out of that's the case.
8612 if (best->bytenr != rec->start) {
8613 fprintf(stderr, "Extent start and backref starts don't match, "
8614 "please use btrfs-image on this file system and send "
8615 "it to a btrfs developer so they can make fsck fix "
8616 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8617 rec->start, rec->nr);
8623 * Ok great we all agreed on an extent record, let's go find the real
8624 * references and fix up the ones that don't match.
8626 list_for_each_entry(back, &rec->backrefs, list) {
8627 if (back->full_backref || !back->is_data)
8630 dback = to_data_backref(back);
8633 * Still ignoring backrefs that don't have a real ref attached
8636 if (dback->found_ref == 0)
8639 if (dback->bytes == best->bytes &&
8640 dback->disk_bytenr == best->bytenr)
8643 ret = repair_ref(info, path, dback, best);
8649 * Ok we messed with the actual refs, which means we need to drop our
8650 * entire cache and go back and rescan. I know this is a huge pain and
8651 * adds a lot of extra work, but it's the only way to be safe. Once all
8652 * the backrefs agree we may not need to do anything to the extent
8657 while (!list_empty(&entries)) {
8658 entry = list_entry(entries.next, struct extent_entry, list);
8659 list_del_init(&entry->list);
/*
 * Replace @rec in @extent_cache with its single duplicate record, which per
 * the comment below is the one that came from a real extent item.
 *
 * Nothing is replaced when @rec has found_rec set or has more than one
 * duplicate.  Otherwise the duplicate ("good") is re-initialised, takes over
 * rec's ref count and backrefs, and is inserted into the cache in rec's
 * place.  Every other cached record found by the lookup on good's range is
 * removed from the cache: records with found_rec set or with duplicates of
 * their own are queued on good->dups (and good is put on duplicate_extents),
 * the rest only have their refs and backrefs merged into good.
 *
 * Returns 0 if good ended up with duplicates, 1 otherwise.
 * NOTE(review): the value returned by the early-exit check is not shown.
 */
8665 static int process_duplicates(struct cache_tree *extent_cache,
8666 struct extent_record *rec)
8668 struct extent_record *good, *tmp;
8669 struct cache_extent *cache;
8673 * If we found a extent record for this extent then return, or if we
8674 * have more than one duplicate we are likely going to need to delete
8677 if (rec->found_rec || rec->num_duplicates > 1)
8680 /* Shouldn't happen but just in case */
8681 BUG_ON(!rec->num_duplicates);
8684 * So this happens if we end up with a backref that doesn't match the
8685 * actual extent entry. So either the backref is bad or the extent
8686 * entry is bad. Either way we want to have the extent_record actually
8687 * reflect what we found in the extent_tree, so we need to take the
8688 * duplicate out and use that as the extent_record since the only way we
8689 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8691 remove_cache_extent(extent_cache, &rec->cache);
8693 good = to_extent_record(rec->dups.next);
8694 list_del_init(&good->list);
8695 INIT_LIST_HEAD(&good->backrefs);
8696 INIT_LIST_HEAD(&good->dups);
8697 good->cache.start = good->start;
8698 good->cache.size = good->nr;
8699 good->content_checked = 0;
8700 good->owner_ref_checked = 0;
8701 good->num_duplicates = 0;
8702 good->refs = rec->refs;
8703 list_splice_init(&rec->backrefs, &good->backrefs);
8705 cache = lookup_cache_extent(extent_cache, good->start,
8709 tmp = container_of(cache, struct extent_record, cache);
8712 * If we find another overlapping extent and it's found_rec is
8713 * set then it's a duplicate and we need to try and delete
8716 if (tmp->found_rec || tmp->num_duplicates > 0) {
8717 if (list_empty(&good->list))
8718 list_add_tail(&good->list,
8719 &duplicate_extents);
8720 good->num_duplicates += tmp->num_duplicates + 1;
8721 list_splice_init(&tmp->dups, &good->dups);
8722 list_del_init(&tmp->list);
8723 list_add_tail(&tmp->list, &good->dups);
8724 remove_cache_extent(extent_cache, &tmp->cache);
8729 * Ok we have another non extent item backed extent rec, so lets
8730 * just add it to this extent and carry on like we did above.
8732 good->refs += tmp->refs;
8733 list_splice_init(&tmp->backrefs, &good->backrefs);
8734 remove_cache_extent(extent_cache, &tmp->cache);
8737 ret = insert_cache_extent(extent_cache, &good->cache);
8740 return good->num_duplicates ? 0 : 1;
/*
 * Delete the extent items of the duplicates of @rec from the extent tree.
 *
 * The duplicates on rec->dups are first compared by start and length to find
 * the record that covers all of them; overlapping records that do not fully
 * cover each other are reported.  @rec and entries of rec->dups are then
 * queued on a local delete list.  Inside one transaction on the extent root,
 * every queued record with found_rec set is looked up by the key
 * (start, BTRFS_EXTENT_ITEM_KEY, nr) and its item deleted; a metadata record
 * on the list is reported as unexpected.  Both the delete list and rec->dups
 * are emptied afterwards.
 *
 * rec->num_duplicates is reset to 0 when there was no error and nothing was
 * deleted.  Returns ret when it is non-zero, otherwise nr_del.
 * NOTE(review): which record is kept out of the delete list, and where
 * nr_del is incremented, is not shown here - presumably it counts deleted
 * items.
 */
8743 static int delete_duplicate_records(struct btrfs_root *root,
8744 struct extent_record *rec)
8746 struct btrfs_trans_handle *trans;
8747 LIST_HEAD(delete_list);
8748 struct btrfs_path path;
8749 struct extent_record *tmp, *good, *n;
8752 struct btrfs_key key;
8754 btrfs_init_path(&path);
8757 /* Find the record that covers all of the duplicates. */
8758 list_for_each_entry(tmp, &rec->dups, list) {
8759 if (good->start < tmp->start)
8761 if (good->nr > tmp->nr)
8764 if (tmp->start + tmp->nr < good->start + good->nr) {
8765 fprintf(stderr, "Ok we have overlapping extents that "
8766 "aren't completely covered by each other, this "
8767 "is going to require more careful thought. "
8768 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8769 tmp->start, tmp->nr, good->start, good->nr);
8776 list_add_tail(&rec->list, &delete_list);
8778 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8781 list_move_tail(&tmp->list, &delete_list);
8784 root = root->fs_info->extent_root;
8785 trans = btrfs_start_transaction(root, 1);
8786 if (IS_ERR(trans)) {
8787 ret = PTR_ERR(trans);
8791 list_for_each_entry(tmp, &delete_list, list) {
8792 if (tmp->found_rec == 0)
8794 key.objectid = tmp->start;
8795 key.type = BTRFS_EXTENT_ITEM_KEY;
8796 key.offset = tmp->nr;
8798 /* Shouldn't happen but just in case */
8799 if (tmp->metadata) {
8800 fprintf(stderr, "Well this shouldn't happen, extent "
8801 "record overlaps but is metadata? "
8802 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8806 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8812 ret = btrfs_del_item(trans, root, &path);
8815 btrfs_release_path(&path);
8818 err = btrfs_commit_transaction(trans, root);
8822 while (!list_empty(&delete_list)) {
8823 tmp = to_extent_record(delete_list.next);
8824 list_del_init(&tmp->list);
8830 while (!list_empty(&rec->dups)) {
8831 tmp = to_extent_record(rec->dups.next);
8832 list_del_init(&tmp->list);
8836 btrfs_release_path(&path);
8838 if (!ret && !nr_del)
8839 rec->num_duplicates = 0;
8841 return ret ? ret : nr_del;
/*
 * Try to locate the file extents behind data backrefs of @rec that were not
 * found so far.
 *
 * Full backrefs, non-data backrefs and backrefs with found_ref already set
 * are skipped.  For each remaining backref the root dback->root is read (a
 * missing root, -ENOENT, skips the backref; any other error is returned) and
 * the key (dback->owner, EXTENT_DATA, dback->offset) is searched in it.  The
 * disk bytenr and disk num bytes of the file extent are read and the bytenr
 * is looked up in @extent_cache; per the comment below, backrefs whose
 * bytenr already has its own extent record are not adopted.  Otherwise
 * found_ref is incremented and the backref's disk_bytenr/bytes are
 * overwritten with the values read from the file extent.
 *
 * NOTE(review): the final return and the statement marking the backref as
 * untrusted are not shown here.
 */
8844 static int find_possible_backrefs(struct btrfs_fs_info *info,
8845 struct btrfs_path *path,
8846 struct cache_tree *extent_cache,
8847 struct extent_record *rec)
8849 struct btrfs_root *root;
8850 struct extent_backref *back;
8851 struct data_backref *dback;
8852 struct cache_extent *cache;
8853 struct btrfs_file_extent_item *fi;
8854 struct btrfs_key key;
8858 list_for_each_entry(back, &rec->backrefs, list) {
8859 /* Don't care about full backrefs (poor unloved backrefs) */
8860 if (back->full_backref || !back->is_data)
8863 dback = to_data_backref(back);
8865 /* We found this one, we don't need to do a lookup */
8866 if (dback->found_ref)
8869 key.objectid = dback->root;
8870 key.type = BTRFS_ROOT_ITEM_KEY;
8871 key.offset = (u64)-1;
8873 root = btrfs_read_fs_root(info, &key);
8875 /* No root, definitely a bad ref, skip */
8876 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8878 /* Other err, exit */
8880 return PTR_ERR(root);
8882 key.objectid = dback->owner;
8883 key.type = BTRFS_EXTENT_DATA_KEY;
8884 key.offset = dback->offset;
8885 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8887 btrfs_release_path(path);
8890 /* Didn't find it, we can carry on */
8895 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8896 struct btrfs_file_extent_item);
8897 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8898 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8899 btrfs_release_path(path);
8900 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8902 struct extent_record *tmp;
8903 tmp = container_of(cache, struct extent_record, cache);
8906 * If we found an extent record for the bytenr for this
8907 * particular backref then we can't add it to our
8908 * current extent record. We only want to add backrefs
8909 * that don't have a corresponding extent item in the
8910 * extent tree since they likely belong to this record
8911 * and we need to fix it if it doesn't match bytenrs.
8917 dback->found_ref += 1;
8918 dback->disk_bytenr = bytenr;
8919 dback->bytes = bytes;
8922 * Set this so the verify backref code knows not to trust the
8923 * values in this backref.
8932 * Record orphan data ref into corresponding root.
8934 * Return 0 if the extent item contains data ref and recorded.
8935 * Return 1 if the extent item contains no useful data ref
8936 * On that case, it may contains only shared_dataref or metadata backref
8937 * or the file extent exists(this should be handled by the extent bytenr
8939 * Return <0 if something goes wrong.
8941 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8942 struct extent_record *rec)
8944 struct btrfs_key key;
8945 struct btrfs_root *dest_root;
8946 struct extent_backref *back;
8947 struct data_backref *dback;
8948 struct orphan_data_extent *orphan;
8949 struct btrfs_path path;
8950 int recorded_data_ref = 0;
8955 btrfs_init_path(&path);
8956 list_for_each_entry(back, &rec->backrefs, list) {
8957 if (back->full_backref || !back->is_data ||
8958 !back->found_extent_tree)
8960 dback = to_data_backref(back);
8961 if (dback->found_ref)
8963 key.objectid = dback->root;
8964 key.type = BTRFS_ROOT_ITEM_KEY;
8965 key.offset = (u64)-1;
8967 dest_root = btrfs_read_fs_root(fs_info, &key);
8969 /* For non-exist root we just skip it */
8970 if (IS_ERR(dest_root) || !dest_root)
8973 key.objectid = dback->owner;
8974 key.type = BTRFS_EXTENT_DATA_KEY;
8975 key.offset = dback->offset;
8977 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8978 btrfs_release_path(&path);
8980 * For ret < 0, it's OK since the fs-tree may be corrupted,
8981 * we need to record it for inode/file extent rebuild.
8982 * For ret > 0, we record it only for file extent rebuild.
8983 * For ret == 0, the file extent exists but only bytenr
8984 * mismatch, let the original bytenr fix routine to handle,
8990 orphan = malloc(sizeof(*orphan));
8995 INIT_LIST_HEAD(&orphan->list);
8996 orphan->root = dback->root;
8997 orphan->objectid = dback->owner;
8998 orphan->offset = dback->offset;
8999 orphan->disk_bytenr = rec->cache.start;
9000 orphan->disk_len = rec->cache.size;
9001 list_add(&dest_root->orphan_data_extents, &orphan->list);
9002 recorded_data_ref = 1;
9005 btrfs_release_path(&path);
9007 return !recorded_data_ref;
/*
 * When an incorrect extent item is found, this will delete
 * all of the existing entries for it and recreate them
 * based on what the tree scan found.
 */
/*
 * Rebuild the extent tree entries of @rec from the backrefs that were found.
 *
 * For a data extent whose counted refs differ from the extent item's refs,
 * find_possible_backrefs() is run first to pick up stray backrefs.
 * verify_backrefs() then makes the backrefs agree (step one).  Inside a
 * transaction on the extent root the existing records are deleted with
 * delete_extent_records() (step two); the corrupt block cache is consulted
 * for the extent range and, per the comment, no references are added to a
 * corrupt block; otherwise record_extent() is called for backrefs that have
 * found_ref set (step three).  BTRFS_BLOCK_FLAG_FULL_BACKREF is passed along
 * when rec->flag_block_full_backref is set, and rec->bad_full_backref is
 * cleared while recreating refs.  The transaction is committed and a
 * "Repaired extent references" message is printed.
 *
 * NOTE(review): error paths and the return value are not shown here.
 */
9017 static int fixup_extent_refs(struct btrfs_fs_info *info,
9018 struct cache_tree *extent_cache,
9019 struct extent_record *rec)
9021 struct btrfs_trans_handle *trans = NULL;
9023 struct btrfs_path path;
9024 struct list_head *cur = rec->backrefs.next;
9025 struct cache_extent *cache;
9026 struct extent_backref *back;
9030 if (rec->flag_block_full_backref)
9031 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9033 btrfs_init_path(&path);
9034 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9036 * Sometimes the backrefs themselves are so broken they don't
9037 * get attached to any meaningful rec, so first go back and
9038 * check any of our backrefs that we couldn't find and throw
9039 * them into the list if we find the backref so that
9040 * verify_backrefs can figure out what to do.
9042 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9047 /* step one, make sure all of the backrefs agree */
9048 ret = verify_backrefs(info, &path, rec);
9052 trans = btrfs_start_transaction(info->extent_root, 1);
9053 if (IS_ERR(trans)) {
9054 ret = PTR_ERR(trans);
9058 /* step two, delete all the existing records */
9059 ret = delete_extent_records(trans, info->extent_root, &path,
9065 /* was this block corrupt? If so, don't add references to it */
9066 cache = lookup_cache_extent(info->corrupt_blocks,
9067 rec->start, rec->max_size);
9073 /* step three, recreate all the refs we did find */
9074 while(cur != &rec->backrefs) {
9075 back = to_extent_backref(cur);
9079 * if we didn't find any references, don't create a
9082 if (!back->found_ref)
9085 rec->bad_full_backref = 0;
9086 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9094 int err = btrfs_commit_transaction(trans, info->extent_root);
9100 fprintf(stderr, "Repaired extent references for %llu\n",
9101 (unsigned long long)rec->start);
9103 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item of @rec.
 *
 * The item is looked up in the extent root under the key
 * (rec->start, BTRFS_METADATA_ITEM_KEY, rec->info_level) for metadata
 * records, or (rec->start, BTRFS_EXTENT_ITEM_KEY, rec->max_size) otherwise.
 * If the lookup fails or the item is not found, the path is released and
 * the transaction is committed without changes.  Otherwise the flag is set
 * when rec->flag_block_full_backref is set and cleared when it is not, the
 * leaf is marked dirty and the transaction is committed.
 *
 * NOTE(review): the values returned on each path are not shown here.
 */
9107 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9108 struct extent_record *rec)
9110 struct btrfs_trans_handle *trans;
9111 struct btrfs_root *root = fs_info->extent_root;
9112 struct btrfs_path path;
9113 struct btrfs_extent_item *ei;
9114 struct btrfs_key key;
9118 key.objectid = rec->start;
9119 if (rec->metadata) {
9120 key.type = BTRFS_METADATA_ITEM_KEY;
9121 key.offset = rec->info_level;
9123 key.type = BTRFS_EXTENT_ITEM_KEY;
9124 key.offset = rec->max_size;
9127 trans = btrfs_start_transaction(root, 0);
9129 return PTR_ERR(trans);
9131 btrfs_init_path(&path);
9132 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9134 btrfs_release_path(&path);
9135 btrfs_commit_transaction(trans, root);
9138 fprintf(stderr, "Didn't find extent for %llu\n",
9139 (unsigned long long)rec->start);
9140 btrfs_release_path(&path);
9141 btrfs_commit_transaction(trans, root);
9145 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9146 struct btrfs_extent_item);
9147 flags = btrfs_extent_flags(path.nodes[0], ei);
9148 if (rec->flag_block_full_backref) {
9149 fprintf(stderr, "setting full backref on %llu\n",
9150 (unsigned long long)key.objectid);
9151 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9153 fprintf(stderr, "clearing full backref on %llu\n",
9154 (unsigned long long)key.objectid);
9155 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9157 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9158 btrfs_mark_buffer_dirty(path.nodes[0]);
9159 btrfs_release_path(&path);
9160 ret = btrfs_commit_transaction(trans, root);
9162 fprintf(stderr, "Repaired extent flags for %llu\n",
9163 (unsigned long long)rec->start);
9168 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to the corrupt block @corrupt from its parent node in
 * the extent root.
 *
 * The search for corrupt->key stops one level above the corrupt block
 * (path.lowest_level = corrupt->level + 1).  The slot the search landed on
 * is checked first; if its block pointer is not corrupt->cache.start, every
 * slot of that node is scanned for it.  A matching pointer is deleted with
 * btrfs_del_ptr().
 *
 * NOTE(review): what happens when the pointer is not found (including the
 * special handling when the node is the extent root's node) and the return
 * values are not shown here.
 */
9169 static int prune_one_block(struct btrfs_trans_handle *trans,
9170 struct btrfs_fs_info *info,
9171 struct btrfs_corrupt_block *corrupt)
9174 struct btrfs_path path;
9175 struct extent_buffer *eb;
9179 int level = corrupt->level + 1;
9181 btrfs_init_path(&path);
9183 /* we want to stop at the parent to our busted block */
9184 path.lowest_level = level;
9186 ret = btrfs_search_slot(trans, info->extent_root,
9187 &corrupt->key, &path, -1, 1);
9192 eb = path.nodes[level];
9199 * hopefully the search gave us the block we want to prune,
9200 * lets try that first
9202 slot = path.slots[level];
9203 found = btrfs_node_blockptr(eb, slot);
9204 if (found == corrupt->cache.start)
9207 nritems = btrfs_header_nritems(eb);
9209 /* the search failed, lets scan this node and hope we find it */
9210 for (slot = 0; slot < nritems; slot++) {
9211 found = btrfs_node_blockptr(eb, slot);
9212 if (found == corrupt->cache.start)
9216 * we couldn't find the bad block. TODO, search all the nodes for pointers
9219 if (eb == info->extent_root->node) {
9224 btrfs_release_path(&path);
9229 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9230 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9233 btrfs_release_path(&path);
/*
 * Prune corrupt blocks recorded in info->corrupt_blocks from the extent
 * tree.
 *
 * Entries are taken from the cache with search_cache_extent(), handed to
 * prune_one_block() and then removed from the cache.  A transaction on the
 * extent root is started for this work and committed at the end.  The
 * result of prune_one_block() is not checked.
 *
 * Returns the error from starting the transaction, or the result of
 * committing it.
 * NOTE(review): the loop structure and the return value when the cache is
 * empty are not shown here.
 */
9237 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9239 struct btrfs_trans_handle *trans = NULL;
9240 struct cache_extent *cache;
9241 struct btrfs_corrupt_block *corrupt;
9244 cache = search_cache_extent(info->corrupt_blocks, 0);
9248 trans = btrfs_start_transaction(info->extent_root, 1);
9250 return PTR_ERR(trans);
9252 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9253 prune_one_block(trans, info, corrupt);
9254 remove_cache_extent(info->corrupt_blocks, cache);
9257 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Reset cached free-space state of @fs_info.
 *
 * EXTENT_DIRTY ranges found in fs_info->free_space_cache are cleared, then
 * the block groups are visited in order of their start offset, advancing
 * past each one via key.objectid + key.offset.
 *
 * NOTE(review): the per-block-group action is not shown here; presumably
 * the block group's cached state is reset - confirm.
 */
9261 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9263 struct btrfs_block_group_cache *cache;
9268 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9269 &start, &end, EXTENT_DIRTY);
9272 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9277 cache = btrfs_lookup_first_block_group(fs_info, start);
9282 start = cache->key.objectid + cache->key.offset;
/*
 * Check every extent record in @extent_cache, report problems and, in
 * repair mode, try to fix them.
 *
 * Preparation for repair (per the comments below): the range of every
 * cached extent record and of every corrupt block is marked dirty in
 * fs_info->excluded_extents so that nothing is allocated from them, corrupt
 * blocks are pruned and the cached block groups are reset.  Records queued
 * on duplicate_extents are handled first through process_duplicates() and
 * delete_duplicate_records().
 *
 * Each record is then checked for: multiple extent items, a ref count that
 * differs from the extent item (orphan data extents are recorded in that
 * case), backpointer mismatches, a failed owner ref check, a bad full
 * backref, metadata crossing a stripe boundary and a chunk type mismatch.
 * With repair enabled, fixup_extent_refs() and fixup_extent_flags() are
 * used for the ref and flag problems.  The record is removed from the cache
 * and its backrefs are freed; its excluded range is cleared again when
 * !init_extent_tree && repair && (!cur_err || fix).
 *
 * At the end, a failure other than -EAGAIN is reported as an aborted
 * repair; otherwise block accounting is fixed inside a transaction on the
 * extent root.
 *
 * NOTE(review): the conditions guarding several of these steps and the
 * return value are not shown here.
 */
9286 static int check_extent_refs(struct btrfs_root *root,
9287 struct cache_tree *extent_cache)
9289 struct extent_record *rec;
9290 struct cache_extent *cache;
9296 * if we're doing a repair, we have to make sure
9297 * we don't allocate from the problem extents.
9298 * In the worst case, this will be all the
9301 cache = search_cache_extent(extent_cache, 0);
9303 rec = container_of(cache, struct extent_record, cache);
9304 set_extent_dirty(root->fs_info->excluded_extents,
9306 rec->start + rec->max_size - 1);
9307 cache = next_cache_extent(cache);
9310 /* pin down all the corrupted blocks too */
9311 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9313 set_extent_dirty(root->fs_info->excluded_extents,
9315 cache->start + cache->size - 1);
9316 cache = next_cache_extent(cache);
9318 prune_corrupt_blocks(root->fs_info);
9319 reset_cached_block_groups(root->fs_info);
9322 reset_cached_block_groups(root->fs_info);
9325 * We need to delete any duplicate entries we find first otherwise we
9326 * could mess up the extent tree when we have backrefs that actually
9327 * belong to a different extent item and not the weird duplicate one.
9329 while (repair && !list_empty(&duplicate_extents)) {
9330 rec = to_extent_record(duplicate_extents.next);
9331 list_del_init(&rec->list);
9333 /* Sometimes we can find a backref before we find an actual
9334 * extent, so we need to process it a little bit to see if there
9335 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9336 * if this is a backref screwup. If we need to delete stuff
9337 * process_duplicates() will return 0, otherwise it will return
9340 if (process_duplicates(extent_cache, rec))
9342 ret = delete_duplicate_records(root, rec);
9346 * delete_duplicate_records will return the number of entries
9347 * deleted, so if it's greater than 0 then we know we actually
9348 * did something and we need to remove.
9361 cache = search_cache_extent(extent_cache, 0);
9364 rec = container_of(cache, struct extent_record, cache);
9365 if (rec->num_duplicates) {
9366 fprintf(stderr, "extent item %llu has multiple extent "
9367 "items\n", (unsigned long long)rec->start);
9371 if (rec->refs != rec->extent_item_refs) {
9372 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9373 (unsigned long long)rec->start,
9374 (unsigned long long)rec->nr);
9375 fprintf(stderr, "extent item %llu, found %llu\n",
9376 (unsigned long long)rec->extent_item_refs,
9377 (unsigned long long)rec->refs);
9378 ret = record_orphan_data_extents(root->fs_info, rec);
9384 if (all_backpointers_checked(rec, 1)) {
9385 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9386 (unsigned long long)rec->start,
9387 (unsigned long long)rec->nr);
9391 if (!rec->owner_ref_checked) {
9392 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9393 (unsigned long long)rec->start,
9394 (unsigned long long)rec->nr);
9399 if (repair && fix) {
9400 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9406 if (rec->bad_full_backref) {
9407 fprintf(stderr, "bad full backref, on [%llu]\n",
9408 (unsigned long long)rec->start);
9410 ret = fixup_extent_flags(root->fs_info, rec);
9418 * Although it's not a extent ref's problem, we reuse this
9419 * routine for error reporting.
9420 * No repair function yet.
9422 if (rec->crossing_stripes) {
9424 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9425 rec->start, rec->start + rec->max_size);
9429 if (rec->wrong_chunk_type) {
9431 "bad extent [%llu, %llu), type mismatch with chunk\n",
9432 rec->start, rec->start + rec->max_size);
9436 remove_cache_extent(extent_cache, cache);
9437 free_all_extent_backrefs(rec);
9438 if (!init_extent_tree && repair && (!cur_err || fix))
9439 clear_extent_dirty(root->fs_info->excluded_extents,
9441 rec->start + rec->max_size - 1);
9446 if (ret && ret != -EAGAIN) {
9447 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9450 struct btrfs_trans_handle *trans;
9452 root = root->fs_info->extent_root;
9453 trans = btrfs_start_transaction(root, 1);
9454 if (IS_ERR(trans)) {
9455 ret = PTR_ERR(trans);
9459 btrfs_fix_block_accounting(trans, root);
9460 ret = btrfs_commit_transaction(trans, root);
9469 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9473 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9474 stripe_size = length;
9475 stripe_size /= num_stripes;
9476 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9477 stripe_size = length * 2;
9478 stripe_size /= num_stripes;
9479 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9480 stripe_size = length;
9481 stripe_size /= (num_stripes - 1);
9482 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9483 stripe_size = length;
9484 stripe_size /= (num_stripes - 2);
9486 stripe_size = length;
/*
 * Check the chunk against its block group and device extent refs:
 * Return 0 if all refs seem valid.
 * Return 1 if only part of the refs seem valid and a later check is needed to
 * rebuild refs, e.g. a missing block group that requires searching the extent
 * tree to rebuild it.
 * Return -1 if essential refs are missing and cannot be rebuilt.
 */
/*
 * Cross-check one chunk record against the block group and device extent
 * caches.
 *
 * Block group: the matching record is looked up in @block_group_cache.  Its
 * offset must equal the chunk length, its objectid the chunk offset, and its
 * flags are compared with the chunk's type_flags; a mismatch or a missing
 * block group is reported.  On a match the block group is taken off its list
 * and attached to chunk_rec->bg_rec.
 *
 * Device extents: the expected stripe length comes from
 * calc_stripe_length().  For every stripe, the device extent at
 * (devid, offset, length) is looked up in @dev_extent_cache and its
 * objectid, offset, chunk_offset and length are compared with the stripe; a
 * mismatch or a missing device extent is reported.  A matching device
 * extent is moved onto chunk_rec->dextents.
 *
 * NOTE(review): the last parameter and the return statements are not shown
 * here; the call site in check_chunks() passes a "silent" flag, which
 * presumably suppresses the messages - confirm.
 */
9498 static int check_chunk_refs(struct chunk_record *chunk_rec,
9499 struct block_group_tree *block_group_cache,
9500 struct device_extent_tree *dev_extent_cache,
9503 struct cache_extent *block_group_item;
9504 struct block_group_record *block_group_rec;
9505 struct cache_extent *dev_extent_item;
9506 struct device_extent_record *dev_extent_rec;
9510 int metadump_v2 = 0;
9514 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9517 if (block_group_item) {
9518 block_group_rec = container_of(block_group_item,
9519 struct block_group_record,
9521 if (chunk_rec->length != block_group_rec->offset ||
9522 chunk_rec->offset != block_group_rec->objectid ||
9524 chunk_rec->type_flags != block_group_rec->flags)) {
9527 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9528 chunk_rec->objectid,
9533 chunk_rec->type_flags,
9534 block_group_rec->objectid,
9535 block_group_rec->type,
9536 block_group_rec->offset,
9537 block_group_rec->offset,
9538 block_group_rec->objectid,
9539 block_group_rec->flags);
9542 list_del_init(&block_group_rec->list);
9543 chunk_rec->bg_rec = block_group_rec;
9548 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9549 chunk_rec->objectid,
9554 chunk_rec->type_flags);
9561 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9562 chunk_rec->num_stripes);
9563 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9564 devid = chunk_rec->stripes[i].devid;
9565 offset = chunk_rec->stripes[i].offset;
9566 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9567 devid, offset, length);
9568 if (dev_extent_item) {
9569 dev_extent_rec = container_of(dev_extent_item,
9570 struct device_extent_record,
9572 if (dev_extent_rec->objectid != devid ||
9573 dev_extent_rec->offset != offset ||
9574 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9575 dev_extent_rec->length != length) {
9578 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9579 chunk_rec->objectid,
9582 chunk_rec->stripes[i].devid,
9583 chunk_rec->stripes[i].offset,
9584 dev_extent_rec->objectid,
9585 dev_extent_rec->offset,
9586 dev_extent_rec->length);
9589 list_move(&dev_extent_rec->chunk_list,
9590 &chunk_rec->dextents);
9595 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9596 chunk_rec->objectid,
9599 chunk_rec->stripes[i].devid,
9600 chunk_rec->stripes[i].offset);
/*
 * Walk every chunk record stored in @chunk_cache, validate it with
 * check_chunk_refs() and sort the record onto one of the caller-supplied
 * lists according to the value that call returns.  Afterwards the block
 * groups still linked on block_group_cache->block_groups and the device
 * extents still linked on dev_extent_cache->no_chunk_orphans are iterated;
 * the visible messages report them as having no matching chunk.
 *
 * @good and @rebuild are tested for NULL before use.
 * NOTE(review): this listing omits some lines (braces, a few declarations,
 * the condition guarding the @bad insertion, the message calls and the
 * return statement); the comments below describe only what is visible.
 */
9607 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9608 int check_chunks(struct cache_tree *chunk_cache,
9609 struct block_group_tree *block_group_cache,
9610 struct device_extent_tree *dev_extent_cache,
9611 struct list_head *good, struct list_head *bad,
9612 struct list_head *rebuild, int silent)
9614 struct cache_extent *chunk_item;
9615 struct chunk_record *chunk_rec;
9616 struct block_group_record *bg_rec;
9617 struct device_extent_record *dext_rec;
/* Iterate the chunk cache from its first extent to the last. */
9621 chunk_item = first_cache_extent(chunk_cache);
9622 while (chunk_item) {
9623 chunk_rec = container_of(chunk_item, struct chunk_record,
9625 err = check_chunk_refs(chunk_rec, block_group_cache,
9626 dev_extent_cache, silent);
/* err == 0 goes to @good, err > 0 goes to @rebuild; the remaining case feeds @bad. */
9629 if (err == 0 && good)
9630 list_add_tail(&chunk_rec->list, good);
9631 if (err > 0 && rebuild)
9632 list_add_tail(&chunk_rec->list, rebuild);
9634 list_add_tail(&chunk_rec->list, bad);
9635 chunk_item = next_cache_extent(chunk_item);
/* Report block groups that are still on the block_groups list. */
9638 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9641 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
/* Report device extents that are still on the no_chunk_orphans list. */
9649 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9653 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Add up the lengths of the device extent records found in @dext_cache,
 * starting the lookup at (dev_rec->devid, 0), and compare the total with
 * dev_rec->byte_used.  Every extent that is counted is also unlinked from
 * its device_list.
 *
 * NOTE(review): the loop header, the action taken when an extent belongs
 * to another devid, and the return statements are not visible in this
 * listing.
 */
9664 static int check_device_used(struct device_record *dev_rec,
9665 struct device_extent_tree *dext_cache)
9667 struct cache_extent *cache;
9668 struct device_extent_record *dev_extent_rec;
/* Position on the first cached extent at or after (devid, 0). */
9671 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9673 dev_extent_rec = container_of(cache,
9674 struct device_extent_record,
/* Extents whose objectid differs from this devid are treated separately. */
9676 if (dev_extent_rec->objectid != dev_rec->devid)
9679 list_del_init(&dev_extent_rec->device_list);
9680 total_byte += dev_extent_rec->length;
9681 cache = next_cache_extent(cache);
/* A mismatch between the summed length and byte_used is reported. */
9684 if (total_byte != dev_rec->byte_used) {
9686 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9687 total_byte, dev_rec->byte_used, dev_rec->objectid,
9688 dev_rec->type, dev_rec->offset);
/*
 * Run check_device_used() on every device record stored in the rb-tree
 * @dev_cache, then iterate the device extents left on
 * dev_extent_cache->no_device_orphans; the visible message reports each of
 * them as not having found its device.
 *
 * NOTE(review): the loop header, the handling of err and the return
 * statement are not visible in this listing.
 */
9695 /* check btrfs_dev_item -> btrfs_dev_extent */
9696 static int check_devices(struct rb_root *dev_cache,
9697 struct device_extent_tree *dev_extent_cache)
9699 struct rb_node *dev_node;
9700 struct device_record *dev_rec;
9701 struct device_extent_record *dext_rec;
/* In-order walk of the device tree via rb_first()/rb_next(). */
9705 dev_node = rb_first(dev_cache);
9707 dev_rec = container_of(dev_node, struct device_record, node);
9708 err = check_device_used(dev_rec, dev_extent_cache);
9712 dev_node = rb_next(dev_node);
/* Whatever remains on no_device_orphans is reported. */
9714 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9717 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9718 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it to
 * the tail of @head.
 *
 * @head:          list receiving the new record
 * @objectid:      stored in ri_rec->objectid
 * @bytenr:        stored in ri_rec->bytenr
 * @last_snapshot: stored in ri_rec->last_snapshot
 * @level:         stored in ri_rec->level
 * @drop_level:    stored in ri_rec->drop_level
 * @level_size:    stored in ri_rec->level_size
 * @drop_key:      copied into ri_rec->drop_key
 *
 * NOTE(review): callers in this file pass NULL for @drop_key, so the
 * memcpy() is presumably guarded by a NULL test; that guard, the malloc()
 * failure handling and the return statement are not visible in this listing.
 */
9725 static int add_root_item_to_list(struct list_head *head,
9726 u64 objectid, u64 bytenr, u64 last_snapshot,
9727 u8 level, u8 drop_level,
9728 int level_size, struct btrfs_key *drop_key)
9731 struct root_item_record *ri_rec;
9732 ri_rec = malloc(sizeof(*ri_rec));
9735 ri_rec->bytenr = bytenr;
9736 ri_rec->objectid = objectid;
9737 ri_rec->level = level;
9738 ri_rec->level_size = level_size;
9739 ri_rec->drop_level = drop_level;
9740 ri_rec->last_snapshot = last_snapshot;
9742 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
/* Append, so records are later consumed in insertion order. */
9743 list_add_tail(&ri_rec->list, head);
/*
 * Empty @list by repeatedly taking its first root_item_record and
 * unlinking it.
 *
 * NOTE(review): the statement releasing the record's memory is not visible
 * in this listing; presumably each record is freed after being unlinked.
 */
9748 static void free_root_item_list(struct list_head *list)
9750 struct root_item_record *ri_rec;
9752 while (!list_empty(list)) {
9753 ri_rec = list_first_entry(list, struct root_item_record,
9755 list_del_init(&ri_rec->list);
/*
 * Consume the root_item_record entries queued on @list.  For each record
 * the root tree block is read with read_tree_block(), registered through
 * add_root_to_pending() and then processed with run_next_block() (passing
 * the record).  After the list has been drained run_next_block() is called
 * again with a NULL record.
 *
 * All cache/tree arguments are forwarded unchanged to the helpers.
 *
 * NOTE(review): the bits_nr parameter line, the loop constructs wrapping
 * the run_next_block() calls, the error checks and the return statement
 * are not visible in this listing.
 */
9760 static int deal_root_from_list(struct list_head *list,
9761 struct btrfs_root *root,
9762 struct block_info *bits,
9764 struct cache_tree *pending,
9765 struct cache_tree *seen,
9766 struct cache_tree *reada,
9767 struct cache_tree *nodes,
9768 struct cache_tree *extent_cache,
9769 struct cache_tree *chunk_cache,
9770 struct rb_root *dev_cache,
9771 struct block_group_tree *block_group_cache,
9772 struct device_extent_tree *dev_extent_cache)
9777 while (!list_empty(list)) {
9778 struct root_item_record *rec;
9779 struct extent_buffer *buf;
/* Always work on the head of the list; it is unlinked further down. */
9780 rec = list_entry(list->next,
9781 struct root_item_record, list);
9783 buf = read_tree_block(root->fs_info,
9784 rec->bytenr, rec->level_size, 0);
/* A buffer that is not up to date is released instead of being processed. */
9785 if (!extent_buffer_uptodate(buf)) {
9786 free_extent_buffer(buf);
9790 ret = add_root_to_pending(buf, extent_cache, pending,
9791 seen, nodes, rec->objectid);
9795 * To rebuild extent tree, we need deal with snapshot
9796 * one by one, otherwise we deal with node firstly which
9797 * can maximize readahead.
9800 ret = run_next_block(root, bits, bits_nr, &last,
9801 pending, seen, reada, nodes,
9802 extent_cache, chunk_cache,
9803 dev_cache, block_group_cache,
9804 dev_extent_cache, rec);
/* Done with this root: drop the buffer reference and unlink the record. */
9808 free_extent_buffer(buf);
9809 list_del(&rec->list);
/* Final pass without a root_item_record. */
9815 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9816 reada, nodes, extent_cache, chunk_cache,
9817 dev_cache, block_group_cache,
9818 dev_extent_cache, NULL);
/*
 * Top-level driver of the chunk/extent check.  It sets up the in-memory
 * caches, queues the tree root, the chunk root and every ROOT_ITEM found in
 * the root tree, processes the queued roots with deal_root_from_list(),
 * and finally runs check_chunks(), check_extent_refs() and check_devices()
 * on the collected data before releasing all caches.
 *
 * NOTE(review): many lines are missing from this listing (labels, loop
 * headers, error checks, goto/return statements and several declarations
 * such as ret, level, slot, bits_nr).  Two cleanup sequences are visible
 * near the end; the labels or conditions that separate them are not.
 */
9828 static int check_chunks_and_extents(struct btrfs_root *root)
9830 struct rb_root dev_cache;
9831 struct cache_tree chunk_cache;
9832 struct block_group_tree block_group_cache;
9833 struct device_extent_tree dev_extent_cache;
9834 struct cache_tree extent_cache;
9835 struct cache_tree seen;
9836 struct cache_tree pending;
9837 struct cache_tree reada;
9838 struct cache_tree nodes;
9839 struct extent_io_tree excluded_extents;
9840 struct cache_tree corrupt_blocks;
9841 struct btrfs_path path;
9842 struct btrfs_key key;
9843 struct btrfs_key found_key;
9845 struct block_info *bits;
9847 struct extent_buffer *leaf;
9849 struct btrfs_root_item ri;
9850 struct list_head dropping_trees;
9851 struct list_head normal_trees;
9852 struct btrfs_root *root1;
/* Initialise every cache, tree and list used during this run. */
9857 dev_cache = RB_ROOT;
9858 cache_tree_init(&chunk_cache);
9859 block_group_tree_init(&block_group_cache);
9860 device_extent_tree_init(&dev_extent_cache);
9862 cache_tree_init(&extent_cache);
9863 cache_tree_init(&seen);
9864 cache_tree_init(&pending);
9865 cache_tree_init(&nodes);
9866 cache_tree_init(&reada);
9867 cache_tree_init(&corrupt_blocks);
9868 extent_io_tree_init(&excluded_extents);
9869 INIT_LIST_HEAD(&dropping_trees);
9870 INIT_LIST_HEAD(&normal_trees);
/* Publish the local structures and the free-extent hook on fs_info; they are reset to NULL in the cleanup below. */
9873 root->fs_info->excluded_extents = &excluded_extents;
9874 root->fs_info->fsck_extent_cache = &extent_cache;
9875 root->fs_info->free_extent_hook = free_extent_hook;
9876 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* Scratch array of bits_nr block_info entries, later handed to deal_root_from_list(). */
9880 bits = malloc(bits_nr * sizeof(struct block_info));
/* Optional progress reporting. */
9886 if (ctx.progress_enabled) {
9887 ctx.tp = TASK_EXTENTS;
9888 task_start(ctx.info);
/* Queue the tree root on normal_trees. */
9892 root1 = root->fs_info->tree_root;
9893 level = btrfs_header_level(root1->node);
9894 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9895 root1->node->start, 0, level, 0,
9896 root1->fs_info->nodesize, NULL);
/* Queue the chunk root on normal_trees. */
9899 root1 = root->fs_info->chunk_root;
9900 level = btrfs_header_level(root1->node);
9901 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9902 root1->node->start, 0, level, 0,
9903 root1->fs_info->nodesize, NULL);
/* Scan the root tree for ROOT_ITEM keys. */
9906 btrfs_init_path(&path);
9909 key.type = BTRFS_ROOT_ITEM_KEY;
9910 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9915 leaf = path.nodes[0];
9916 slot = path.slots[0];
/* Past the last item of this leaf: advance to the next leaf. */
9917 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9918 ret = btrfs_next_leaf(root, &path);
9921 leaf = path.nodes[0];
9922 slot = path.slots[0];
9924 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9925 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9926 unsigned long offset;
/* Copy the root item out of the leaf so its fields can be read directly. */
9929 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9930 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9931 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress objectid == 0: queue on normal_trees; the other visible branch queues on dropping_trees. */
9932 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9933 level = btrfs_root_level(&ri);
9934 level_size = root->fs_info->nodesize;
9935 ret = add_root_item_to_list(&normal_trees,
9937 btrfs_root_bytenr(&ri),
9938 last_snapshot, level,
9939 0, level_size, NULL);
9943 level = btrfs_root_level(&ri);
9944 level_size = root->fs_info->nodesize;
9945 objectid = found_key.objectid;
/* found_key is reused as the CPU-order destination of a disk key and then passed as the drop key. */
9946 btrfs_disk_key_to_cpu(&found_key,
9948 ret = add_root_item_to_list(&dropping_trees,
9950 btrfs_root_bytenr(&ri),
9951 last_snapshot, level,
9953 level_size, &found_key);
9960 btrfs_release_path(&path);
9963 * check_block can return -EAGAIN if it fixes something, please keep
9964 * this in mind when dealing with return values from these functions, if
9965 * we get -EAGAIN we want to fall through and restart the loop.
/* Process the normal trees first, then the dropping trees. */
9967 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9968 &seen, &reada, &nodes, &extent_cache,
9969 &chunk_cache, &dev_cache, &block_group_cache,
9976 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9977 &pending, &seen, &reada, &nodes,
9978 &extent_cache, &chunk_cache, &dev_cache,
9979 &block_group_cache, &dev_extent_cache);
/* Cross-check the collected records: chunks, extent refs, devices. */
9986 ret = check_chunks(&chunk_cache, &block_group_cache,
9987 &dev_extent_cache, NULL, NULL, NULL, 0);
9994 ret = check_extent_refs(root, &extent_cache);
10001 ret = check_devices(&dev_cache, &dev_extent_cache);
/* First visible cleanup sequence: stop the task, detach the hooks from fs_info and free the caches. */
10006 task_stop(ctx.info);
10008 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10009 extent_io_tree_cleanup(&excluded_extents);
10010 root->fs_info->fsck_extent_cache = NULL;
10011 root->fs_info->free_extent_hook = NULL;
10012 root->fs_info->corrupt_blocks = NULL;
10013 root->fs_info->excluded_extents = NULL;
10016 free_chunk_cache_tree(&chunk_cache);
10017 free_device_cache_tree(&dev_cache);
10018 free_block_group_tree(&block_group_cache);
10019 free_device_extent_tree(&dev_extent_cache);
10020 free_extent_cache_tree(&seen);
10021 free_extent_cache_tree(&pending);
10022 free_extent_cache_tree(&reada);
10023 free_extent_cache_tree(&nodes);
10024 free_root_item_list(&normal_trees);
10025 free_root_item_list(&dropping_trees);
/* Second visible cleanup sequence; unlike the first it also calls free_extent_record_cache() on extent_cache. */
10028 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10029 free_extent_cache_tree(&seen);
10030 free_extent_cache_tree(&pending);
10031 free_extent_cache_tree(&reada);
10032 free_extent_cache_tree(&nodes);
10033 free_chunk_cache_tree(&chunk_cache);
10034 free_block_group_tree(&block_group_cache);
10035 free_device_cache_tree(&dev_cache);
10036 free_device_extent_tree(&dev_extent_cache);
10037 free_extent_record_cache(&extent_cache);
10038 free_root_item_list(&normal_trees);
10039 free_root_item_list(&dropping_trees);
10040 extent_io_tree_cleanup(&excluded_extents);
/*
 * Summary of the visible logic: look up the extent/metadata item for
 * @bytenr in the extent tree, compare its flags, generation, level and
 * reference count with what the caller expects, then scan the inline refs
 * for a TREE_BLOCK_REF or SHARED_BLOCK_REF that matches.  When no inline
 * ref matches, a keyed TREE_BLOCK_REF item is searched instead.  Problems
 * are accumulated in err as BACKREF_MISSING / BACKREF_MISMATCH bits.
 *
 * NOTE(review): several declarations, else branches, error() calls and
 * goto/return lines are missing from this listing; the comment fragments
 * below are reproduced as they appear.
 */
10045 * Check backrefs of a tree block given by @bytenr or @eb.
10047 * @root: the root containing the @bytenr or @eb
10048 * @eb: tree block extent buffer, can be NULL
10049 * @bytenr: bytenr of the tree block to search
10050 * @level: tree level of the tree block
10051 * @owner: owner of the tree block
10053 * Return >0 for any error found and output error message
10054 * Return 0 for no error found
10056 static int check_tree_block_ref(struct btrfs_root *root,
10057 struct extent_buffer *eb, u64 bytenr,
10058 int level, u64 owner)
10060 struct btrfs_key key;
10061 struct btrfs_root *extent_root = root->fs_info->extent_root;
10062 struct btrfs_path path;
10063 struct btrfs_extent_item *ei;
10064 struct btrfs_extent_inline_ref *iref;
10065 struct extent_buffer *leaf;
10071 u32 nodesize = root->fs_info->nodesize;
10074 int tree_reloc_root = 0;
/* Flag the case where @bytenr is the root node of a tree-reloc root. */
10079 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10080 btrfs_header_bytenr(root->node) == bytenr)
10081 tree_reloc_root = 1;
/* Key type depends on the SKINNY_METADATA incompat flag; offset is the maximum u64 value. */
10083 btrfs_init_path(&path);
10084 key.objectid = bytenr;
10085 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10086 key.type = BTRFS_METADATA_ITEM_KEY;
10088 key.type = BTRFS_EXTENT_ITEM_KEY;
10089 key.offset = (u64)-1;
10091 /* Search for the backref in extent tree */
10092 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10094 err |= BACKREF_MISSING;
/* Presumably positions the path on the extent item belonging to @bytenr - confirm against the helper. */
10097 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10099 err |= BACKREF_MISSING;
10103 leaf = path.nodes[0];
10104 slot = path.slots[0];
10105 btrfs_item_key_to_cpu(leaf, &key, slot);
10107 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* METADATA_ITEM: level is the key offset and inline refs follow the extent item directly. */
10109 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10110 skinny_level = (int)key.offset;
10111 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
/* Other branch: a btrfs_tree_block_info sits between the extent item and the inline refs. */
10113 struct btrfs_tree_block_info *info;
10115 info = (struct btrfs_tree_block_info *)(ei + 1);
10116 skinny_level = btrfs_tree_block_level(leaf, info);
10117 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* The extent item must carry the TREE_BLOCK flag. */
10124 if (!(btrfs_extent_flags(leaf, ei) &
10125 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10127 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10128 key.objectid, nodesize,
10129 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10130 err = BACKREF_MISMATCH;
/* Generation stored in the tree block header must match the extent item. */
10132 header_gen = btrfs_header_generation(eb);
10133 extent_gen = btrfs_extent_generation(leaf, ei);
10134 if (header_gen != extent_gen) {
10136 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10137 key.objectid, nodesize, header_gen,
10139 err = BACKREF_MISMATCH;
/* Caller-supplied level must match the level recorded in the extent tree. */
10141 if (level != skinny_level) {
10143 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10144 key.objectid, nodesize, level, skinny_level);
10145 err = BACKREF_MISMATCH;
/* For owners that are not fs trees the reference count must be exactly 1. */
10147 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10149 "extent[%llu %u] is referred by other roots than %llu",
10150 key.objectid, nodesize, root->objectid);
10151 err = BACKREF_MISMATCH;
10156 * Iterate the extent/metadata item to find the exact backref
/* Walk the inline refs from iref up to the end of the item. */
10158 item_size = btrfs_item_size_nr(leaf, slot);
10159 ptr = (unsigned long)iref;
10160 end = (unsigned long)ei + item_size;
10161 while (ptr < end) {
10162 iref = (struct btrfs_extent_inline_ref *)ptr;
10163 type = btrfs_extent_inline_ref_type(leaf, iref);
10164 offset = btrfs_extent_inline_ref_offset(leaf, iref);
/* A TREE_BLOCK_REF matches when its offset equals root->objectid or @owner. */
10166 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10167 (offset == root->objectid || offset == owner)) {
10169 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10171 * Backref of tree reloc root points to itself, no need
10172 * to check backref any more.
10174 if (tree_reloc_root)
10177 /* Check if the backref points to valid referencer */
/* Recurse on the parent block (the ref offset) one level up; a zero result counts as found. */
10178 found_ref = !check_tree_block_ref(root, NULL,
10179 offset, level + 1, owner);
10184 ptr += btrfs_extent_inline_ref_size(type);
10188 * Inlined extent item doesn't have what we need, check
10189 * TREE_BLOCK_REF_KEY
/* Fallback: search a keyed (bytenr, TREE_BLOCK_REF, root->objectid) item. */
10192 btrfs_release_path(&path);
10193 key.objectid = bytenr;
10194 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10195 key.offset = root->objectid;
10197 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10202 err |= BACKREF_MISSING;
10204 btrfs_release_path(&path);
/* The missing-backref message is printed only when @eb is non-NULL. */
10205 if (eb && (err & BACKREF_MISSING))
10206 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10207 bytenr, nodesize, owner, level);
/*
 * Summary of the visible logic: read the file extent item at @slot of @eb,
 * check that its disk_num_bytes and num_bytes are sector aligned, account
 * both values in the global data_bytes_* counters, then look for a data
 * backref of the extent in three places in turn: the inline refs of the
 * extent item, a keyed EXTENT_DATA_REF item, and a keyed SHARED_DATA_REF
 * item.  Problems are accumulated in err as BYTES_UNALIGNED,
 * BACKREF_MISMATCH or BACKREF_MISSING bits.
 *
 * NOTE(review): several declarations, the return statements, the error()
 * call lines and the conditions around the searches are missing from this
 * listing.
 */
10212 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10214 * Return >0 any error found and output error message
10215 * Return 0 for no error found
10217 static int check_extent_data_item(struct btrfs_root *root,
10218 struct extent_buffer *eb, int slot)
10220 struct btrfs_file_extent_item *fi;
10221 struct btrfs_path path;
10222 struct btrfs_root *extent_root = root->fs_info->extent_root;
10223 struct btrfs_key fi_key;
10224 struct btrfs_key dbref_key;
10225 struct extent_buffer *leaf;
10226 struct btrfs_extent_item *ei;
10227 struct btrfs_extent_inline_ref *iref;
10228 struct btrfs_extent_data_ref *dref;
10231 u64 disk_num_bytes;
10232 u64 extent_num_bytes;
10239 int found_dbackref = 0;
10243 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10244 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10246 /* Nothing to check for hole and inline data extents */
10247 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10248 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10251 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10252 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10253 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10255 /* Check unaligned disk_num_bytes and num_bytes */
10256 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10258 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10259 fi_key.objectid, fi_key.offset, disk_num_bytes,
10260 root->fs_info->sectorsize);
10261 err |= BYTES_UNALIGNED;
/* Global accounting of allocated data bytes. */
10263 data_bytes_allocated += disk_num_bytes;
10265 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10267 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10268 fi_key.objectid, fi_key.offset, extent_num_bytes,
10269 root->fs_info->sectorsize);
10270 err |= BYTES_UNALIGNED;
/* Global accounting of referenced data bytes. */
10272 data_bytes_referenced += extent_num_bytes;
10274 owner = btrfs_header_owner(eb);
10276 /* Check the extent item of the file extent in extent tree */
/* Exact key: (disk_bytenr, EXTENT_ITEM, disk_num_bytes). */
10277 btrfs_init_path(&path);
10278 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10279 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10280 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10282 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
/* Note: the slot parameter is reused here for the extent tree leaf. */
10286 leaf = path.nodes[0];
10287 slot = path.slots[0];
10288 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10290 extent_flags = btrfs_extent_flags(leaf, ei);
10292 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10294 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10295 disk_bytenr, disk_num_bytes,
10296 BTRFS_EXTENT_FLAG_DATA);
10297 err |= BACKREF_MISMATCH;
10300 /* Check data backref inside that extent item */
10301 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10302 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10303 ptr = (unsigned long)iref;
10304 end = (unsigned long)ei + item_size;
10305 while (ptr < end) {
10306 iref = (struct btrfs_extent_inline_ref *)ptr;
10307 type = btrfs_extent_inline_ref_type(leaf, iref);
/* The data ref structure starts at the inline ref's offset field. */
10308 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
/* EXTENT_DATA_REF matches when its root equals the leaf owner or root->objectid. */
10310 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10311 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10312 if (ref_root == owner || ref_root == root->objectid)
10313 found_dbackref = 1;
/* SHARED_DATA_REF: validated through check_tree_block_ref() on the ref offset; zero means found. */
10314 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10315 found_dbackref = !check_tree_block_ref(root, NULL,
10316 btrfs_extent_inline_ref_offset(leaf, iref),
10320 if (found_dbackref)
10322 ptr += btrfs_extent_inline_ref_size(type);
10325 if (!found_dbackref) {
10326 btrfs_release_path(&path);
10328 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
/* Key offset is the hash of (root objectid, inode objectid, file offset). */
10329 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10330 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10331 dbref_key.offset = hash_extent_data_ref(root->objectid,
10332 fi_key.objectid, fi_key.offset);
10334 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10335 &dbref_key, &path, 0, 0);
10337 found_dbackref = 1;
10341 btrfs_release_path(&path);
10344 * Neither inlined nor EXTENT_DATA_REF found, try
10345 * SHARED_DATA_REF as last chance.
/* Key offset is the start of the leaf holding the file extent item. */
10347 dbref_key.objectid = disk_bytenr;
10348 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10349 dbref_key.offset = eb->start;
10351 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10352 &dbref_key, &path, 0, 0);
10354 found_dbackref = 1;
10360 if (!found_dbackref)
10361 err |= BACKREF_MISSING;
10362 btrfs_release_path(&path);
10363 if (err & BACKREF_MISSING) {
10364 error("data extent[%llu %llu] backref lost",
10365 disk_bytenr, disk_num_bytes);
/*
 * Summary of the visible logic: find the extent item of @bytenr in the
 * extent tree, require the TREE_BLOCK flag, take the level recorded there
 * (key offset for METADATA_ITEM, btrfs_tree_block_info otherwise), read
 * the tree block itself using the recorded generation, and compare the
 * level in its header with the recorded one.  The header level is what the
 * visible return statement hands back.
 *
 * NOTE(review): the error checks after the searches, the else line and
 * the error-path return values are missing from this listing.
 */
10371 * Get real tree block level for the case like shared block
10372 * Return >= 0 as tree level
10373 * Return <0 for error
10375 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10377 struct extent_buffer *eb;
10378 struct btrfs_path path;
10379 struct btrfs_key key;
10380 struct btrfs_extent_item *ei;
10383 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10388 /* Search extent tree for extent generation and level */
10389 key.objectid = bytenr;
10390 key.type = BTRFS_METADATA_ITEM_KEY;
10391 key.offset = (u64)-1;
10393 btrfs_init_path(&path);
10394 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
/* Presumably positions the path on the extent item belonging to @bytenr - confirm against the helper. */
10397 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10405 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10406 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10407 struct btrfs_extent_item);
10408 flags = btrfs_extent_flags(path.nodes[0], ei);
/* Only tree block extents have a level. */
10409 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10414 /* Get transid for later read_tree_block() check */
10415 transid = btrfs_extent_generation(path.nodes[0], ei);
10417 /* Get backref level as one source */
10418 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10419 backref_level = key.offset;
10421 struct btrfs_tree_block_info *info;
10423 info = (struct btrfs_tree_block_info *)(ei + 1);
10424 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10426 btrfs_release_path(&path);
10428 /* Get level from tree block as an alternative source */
10429 eb = read_tree_block(fs_info, bytenr, nodesize, transid);
10430 if (!extent_buffer_uptodate(eb)) {
10431 free_extent_buffer(eb);
/* Read the level first, then drop the buffer; only the integer is kept. */
10434 header_level = btrfs_header_level(eb);
10435 free_extent_buffer(eb);
/* The two sources must agree. */
10437 if (header_level != backref_level)
10439 return header_level;
10442 btrfs_release_path(&path);
/*
 * Summary of the visible logic: resolve @level through
 * query_tree_block_level() when needed, load the root identified by
 * @root_id, read the tree block at @bytenr to obtain its first key, then
 * search that key in the root with path.lowest_level = @level and verify
 * that the node reached has the expected bytenr and level.  Problems are
 * accumulated in err as REFERENCER_MISSING / REFERENCER_MISMATCH bits.
 *
 * NOTE(review): the conditions selecting between the node-key and item-key
 * reads and between the two "lost referencer" messages, plus the
 * goto/return lines, are missing from this listing.
 */
10447 * Check if a tree block backref is valid (points to a valid tree block)
10448 * if level == -1, level will be resolved
10449 * Return >0 for any error found and print error message
10451 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10452 u64 bytenr, int level)
10454 struct btrfs_root *root;
10455 struct btrfs_key key;
10456 struct btrfs_path path;
10457 struct extent_buffer *eb;
10458 struct extent_buffer *node;
10459 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10463 /* Query level for level == -1 special case */
10465 level = query_tree_block_level(fs_info, bytenr);
10467 err |= REFERENCER_MISSING;
/* Load the referencing root by (root_id, ROOT_ITEM, -1). */
10471 key.objectid = root_id;
10472 key.type = BTRFS_ROOT_ITEM_KEY;
10473 key.offset = (u64)-1;
10475 root = btrfs_read_fs_root(fs_info, &key);
10476 if (IS_ERR(root)) {
10477 err |= REFERENCER_MISSING;
10481 /* Read out the tree block to get item/node key */
10482 eb = read_tree_block(fs_info, bytenr, root->fs_info->nodesize, 0);
10483 if (!extent_buffer_uptodate(eb)) {
10484 err |= REFERENCER_MISSING;
10485 free_extent_buffer(eb);
10489 /* Empty tree, no need to check key */
10490 if (!btrfs_header_nritems(eb) && !level) {
10491 free_extent_buffer(eb);
/* First key of the block: node key or item key (the selecting condition is not visible). */
10496 btrfs_node_key_to_cpu(eb, &key, 0);
10498 btrfs_item_key_to_cpu(eb, &key, 0);
10500 free_extent_buffer(eb);
10502 btrfs_init_path(&path);
10503 path.lowest_level = level;
10504 /* Search with the first key, to ensure we can reach it */
10505 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10507 err |= REFERENCER_MISSING;
/* The node found at @level must be the block under test. */
10511 node = path.nodes[level];
10512 if (btrfs_header_bytenr(node) != bytenr) {
10514 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10515 bytenr, nodesize, bytenr,
10516 btrfs_header_bytenr(node));
10517 err |= REFERENCER_MISMATCH;
10519 if (btrfs_header_level(node) != level) {
10521 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10522 bytenr, nodesize, level,
10523 btrfs_header_level(node));
10524 err |= REFERENCER_MISMATCH;
10528 btrfs_release_path(&path);
/* Two message variants exist: without and with the level. */
10530 if (err & REFERENCER_MISSING) {
10532 error("extent [%llu %d] lost referencer (owner: %llu)",
10533 bytenr, nodesize, root_id);
10536 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10537 bytenr, nodesize, root_id, level);
/*
 * Summary of the visible logic: build the key
 * (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb), read that root
 * without using the root cache, compare the bytenr of its root node with
 * the bytenr of @eb, and free the temporary root again.
 *
 * NOTE(review): the return statements and the variable holding the result
 * are missing from this listing.
 */
10544 * Check if tree block @eb is tree reloc root.
10545 * Return 0 if it's not or any problem happens
10546 * Return 1 if it's a tree reloc root
10548 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10549 struct extent_buffer *eb)
10551 struct btrfs_root *tree_reloc_root;
10552 struct btrfs_key key;
10553 u64 bytenr = btrfs_header_bytenr(eb);
10554 u64 owner = btrfs_header_owner(eb);
/* The key offset is the owner recorded in the header of @eb. */
10557 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10558 key.offset = owner;
10559 key.type = BTRFS_ROOT_ITEM_KEY;
10561 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10562 if (IS_ERR(tree_reloc_root))
/* @eb is the reloc root when it is that root's top node. */
10565 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
/* The root was read with the no_cache variant, so it is released here. */
10567 btrfs_free_fs_root(tree_reloc_root);
/*
 * Summary of the visible logic: read the tree block at @parent, resolve
 * @level through query_tree_block_level() when needed, handle the
 * parent == bytenr case via is_tree_reloc_root(), require the parent to be
 * exactly one level above, and scan the parent's block pointers for
 * @bytenr.  REFERENCER_MISSING is returned, with a message, when no parent
 * pointer was found.
 *
 * NOTE(review): declarations of nr and i, the goto lines, the assignments
 * to found_parent and the success return are missing from this listing.
 */
10572 * Check referencer for shared block backref
10573 * If level == -1, this function will resolve the level.
10575 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10576 u64 parent, u64 bytenr, int level)
10578 struct extent_buffer *eb;
10579 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10581 int found_parent = 0;
10584 eb = read_tree_block(fs_info, parent, nodesize, 0);
10585 if (!extent_buffer_uptodate(eb))
10589 level = query_tree_block_level(fs_info, bytenr);
10593 /* It's possible it's a tree reloc root */
10594 if (parent == bytenr) {
10595 if (is_tree_reloc_root(fs_info, eb))
/* The parent must sit exactly one level above the child. */
10600 if (level + 1 != btrfs_header_level(eb))
/* Look for a block pointer in the parent that equals @bytenr. */
10603 nr = btrfs_header_nritems(eb);
10604 for (i = 0; i < nr; i++) {
10605 if (bytenr == btrfs_node_blockptr(eb, i)) {
10611 free_extent_buffer(eb);
10612 if (!found_parent) {
10614 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10615 bytenr, nodesize, parent, level);
10616 return REFERENCER_MISSING;
/*
 * Summary of the visible logic: look up the EXTENT_ITEM of @bytenr in the
 * extent tree, load the root @root_id, then iterate the EXTENT_DATA items
 * of inode @objectid in that root and compare each file extent's disk
 * bytenr, disk num bytes and (file offset - extent offset) with the
 * backref.  The number of matches (found_count) is compared with @count;
 * a difference is reported and REFERENCER_MISSING is returned.
 *
 * NOTE(review): the condition guarding the extent-tree lookup, the use of
 * the found key to resolve @len, the loop header, the found_count
 * increment, the right-hand side of the offset comparison and the
 * goto/return lines are missing from this listing.
 */
10622 * Check referencer for normal (inlined) data ref
10623 * If len == 0, it will be resolved by searching in extent tree
10625 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10626 u64 root_id, u64 objectid, u64 offset,
10627 u64 bytenr, u64 len, u32 count)
10629 struct btrfs_root *root;
10630 struct btrfs_root *extent_root = fs_info->extent_root;
10631 struct btrfs_key key;
10632 struct btrfs_path path;
10633 struct extent_buffer *leaf;
10634 struct btrfs_file_extent_item *fi;
10635 u32 found_count = 0;
/* Extent tree lookup keyed (bytenr, EXTENT_ITEM, -1). */
10640 key.objectid = bytenr;
10641 key.type = BTRFS_EXTENT_ITEM_KEY;
10642 key.offset = (u64)-1;
10644 btrfs_init_path(&path);
10645 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10648 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10651 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* The item found must be the EXTENT_ITEM of exactly this bytenr. */
10652 if (key.objectid != bytenr ||
10653 key.type != BTRFS_EXTENT_ITEM_KEY)
10656 btrfs_release_path(&path);
/* Load the referencing root. */
10658 key.objectid = root_id;
10659 key.type = BTRFS_ROOT_ITEM_KEY;
10660 key.offset = (u64)-1;
10661 btrfs_init_path(&path);
10663 root = btrfs_read_fs_root(fs_info, &key);
10667 key.objectid = objectid;
10668 key.type = BTRFS_EXTENT_DATA_KEY;
10670 * It can be nasty as data backref offset is
10671 * file offset - file extent offset, which is smaller or
10672 * equal to original backref offset. The only special case is
10673 * overflow. So we need to special check and do further search.
/* Start at 0 when bit 63 of @offset is set, otherwise at @offset. */
10675 key.offset = offset & (1ULL << 63) ? 0 : offset;
10677 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10682 * Search afterwards to get correct one
10683 * NOTE: As we must do a comprehensive check on the data backref to
10684 * make sure the dref count also matches, we must iterate all file
10685 * extents for that inode.
10688 leaf = path.nodes[0];
10689 slot = path.slots[0];
10691 if (slot >= btrfs_header_nritems(leaf))
10693 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Items of another inode or another key type are not counted. */
10694 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10696 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10698 * Except normal disk bytenr and disk num bytes, we still
10699 * need to do extra check on dbackref offset as
10700 * dbackref offset = file_offset - file_extent_offset
10702 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10703 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10704 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10709 ret = btrfs_next_item(root, &path);
10714 btrfs_release_path(&path);
10715 if (found_count != count) {
10717 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10718 bytenr, len, root_id, objectid, offset, count, found_count);
10719 return REFERENCER_MISSING;
/*
 * Summary of the visible logic: read the leaf at @parent and scan its
 * items for a non-inline EXTENT_DATA item whose disk bytenr equals
 * @bytenr.  When none is found an error is printed and REFERENCER_MISSING
 * is returned.
 *
 * NOTE(review): declarations of nr and i, the goto/continue lines, the
 * assignment to found_parent, the argument line of error() and the success
 * return are missing from this listing.
 */
10725 * Check if the referencer of a shared data backref exists
10727 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10728 u64 parent, u64 bytenr)
10730 struct extent_buffer *eb;
10731 struct btrfs_key key;
10732 struct btrfs_file_extent_item *fi;
10733 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10735 int found_parent = 0;
10738 eb = read_tree_block(fs_info, parent, nodesize, 0);
10739 if (!extent_buffer_uptodate(eb))
10742 nr = btrfs_header_nritems(eb);
10743 for (i = 0; i < nr; i++) {
10744 btrfs_item_key_to_cpu(eb, &key, i);
/* Only EXTENT_DATA items are of interest. */
10745 if (key.type != BTRFS_EXTENT_DATA_KEY)
10748 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline file extents are tested separately from the disk bytenr comparison. */
10749 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10752 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10759 free_extent_buffer(eb);
10760 if (!found_parent) {
10761 error("shared extent %llu referencer lost (parent: %llu)",
10763 return REFERENCER_MISSING;
/*
 * Summary of the visible logic: account the extent in the global
 * bytes_used counter, reject items smaller than a btrfs_extent_item,
 * check metadata extents for crossing a stripe boundary, then walk the
 * inline backrefs of the item and dispatch each one, by type, to
 * check_tree_block_backref(), check_shared_block_backref(),
 * check_extent_data_backref() or check_shared_data_backref().  Problems
 * are accumulated in err (CROSSING_STRIPE_BOUNDARY, ITEM_SIZE_MISMATCH,
 * UNKNOWN_TYPE, plus whatever the helpers return).
 *
 * NOTE(review): several declarations, the else lines, the loop/label
 * structure around the backref walk, the switch header, the assignment to
 * metadata, the handling of ret and the return statements are missing
 * from this listing.
 */
10769 * This function will check a given extent item, including its backref and
10770 * itself (like crossing stripe boundary and type)
10772 * Since we don't use extent_record anymore, introduce new error bit
10774 static int check_extent_item(struct btrfs_fs_info *fs_info,
10775 struct extent_buffer *eb, int slot)
10777 struct btrfs_extent_item *ei;
10778 struct btrfs_extent_inline_ref *iref;
10779 struct btrfs_extent_data_ref *dref;
10783 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10784 u32 item_size = btrfs_item_size_nr(eb, slot);
10789 struct btrfs_key key;
/* EXTENT_ITEM keys add their key offset to bytes_used; the other visible statement adds nodesize. */
10793 btrfs_item_key_to_cpu(eb, &key, slot);
10794 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10795 bytes_used += key.offset;
10797 bytes_used += nodesize;
10799 if (item_size < sizeof(*ei)) {
10801 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10802 * old thing when on disk format is still un-determined.
10803 * No need to care about it anymore
10805 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10809 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10810 flags = btrfs_extent_flags(eb, ei);
10812 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* Note: this call uses the file-scope global_info rather than the fs_info parameter. */
10814 if (metadata && check_crossing_stripes(global_info, key.objectid,
10816 error("bad metadata [%llu, %llu) crossing stripe boundary",
10817 key.objectid, key.objectid + nodesize);
10818 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr starts right after the fixed extent item header. */
10821 ptr = (unsigned long)(ei + 1);
10823 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10824 /* Old EXTENT_ITEM metadata */
10825 struct btrfs_tree_block_info *info;
10827 info = (struct btrfs_tree_block_info *)ptr;
10828 level = btrfs_tree_block_level(eb, info);
10829 ptr += sizeof(struct btrfs_tree_block_info);
10831 /* New METADATA_ITEM */
10832 level = key.offset;
10834 end = (unsigned long)ei + item_size;
10837 /* Reached extent item end normally */
10841 /* Beyond extent item end, wrong item size */
10843 err |= ITEM_SIZE_MISMATCH;
10844 error("extent item at bytenr %llu slot %d has wrong size",
10849 /* Now check every backref in this extent item */
10850 iref = (struct btrfs_extent_inline_ref *)ptr;
10851 type = btrfs_extent_inline_ref_type(eb, iref);
10852 offset = btrfs_extent_inline_ref_offset(eb, iref);
10854 case BTRFS_TREE_BLOCK_REF_KEY:
10855 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10859 case BTRFS_SHARED_BLOCK_REF_KEY:
10860 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10864 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref structure starts at the inline ref's offset field. */
10865 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10866 ret = check_extent_data_backref(fs_info,
10867 btrfs_extent_data_ref_root(eb, dref),
10868 btrfs_extent_data_ref_objectid(eb, dref),
10869 btrfs_extent_data_ref_offset(eb, dref),
10870 key.objectid, key.offset,
10871 btrfs_extent_data_ref_count(eb, dref));
10874 case BTRFS_SHARED_DATA_REF_KEY:
10875 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10879 error("extent[%llu %d %llu] has unknown ref type: %d",
10880 key.objectid, key.type, key.offset, type);
10881 err |= UNKNOWN_TYPE;
/* Advance by the size of the inline ref just handled. */
10885 ptr += btrfs_extent_inline_ref_size(type);
10893 * Check if a dev extent item is referred correctly by its chunk
/*
 * Cross-check one DEV_EXTENT item (@slot of @eb) against the chunk tree.
 *
 * The chunk objectid/offset stored in the dev extent are turned into a
 * CHUNK_ITEM key and searched in fs_info->chunk_root. The chunk length is
 * compared with the dev extent length and the chunk's stripes are scanned
 * for one whose devid/offset equal the dev extent key's objectid/offset.
 *
 * Prints an error and returns REFERENCER_MISSING when found_chunk is unset.
 */
10895 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10896 struct extent_buffer *eb, int slot)
10898 struct btrfs_root *chunk_root = fs_info->chunk_root;
10899 struct btrfs_dev_extent *ptr;
10900 struct btrfs_path path;
10901 struct btrfs_key chunk_key;
10902 struct btrfs_key devext_key;
10903 struct btrfs_chunk *chunk;
10904 struct extent_buffer *l;
10908 int found_chunk = 0;
10911 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10912 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10913 length = btrfs_dev_extent_length(eb, ptr);
/* Key of the chunk item this dev extent points back to */
10915 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10916 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10917 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10919 btrfs_init_path(&path);
10920 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10925 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10926 if (btrfs_chunk_length(l, chunk) != length)
/* Look for a stripe that refers to this very dev extent (devid, offset) */
10929 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10930 for (i = 0; i < num_stripes; i++) {
10931 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10932 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10934 if (devid == devext_key.objectid &&
10935 offset == devext_key.offset) {
10941 btrfs_release_path(&path);
10942 if (!found_chunk) {
10944 "device extent[%llu, %llu, %llu] did not find the related chunk",
10945 devext_key.objectid, devext_key.offset, length);
10946 return REFERENCER_MISSING;
10952 * Check if the used space is correct with the dev item
/*
 * Compare the bytes_used field of the DEV_ITEM at @slot of @eb with the sum
 * of the lengths of that device's DEV_EXTENT items in fs_info->dev_root.
 *
 * Returns REFERENCER_MISSING on the error path that follows the dev tree
 * lookup, and ACCOUNTING_MISMATCH when the summed length differs from
 * bytes_used. Both paths print an error first.
 */
10954 static int check_dev_item(struct btrfs_fs_info *fs_info,
10955 struct extent_buffer *eb, int slot)
10957 struct btrfs_root *dev_root = fs_info->dev_root;
10958 struct btrfs_dev_item *dev_item;
10959 struct btrfs_path path;
10960 struct btrfs_key key;
10961 struct btrfs_dev_extent *ptr;
10967 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10968 dev_id = btrfs_device_id(eb, dev_item);
10969 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents are keyed by device id in the dev tree */
10971 key.objectid = dev_id;
10972 key.type = BTRFS_DEV_EXTENT_KEY;
10975 btrfs_init_path(&path);
10976 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key is reloaded from the dev item so the message shows the item's key */
10978 btrfs_item_key_to_cpu(eb, &key, slot);
10979 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10980 key.objectid, key.type, key.offset);
10981 btrfs_release_path(&path);
10982 return REFERENCER_MISSING;
10985 /* Iterate dev_extents to calculate the used space of a device */
10987 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10990 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10991 if (key.objectid > dev_id)
10993 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10996 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10997 struct btrfs_dev_extent);
10998 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11000 ret = btrfs_next_item(dev_root, &path);
11004 btrfs_release_path(&path);
11006 if (used != total) {
/*
 * NOTE(review): key is re-read from the dev item here, but the message
 * below prints BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and dev_id
 * instead of the key fields - confirm which triple was intended.
 */
11007 btrfs_item_key_to_cpu(eb, &key, slot);
11009 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11010 total, used, BTRFS_ROOT_TREE_OBJECTID,
11011 BTRFS_DEV_EXTENT_KEY, dev_id);
11012 return ACCOUNTING_MISMATCH;
11018 * Check a block group item with its referencer (chunk) and its used space
11019 * with extent/metadata item
/*
 * Check the BLOCK_GROUP_ITEM at @slot of @eb.
 *
 * Step 1: look up the chunk item keyed (BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 * CHUNK_ITEM, block group bytenr) in the chunk tree and check its length.
 * Step 2: walk the extent tree from the block group bytenr, account the
 * EXTENT_ITEM/METADATA_ITEM entries located inside the block group range
 * into total, and check that each extent's DATA/TREE_BLOCK flag agrees with
 * the block group flags.
 *
 * Problems are ORed into err as REFERENCER_MISSING, REFERENCER_MISMATCH,
 * CHUNK_TYPE_MISMATCH and ACCOUNTING_MISMATCH.
 */
11021 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11022 struct extent_buffer *eb, int slot)
11024 struct btrfs_root *extent_root = fs_info->extent_root;
11025 struct btrfs_root *chunk_root = fs_info->chunk_root;
11026 struct btrfs_block_group_item *bi;
11027 struct btrfs_block_group_item bg_item;
11028 struct btrfs_path path;
11029 struct btrfs_key bg_key;
11030 struct btrfs_key chunk_key;
11031 struct btrfs_key extent_key;
11032 struct btrfs_chunk *chunk;
11033 struct extent_buffer *leaf;
11034 struct btrfs_extent_item *ei;
11035 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the on-disk item into bg_item, then read used/flags from the copy */
11043 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11044 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11045 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11046 used = btrfs_block_group_used(&bg_item);
11047 bg_flags = btrfs_block_group_flags(&bg_item);
11049 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11050 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11051 chunk_key.offset = bg_key.objectid;
11053 btrfs_init_path(&path);
11054 /* Search for the referencer chunk */
11055 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11058 "block group[%llu %llu] did not find the related chunk item",
11059 bg_key.objectid, bg_key.offset);
11060 err |= REFERENCER_MISSING;
11062 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11063 struct btrfs_chunk);
11064 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11067 "block group[%llu %llu] related chunk item length does not match",
11068 bg_key.objectid, bg_key.offset);
11069 err |= REFERENCER_MISMATCH;
11072 btrfs_release_path(&path);
11074 /* Search from the block group bytenr */
11075 extent_key.objectid = bg_key.objectid;
11076 extent_key.type = 0;
11077 extent_key.offset = 0;
11079 btrfs_init_path(&path);
11080 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11084 /* Iterate extent tree to account used space */
11086 leaf = path.nodes[0];
11088 /* Search slot can point to the last item beyond leaf nritems */
11089 if (path.slots[0] >= btrfs_header_nritems(leaf))
11092 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* First key at or past the end of the block group range */
11093 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11096 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11097 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11099 if (extent_key.objectid < bg_key.objectid)
11102 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11105 total += extent_key.offset;
/* The extent type recorded in the item must match the block group type */
11107 ei = btrfs_item_ptr(leaf, path.slots[0],
11108 struct btrfs_extent_item);
11109 flags = btrfs_extent_flags(leaf, ei);
11110 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11111 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11113 "bad extent[%llu, %llu) type mismatch with chunk",
11114 extent_key.objectid,
11115 extent_key.objectid + extent_key.offset);
11116 err |= CHUNK_TYPE_MISMATCH;
11118 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11119 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11120 BTRFS_BLOCK_GROUP_METADATA))) {
11122 "bad extent[%llu, %llu) type mismatch with chunk",
11123 extent_key.objectid,
11124 extent_key.objectid + nodesize);
11125 err |= CHUNK_TYPE_MISMATCH;
11129 ret = btrfs_next_item(extent_root, &path);
11135 btrfs_release_path(&path);
/* Accounted extent space must equal the used value of the block group */
11137 if (total != used) {
11139 "block group[%llu %llu] used %llu but extent items used %llu",
11140 bg_key.objectid, bg_key.offset, used, total);
11141 err |= ACCOUNTING_MISMATCH;
11147 * Check a chunk item.
11148 * Including checking all referred dev_extents and block group
/*
 * Check the CHUNK_ITEM at @slot of @eb.
 *
 * Sanity checks on the item itself: length aligned to the sector size, at
 * least one BTRFS_BLOCK_GROUP_TYPE_MASK bit set, no more than one profile
 * bit set.
 * Cross references: a BLOCK_GROUP_ITEM keyed (chunk offset, length) in the
 * extent tree whose flags equal the chunk type, and for every stripe a
 * DEV_EXTENT keyed (devid, stripe offset) in the dev tree whose chunk
 * objectid, chunk offset and length match this chunk.
 *
 * Problems are ORed into err (BYTES_UNALIGNED, UNKNOWN_TYPE,
 * REFERENCER_MISSING, BACKREF_MISSING).
 */
11150 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11151 struct extent_buffer *eb, int slot)
11153 struct btrfs_root *extent_root = fs_info->extent_root;
11154 struct btrfs_root *dev_root = fs_info->dev_root;
11155 struct btrfs_path path;
11156 struct btrfs_key chunk_key;
11157 struct btrfs_key bg_key;
11158 struct btrfs_key devext_key;
11159 struct btrfs_chunk *chunk;
11160 struct extent_buffer *leaf;
11161 struct btrfs_block_group_item *bi;
11162 struct btrfs_block_group_item bg_item;
11163 struct btrfs_dev_extent *ptr;
11164 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
11176 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11177 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11178 length = btrfs_chunk_length(eb, chunk);
11179 chunk_end = chunk_key.offset + length;
11180 if (!IS_ALIGNED(length, sectorsize)) {
11181 error("chunk[%llu %llu) not aligned to %u",
11182 chunk_key.offset, chunk_end, sectorsize);
11183 err |= BYTES_UNALIGNED;
11187 type = btrfs_chunk_type(eb, chunk);
11188 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
11189 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
11190 error("chunk[%llu %llu) has no chunk type",
11191 chunk_key.offset, chunk_end);
11192 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is non-zero iff more than one bit is set */
11194 if (profile && (profile & (profile - 1))) {
11195 error("chunk[%llu %llu) multiple profiles detected: %llx",
11196 chunk_key.offset, chunk_end, profile);
11197 err |= UNKNOWN_TYPE;
/* The block group item is keyed by the chunk's logical start and length */
11200 bg_key.objectid = chunk_key.offset;
11201 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11202 bg_key.offset = length;
11204 btrfs_init_path(&path);
11205 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11208 "chunk[%llu %llu) did not find the related block group item",
11209 chunk_key.offset, chunk_end);
11210 err |= REFERENCER_MISSING;
11212 leaf = path.nodes[0];
11213 bi = btrfs_item_ptr(leaf, path.slots[0],
11214 struct btrfs_block_group_item);
11215 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11217 if (btrfs_block_group_flags(&bg_item) != type) {
11219 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11220 chunk_key.offset, chunk_end, type,
11221 btrfs_block_group_flags(&bg_item));
/*
 * NOTE(review): a flags mismatch is reported with REFERENCER_MISSING
 * although the item was found - confirm whether a MISMATCH bit was meant.
 */
11222 err |= REFERENCER_MISSING;
/* Every stripe must be backed by a dev extent pointing back at this chunk */
11226 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11227 for (i = 0; i < num_stripes; i++) {
11228 btrfs_release_path(&path);
11229 btrfs_init_path(&path);
11230 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11231 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11232 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11234 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11237 goto not_match_dev;
11239 leaf = path.nodes[0];
11240 ptr = btrfs_item_ptr(leaf, path.slots[0],
11241 struct btrfs_dev_extent);
11242 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11243 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11244 if (objectid != chunk_key.objectid ||
11245 offset != chunk_key.offset ||
11246 btrfs_dev_extent_length(leaf, ptr) != length)
11247 goto not_match_dev;
11250 err |= BACKREF_MISSING;
/*
 * NOTE(review): this message prints chunk_key.objectid as the range start
 * while every other message in this function prints chunk_key.offset -
 * confirm and align.
 */
11252 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11253 chunk_key.objectid, chunk_end, i);
11256 btrfs_release_path(&path);
11262 * Main entry function to check known items and update related accounting info
/*
 * Walk the items of leaf @eb and dispatch each one, by key type, to the
 * matching checker (file extent, block group, dev item, chunk, dev extent,
 * extent/metadata item and the keyed backref types).
 * EXTENT_CSUM items are not checked; their item size is only added to
 * total_csum_bytes.
 */
11264 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11266 struct btrfs_fs_info *fs_info = root->fs_info;
11267 struct btrfs_key key;
11270 struct btrfs_extent_data_ref *dref;
11275 btrfs_item_key_to_cpu(eb, &key, slot);
11279 case BTRFS_EXTENT_DATA_KEY:
11280 ret = check_extent_data_item(root, eb, slot);
11283 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11284 ret = check_block_group_item(fs_info, eb, slot);
11287 case BTRFS_DEV_ITEM_KEY:
11288 ret = check_dev_item(fs_info, eb, slot);
11291 case BTRFS_CHUNK_ITEM_KEY:
11292 ret = check_chunk_item(fs_info, eb, slot);
11295 case BTRFS_DEV_EXTENT_KEY:
11296 ret = check_dev_extent_item(fs_info, eb, slot);
11299 case BTRFS_EXTENT_ITEM_KEY:
11300 case BTRFS_METADATA_ITEM_KEY:
11301 ret = check_extent_item(fs_info, eb, slot);
11304 case BTRFS_EXTENT_CSUM_KEY:
11305 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backrefs: the referencer is taken from key.offset */
11307 case BTRFS_TREE_BLOCK_REF_KEY:
11308 ret = check_tree_block_backref(fs_info, key.offset,
11312 case BTRFS_EXTENT_DATA_REF_KEY:
11313 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11314 ret = check_extent_data_backref(fs_info,
11315 btrfs_extent_data_ref_root(eb, dref),
11316 btrfs_extent_data_ref_objectid(eb, dref),
11317 btrfs_extent_data_ref_offset(eb, dref),
11319 btrfs_extent_data_ref_count(eb, dref));
11322 case BTRFS_SHARED_BLOCK_REF_KEY:
11323 ret = check_shared_block_backref(fs_info, key.offset,
11327 case BTRFS_SHARED_DATA_REF_KEY:
11328 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot while there are items left in the leaf */
11336 if (++slot < btrfs_header_nritems(eb))
11343 * Helper function for later fs/subvol tree check. To determine if a tree
11344 * block should be checked.
11345 * This function ensures that only the direct referencer with the lowest rootid
11346 * checks a fs/subvolume tree block.
11348 * Backref check at extent tree would detect errors like missing subvolume
11349 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root is the one that should check tree block @eb.
 *
 * The extent/metadata item describing @eb is located in the extent tree
 * (search for (bytenr, METADATA_ITEM, -1), then btrfs_previous_extent_item)
 * and its inline refs are scanned. Finding an inline TREE_BLOCK_REF whose
 * root id is lower than root->objectid ends the scan early: a lower root
 * references the block too.
 */
11351 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11353 struct btrfs_root *extent_root = root->fs_info->extent_root;
11354 struct btrfs_key key;
11355 struct btrfs_path path;
11356 struct extent_buffer *leaf;
11358 struct btrfs_extent_item *ei;
11364 struct btrfs_extent_inline_ref *iref;
11367 btrfs_init_path(&path);
11368 key.objectid = btrfs_header_bytenr(eb);
11369 key.type = BTRFS_METADATA_ITEM_KEY;
11370 key.offset = (u64)-1;
11373 * Any failure in backref resolving means we can't determine
11374 * whom the tree block belongs to.
11375 * So in that case, we need to check that tree block
11377 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11381 ret = btrfs_previous_extent_item(extent_root, &path,
11382 btrfs_header_bytenr(eb));
11386 leaf = path.nodes[0];
11387 slot = path.slots[0];
11388 btrfs_item_key_to_cpu(leaf, &key, slot);
11389 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * Inline refs start right after the extent item for METADATA_ITEM; the
 * other layout has a btrfs_tree_block_info in between.
 */
11391 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11392 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11394 struct btrfs_tree_block_info *info;
11396 info = (struct btrfs_tree_block_info *)(ei + 1);
11397 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11400 item_size = btrfs_item_size_nr(leaf, slot);
11401 ptr = (unsigned long)iref;
11402 end = (unsigned long)ei + item_size;
11403 while (ptr < end) {
11404 iref = (struct btrfs_extent_inline_ref *)ptr;
11405 type = btrfs_extent_inline_ref_type(leaf, iref);
11406 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11409 * We only check the tree block if current root is
11410 * the lowest referencer of it.
11412 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11413 offset < root->objectid) {
11414 btrfs_release_path(&path);
11418 ptr += btrfs_extent_inline_ref_size(type);
11421 * Normally we should also check keyed tree block ref, but that may be
11422 * very time consuming. Inlined ref should already make us skip a lot
11423 * of refs now. So skip search keyed tree block ref.
11427 btrfs_release_path(&path);
11432 * Traversal function for tree block. We will do:
11433 * 1) Skip shared fs/subvolume tree blocks
11434 * 2) Update related bytes accounting
11435 * 3) Pre-order traversal
/*
 * Recursively check tree block @node of @root and everything below it.
 *
 * For fs/subvolume trees the block is skipped when should_check() says a
 * different root is responsible for it. Otherwise the global byte counters
 * are updated, the block's own backref is verified with
 * check_tree_block_ref(), leaves are passed to check_leaf_items(), and for
 * nodes every child is read with read_tree_block() and traversed in turn.
 */
11437 static int traverse_tree_block(struct btrfs_root *root,
11438 struct extent_buffer *node)
11440 struct extent_buffer *eb;
11441 struct btrfs_key key;
11442 struct btrfs_key drop_key;
11450 * Skip shared fs/subvolume tree block, in that case they will
11451 * be checked by referencer with lowest rootid
11453 if (is_fstree(root->objectid) && !should_check(root, node))
11456 /* Update bytes accounting */
11457 total_btree_bytes += node->len;
11458 if (fs_root_objectid(btrfs_header_owner(node)))
11459 total_fs_tree_bytes += node->len;
11460 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11461 total_extent_tree_bytes += node->len;
/* Remember (once) that a reloc-owned block without the RELOC flag was seen */
11462 if (!found_old_backref &&
11463 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11464 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11465 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11466 found_old_backref = 1;
11468 /* pre-order traversal, check itself first */
11469 level = btrfs_header_level(node);
11470 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11471 btrfs_header_level(node),
11472 btrfs_header_owner(node));
11476 "check %s failed root %llu bytenr %llu level %d, force continue check",
11477 level ? "node":"leaf", root->objectid,
11478 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf: account the free space as waste and check every item */
11481 btree_space_waste += btrfs_leaf_free_space(root, node);
11482 ret = check_leaf_items(root, node);
/* Node: unused key pointer slots count as wasted space */
11487 nr = btrfs_header_nritems(node);
11488 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11489 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11490 sizeof(struct btrfs_key_ptr);
11492 /* Then check all its children */
11493 for (i = 0; i < nr; i++) {
11494 u64 blocknr = btrfs_node_blockptr(node, i);
11496 btrfs_node_key_to_cpu(node, &key, i);
/* Children already covered by the root's drop progress are special-cased */
11497 if (level == root->root_item.drop_level &&
11498 is_dropped_key(&key, &drop_key))
11502 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11503 * to call the function itself.
11505 eb = read_tree_block(root->fs_info, blocknr,
11506 root->fs_info->nodesize, 0);
11507 if (extent_buffer_uptodate(eb)) {
11508 ret = traverse_tree_block(root, eb);
11511 free_extent_buffer(eb);
11518 * Low memory usage version check_chunks_and_extents.
/*
 * Low memory variant of the chunk/extent check.
 *
 * Traverses the chunk tree and the tree root first, then iterates the
 * ROOT_ITEMs of the tree root starting at BTRFS_EXTENT_TREE_OBJECTID and
 * traverses each tree found. Reloc trees are read with
 * btrfs_read_fs_root_no_cache() and released with btrfs_free_fs_root()
 * once traversed; all other trees go through btrfs_read_fs_root().
 */
11520 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11522 struct btrfs_path path;
11523 struct btrfs_key key;
11524 struct btrfs_root *root1;
11525 struct btrfs_root *cur_root;
11529 root1 = root->fs_info->chunk_root;
11530 ret = traverse_tree_block(root1, root1->node);
11533 root1 = root->fs_info->tree_root;
11534 ret = traverse_tree_block(root1, root1->node);
/* Start the ROOT_ITEM walk at the extent tree's root item */
11537 btrfs_init_path(&path);
11538 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11540 key.type = BTRFS_ROOT_ITEM_KEY;
11542 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
/* NOTE(review): "treet" in the message below looks like a typo for "tree" */
11544 error("cannot find extent treet in tree_root");
11549 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11550 if (key.type != BTRFS_ROOT_ITEM_KEY)
11552 key.offset = (u64)-1;
11554 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11555 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11558 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11559 if (IS_ERR(cur_root) || !cur_root) {
11560 error("failed to read tree: %lld", key.objectid);
11564 ret = traverse_tree_block(cur_root, cur_root->node);
/* Reloc roots were read uncached above, so they are freed here */
11567 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11568 btrfs_free_fs_root(cur_root);
11570 ret = btrfs_next_item(root1, &path);
11576 btrfs_release_path(&path);
/*
 * Re-initialise the root node of @root inside transaction @trans.
 *
 * The block used as the new root (c) gets its header zeroed and refilled:
 * level, bytenr, generation (trans->transid), backref revision, owner, fsid
 * and chunk tree uuid; it is then marked dirty. When the new block starts at
 * the same bytenr as the old root node, the root item's generation and level
 * are updated and written with btrfs_update_root(). The old node is
 * released and @root is put on the dirty list.
 *
 * NOTE(review): @overwrite presumably selects between reusing the current
 * node and allocating a fresh block with btrfs_alloc_free_block() - the
 * selecting condition is not visible in this excerpt, confirm.
 */
11580 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11581 struct btrfs_root *root, int overwrite)
11583 struct extent_buffer *c;
11584 struct extent_buffer *old = root->node;
11587 struct btrfs_disk_key disk_key = {0,0,0};
11593 extent_buffer_get(c);
11596 c = btrfs_alloc_free_block(trans, root,
11597 root->fs_info->nodesize,
11598 root->root_key.objectid,
11599 &disk_key, level, 0, 0);
11602 extent_buffer_get(c);
/* Wipe the header and rebuild it for a fresh block */
11606 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11607 btrfs_set_header_level(c, level);
11608 btrfs_set_header_bytenr(c, c->start);
11609 btrfs_set_header_generation(c, trans->transid);
11610 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11611 btrfs_set_header_owner(c, root->root_key.objectid);
11613 write_extent_buffer(c, root->fs_info->fsid,
11614 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11616 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11617 btrfs_header_chunk_tree_uuid(c),
11620 btrfs_mark_buffer_dirty(c);
11622 * this case can happen in the following case:
11624 * 1.overwrite previous root.
11626 * 2.reinit reloc data root, this is because we skip pin
11627 * down reloc data tree before which means we can allocate
11628 * same block bytenr here.
11630 if (old->start == c->start) {
11631 btrfs_set_root_generation(&root->root_item,
11633 root->root_item.level = btrfs_header_level(root->node);
11634 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11635 &root->root_key, &root->root_item);
11637 free_extent_buffer(c);
11641 free_extent_buffer(old);
11643 add_root_to_dirty_list(root);
/*
 * Pin tree block @eb and, recursively, the tree blocks reachable from it.
 *
 * A block whose range is already marked EXTENT_DIRTY in
 * fs_info->pinned_extents is detected up front. For item keys, ROOT_ITEMs
 * (except the extent root and the reloc roots) are followed: the root block
 * is read and pinned recursively with tree_root = 0. For node pointers, the
 * child is pinned directly when level == 1 and @tree_root is unset,
 * otherwise it is read and pinned recursively with the same @tree_root.
 */
11647 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11648 struct extent_buffer *eb, int tree_root)
11650 struct extent_buffer *tmp;
11651 struct btrfs_root_item *ri;
11652 struct btrfs_key key;
11655 int level = btrfs_header_level(eb);
11661 * If we have pinned this block before, don't pin it again.
11662 * This can not only avoid forever loop with broken filesystem
11663 * but also give us some speedups.
11665 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11666 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11669 btrfs_pin_extent(fs_info, eb->start, eb->len);
11671 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11672 nritems = btrfs_header_nritems(eb);
11673 for (i = 0; i < nritems; i++) {
11675 btrfs_item_key_to_cpu(eb, &key, i);
11676 if (key.type != BTRFS_ROOT_ITEM_KEY)
11678 /* Skip the extent root and reloc roots */
11679 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11680 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11681 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11683 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11684 bytenr = btrfs_disk_root_bytenr(eb, ri);
11687 * If at any point we start needing the real root we
11688 * will have to build a stump root for the root we are
11689 * in, but for now this doesn't actually use the root so
11690 * just pass in extent_root.
11692 tmp = read_tree_block(fs_info, bytenr, nodesize, 0);
11693 if (!extent_buffer_uptodate(tmp)) {
11694 fprintf(stderr, "Error reading root block\n");
/* Trees referenced from a root item are walked with tree_root = 0 */
11697 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11698 free_extent_buffer(tmp);
11702 bytenr = btrfs_node_blockptr(eb, i);
11704 /* If we aren't the tree root don't read the block */
11705 if (level == 1 && !tree_root) {
11706 btrfs_pin_extent(fs_info, bytenr, nodesize);
11710 tmp = read_tree_block(fs_info, bytenr,
11712 if (!extent_buffer_uptodate(tmp)) {
11713 fprintf(stderr, "Error reading tree block\n");
11716 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11717 free_extent_buffer(tmp);
/*
 * Pin the metadata blocks in use: first the chunk tree (tree_root = 0),
 * then the tree root (tree_root = 1), both via pin_down_tree_blocks().
 * The visible return is the result of the tree root walk.
 */
11726 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11730 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11734 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * The avail_*_alloc_bits masks are cleared, then every CHUNK_ITEM found in
 * the chunk tree is turned into a block group with btrfs_add_block_group()
 * and its range is marked dirty in fs_info->free_space_cache. Afterwards
 * the block groups are walked with btrfs_lookup_first_block_group().
 */
11737 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11739 struct btrfs_block_group_cache *cache;
11740 struct btrfs_path path;
11741 struct extent_buffer *leaf;
11742 struct btrfs_chunk *chunk;
11743 struct btrfs_key key;
11747 btrfs_init_path(&path);
11749 key.type = BTRFS_CHUNK_ITEM_KEY;
11751 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11753 btrfs_release_path(&path);
11758 * We do this in case the block groups were screwed up and had alloc
11759 * bits that aren't actually set on the chunks. This happens with
11760 * restored images every time and could happen in real life I guess.
11762 fs_info->avail_data_alloc_bits = 0;
11763 fs_info->avail_metadata_alloc_bits = 0;
11764 fs_info->avail_system_alloc_bits = 0;
11766 /* First we need to create the in-memory block groups */
/* Move on to the next leaf once the current one is exhausted */
11768 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11769 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11771 btrfs_release_path(&path);
11779 leaf = path.nodes[0];
11780 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11781 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11786 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11787 btrfs_add_block_group(fs_info, 0,
11788 btrfs_chunk_type(leaf, chunk),
11789 key.objectid, key.offset,
11790 btrfs_chunk_length(leaf, chunk));
11791 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11792 key.offset + btrfs_chunk_length(leaf, chunk));
/* Step through the block groups; start is moved past each one found */
11797 cache = btrfs_lookup_first_block_group(fs_info, start);
11801 start = cache->key.objectid + cache->key.offset;
11804 btrfs_release_path(&path);
/*
 * Remove the traces of a pending balance from the tree root.
 *
 * In order: the (BTRFS_BALANCE_OBJECTID, BALANCE_ITEM) item is searched and
 * deleted, the ROOT_ITEMs of BTRFS_TREE_RELOC_OBJECTID are deleted in
 * batches with btrfs_del_items(), and finally the data reloc tree is read,
 * re-initialised with btrfs_fsck_reinit_root() and given a root directory
 * with btrfs_make_root_dir().
 */
11808 static int reset_balance(struct btrfs_trans_handle *trans,
11809 struct btrfs_fs_info *fs_info)
11811 struct btrfs_root *root = fs_info->tree_root;
11812 struct btrfs_path path;
11813 struct extent_buffer *leaf;
11814 struct btrfs_key key;
11815 int del_slot, del_nr = 0;
11819 btrfs_init_path(&path);
11820 key.objectid = BTRFS_BALANCE_OBJECTID;
11821 key.type = BTRFS_BALANCE_ITEM_KEY;
11823 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11828 goto reinit_data_reloc;
11833 ret = btrfs_del_item(trans, root, &path);
11836 btrfs_release_path(&path);
/* Next, drop the reloc tree root items */
11838 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11839 key.type = BTRFS_ROOT_ITEM_KEY;
11841 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11845 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11850 ret = btrfs_del_items(trans, root, &path,
11857 btrfs_release_path(&path);
11860 ret = btrfs_search_slot(trans, root, &key, &path,
11867 leaf = path.nodes[0];
11868 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11869 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11871 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11876 del_slot = path.slots[0];
11885 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11889 btrfs_release_path(&path);
/* Finally rebuild the data reloc tree; root now refers to that tree */
11892 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11893 key.type = BTRFS_ROOT_ITEM_KEY;
11894 key.offset = (u64)-1;
11895 root = btrfs_read_fs_root(fs_info, &key);
11896 if (IS_ERR(root)) {
11897 fprintf(stderr, "Error reading data reloc tree\n");
11898 ret = PTR_ERR(root);
11901 record_root_in_trans(trans, root);
11902 ret = btrfs_fsck_reinit_root(trans, root, 0);
11905 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11907 btrfs_release_path(&path);
/*
 * Rebuild the extent tree from scratch inside @trans.
 *
 * Filesystems with the MIXED_GROUPS incompat flag get the "not supported"
 * message below. Otherwise the sequence is: pin all metadata in use
 * (pin_metadata_blocks), drop and recreate the in-memory block groups
 * (btrfs_free_block_groups + reset_block_groups), re-initialise the extent
 * root, insert one block group item per in-memory block group, and reset
 * any pending balance (reset_balance).
 */
11911 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11912 struct btrfs_fs_info *fs_info)
11918 * The only reason we don't do this is because right now we're just
11919 * walking the trees we find and pinning down their bytes, we don't look
11920 * at any of the leaves. In order to do mixed groups we'd have to check
11921 * the leaves of any fs roots and pin down the bytes for any file
11922 * extents we find. Not hard but why do it if we don't have to?
11924 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11925 fprintf(stderr, "We don't support re-initing the extent tree "
11926 "for mixed block groups yet, please notify a btrfs "
11927 "developer you want to do this so they can add this "
11928 "functionality.\n");
11933 * first we need to walk all of the trees except the extent tree and pin
11934 * down the bytes that are in use so we don't overwrite any existing
11937 ret = pin_metadata_blocks(fs_info);
11939 fprintf(stderr, "error pinning down used bytes\n");
11944 * Need to drop all the block groups since we're going to recreate all
11947 btrfs_free_block_groups(fs_info);
11948 ret = reset_block_groups(fs_info);
11950 fprintf(stderr, "error resetting the block groups\n");
11954 /* Ok we can allocate now, reinit the extent root */
11955 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11957 fprintf(stderr, "extent root initialization failed\n");
11959 * When the transaction code is updated we should end the
11960 * transaction, but for now progs only knows about commit so
11961 * just return an error.
11967 * Now we have all the in-memory block groups setup so we can make
11968 * allocations properly, and the metadata we care about is safe since we
11969 * pinned all of it above.
11972 struct btrfs_block_group_cache *cache;
11974 cache = btrfs_lookup_first_block_group(fs_info, start);
11977 start = cache->key.objectid + cache->key.offset;
/* Write the block group item of this cache entry into the new extent tree */
11978 ret = btrfs_insert_item(trans, fs_info->extent_root,
11979 &cache->key, &cache->item,
11980 sizeof(cache->item));
11982 fprintf(stderr, "Error adding block group\n");
11985 btrfs_extent_post_op(trans, fs_info->extent_root);
11988 ret = reset_balance(trans, fs_info);
11990 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force tree block @eb to be COWed.
 *
 * The owner root recorded in the block header is read, a transaction is
 * started on it, and btrfs_search_slot() is called with cow = 1 and
 * path.lowest_level set to the block's level, using the block's first key.
 * The transaction is then committed and the path released.
 * Returns PTR_ERR() of the root or of the transaction handle when either
 * cannot be obtained.
 */
11995 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11997 struct btrfs_path path;
11998 struct btrfs_trans_handle *trans;
11999 struct btrfs_key key;
12002 printf("Recowing metadata block %llu\n", eb->start);
12003 key.objectid = btrfs_header_owner(eb);
12004 key.type = BTRFS_ROOT_ITEM_KEY;
12005 key.offset = (u64)-1;
/* From here on root refers to the tree that owns eb */
12007 root = btrfs_read_fs_root(root->fs_info, &key);
12008 if (IS_ERR(root)) {
12009 fprintf(stderr, "Couldn't find owner root %llu\n",
12011 return PTR_ERR(root);
12014 trans = btrfs_start_transaction(root, 1);
12016 return PTR_ERR(trans);
12018 btrfs_init_path(&path);
/* Stop the search at eb's own level; use a node key or an item key */
12019 path.lowest_level = btrfs_header_level(eb);
12020 if (path.lowest_level)
12021 btrfs_node_key_to_cpu(eb, &key, 0);
12023 btrfs_item_key_to_cpu(eb, &key, 0);
12025 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12026 btrfs_commit_transaction(trans, root);
12027 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the tree identified by
 * bad->root_id.
 *
 * The tree is read with btrfs_read_fs_root(), a transaction is started, the
 * key is searched with ins_len = -1 / cow = 1 and the item is removed with
 * btrfs_del_item(); the transaction is committed afterwards.
 * Returns PTR_ERR() of the root or of the transaction handle when either
 * cannot be obtained.
 */
12031 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12033 struct btrfs_path path;
12034 struct btrfs_trans_handle *trans;
12035 struct btrfs_key key;
12038 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12039 bad->key.type, bad->key.offset);
12040 key.objectid = bad->root_id;
12041 key.type = BTRFS_ROOT_ITEM_KEY;
12042 key.offset = (u64)-1;
/* From here on root refers to the tree holding the bad item */
12044 root = btrfs_read_fs_root(root->fs_info, &key);
12045 if (IS_ERR(root)) {
12046 fprintf(stderr, "Couldn't find owner root %llu\n",
12048 return PTR_ERR(root);
12051 trans = btrfs_start_transaction(root, 1);
12053 return PTR_ERR(trans);
12055 btrfs_init_path(&path);
12056 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12062 ret = btrfs_del_item(trans, root, &path);
12064 btrfs_commit_transaction(trans, root);
12065 btrfs_release_path(&path);
/*
 * Clear the log tree reference in the super block: log_root and
 * log_root_level of the super copy are set to 0 within a transaction that
 * is committed right away. ret carries PTR_ERR() of the transaction handle
 * if it cannot be started, otherwise the commit result.
 */
12069 static int zero_log_tree(struct btrfs_root *root)
12071 struct btrfs_trans_handle *trans;
12074 trans = btrfs_start_transaction(root, 1);
12075 if (IS_ERR(trans)) {
12076 ret = PTR_ERR(trans);
12079 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12080 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12081 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data range starting at @start.
 *
 * The range is processed one sector (fs_info->sectorsize) at a time: the
 * data at start + offset is read into @buf with read_extent_data() and
 * handed to btrfs_csum_file_block(), until offset reaches len.
 */
12085 static int populate_csum(struct btrfs_trans_handle *trans,
12086 struct btrfs_root *csum_root, char *buf, u64 start,
12093 while (offset < len) {
12094 sectorsize = csum_root->fs_info->sectorsize;
12095 ret = read_extent_data(csum_root, buf, start + offset,
12099 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12100 start + offset, buf, sectorsize);
12103 offset += sectorsize;
/*
 * Fill the csum tree with checksums for the file extents of one fs/subvolume
 * tree (@cur_root).
 *
 * A one-sector buffer is allocated, the tree is walked item by item, and for
 * every EXTENT_DATA item of type BTRFS_FILE_EXTENT_REG the on-disk range
 * (disk_bytenr, disk_num_bytes) is passed to populate_csum(). A -EEXIST
 * result from populate_csum() is special-cased.
 */
12108 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12109 struct btrfs_root *csum_root,
12110 struct btrfs_root *cur_root)
12112 struct btrfs_path path;
12113 struct btrfs_key key;
12114 struct extent_buffer *node;
12115 struct btrfs_file_extent_item *fi;
12122 buf = malloc(cur_root->fs_info->sectorsize);
12126 btrfs_init_path(&path);
12130 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12133 /* Iterate all regular file extents and fill its csum */
12135 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12137 if (key.type != BTRFS_EXTENT_DATA_KEY)
12139 node = path.nodes[0];
12140 slot = path.slots[0];
12141 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12142 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent, not the file range */
12144 start = btrfs_file_extent_disk_bytenr(node, fi);
12145 len = btrfs_file_extent_disk_num_bytes(node, fi);
12147 ret = populate_csum(trans, csum_root, buf, start, len);
12148 if (ret == -EEXIST)
12154 * TODO: if next leaf is corrupted, jump to nearest next valid
12157 ret = btrfs_next_item(cur_root, &path);
12167 btrfs_release_path(&path);
/*
 * Fill the csum tree by scanning every fs/subvolume tree.
 *
 * The tree root is walked starting at BTRFS_FS_TREE_OBJECTID; each
 * ROOT_ITEM whose objectid passes is_fstree() is read with
 * btrfs_read_fs_root() and handed to fill_csum_tree_from_one_fs_root().
 * Keys with an objectid above BTRFS_LAST_FREE_OBJECTID are special-cased.
 */
12172 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12173 struct btrfs_root *csum_root)
12175 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12176 struct btrfs_path path;
12177 struct btrfs_root *tree_root = fs_info->tree_root;
12178 struct btrfs_root *cur_root;
12179 struct extent_buffer *node;
12180 struct btrfs_key key;
12184 btrfs_init_path(&path);
12185 key.objectid = BTRFS_FS_TREE_OBJECTID;
12187 key.type = BTRFS_ROOT_ITEM_KEY;
12188 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12197 node = path.nodes[0];
12198 slot = path.slots[0];
12199 btrfs_item_key_to_cpu(node, &key, slot);
12200 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12202 if (key.type != BTRFS_ROOT_ITEM_KEY)
12204 if (!is_fstree(key.objectid))
/* The offset is set to (u64)-1 before the root lookup */
12206 key.offset = (u64)-1;
12208 cur_root = btrfs_read_fs_root(fs_info, &key);
12209 if (IS_ERR(cur_root) || !cur_root) {
12210 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12214 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12219 ret = btrfs_next_item(tree_root, &path);
12229 btrfs_release_path(&path);
/*
 * Fill the csum tree by scanning the extent tree.
 *
 * The extent tree is walked leaf by leaf; every EXTENT_ITEM carrying
 * BTRFS_EXTENT_FLAG_DATA is passed to populate_csum(), with key.objectid as
 * the start of the range. A one-sector buffer is allocated for the reads.
 */
12233 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12234 struct btrfs_root *csum_root)
12236 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12237 struct btrfs_path path;
12238 struct btrfs_extent_item *ei;
12239 struct extent_buffer *leaf;
12241 struct btrfs_key key;
12244 btrfs_init_path(&path);
12246 key.type = BTRFS_EXTENT_ITEM_KEY;
12248 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12250 btrfs_release_path(&path);
12254 buf = malloc(csum_root->fs_info->sectorsize);
12256 btrfs_release_path(&path);
/* Move on to the next leaf once the current one is exhausted */
12261 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12262 ret = btrfs_next_leaf(extent_root, &path);
12270 leaf = path.nodes[0];
12272 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12273 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent items without the DATA flag are special-cased */
12278 ei = btrfs_item_ptr(leaf, path.slots[0],
12279 struct btrfs_extent_item);
12280 if (!(btrfs_extent_flags(leaf, ei) &
12281 BTRFS_EXTENT_FLAG_DATA)) {
12286 ret = populate_csum(trans, csum_root, buf, key.objectid,
12293 btrfs_release_path(&path);
/*
 * Recalculate the data checksums and put them into the csum tree.
 *
 * Extent tree init wipes out all the extent info, so in that case the extent
 * tree cannot be relied upon and the fs/subvolume trees are walked instead.
 *
 * @trans:		transaction handle handed to the selected walker
 * @csum_root:		csum tree to fill
 * @search_fs_tree:	non-zero to walk the fs/subvol trees, zero to walk the
 *			extent tree
 *
 * Returns whatever the selected walker returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ?
		fill_csum_tree_from_fs(trans, csum_root) :
		fill_csum_tree_from_extent(trans, csum_root);
}
12315 static void free_roots_info_cache(void)
12317 if (!roots_info_cache)
12320 while (!cache_tree_empty(roots_info_cache)) {
12321 struct cache_extent *entry;
12322 struct root_item_info *rii;
12324 entry = first_cache_extent(roots_info_cache);
12327 remove_cache_extent(roots_info_cache, entry);
12328 rii = container_of(entry, struct root_item_info, cache_extent);
12332 free(roots_info_cache);
12333 roots_info_cache = NULL;
/*
 * Populate roots_info_cache from the extent tree.
 *
 * Walks info->extent_root and looks at every EXTENT_ITEM / METADATA_ITEM that
 * describes a tree block. When the first inline ref of such an item is a
 * BTRFS_TREE_BLOCK_REF_KEY, the ref offset is taken as the owning root id and
 * a struct root_item_info keyed by that id (start = root id, size = 1) is
 * created or updated so that it remembers the highest-level block seen so
 * far, together with its bytenr and generation.
 *
 * @info: filesystem whose extent root is scanned
 */
12336 static int build_roots_info_cache(struct btrfs_fs_info *info)
12339 struct btrfs_key key;
12340 struct extent_buffer *leaf;
12341 struct btrfs_path path;
/* Allocate and initialize the global cache tree if it does not exist yet */
12343 if (!roots_info_cache) {
12344 roots_info_cache = malloc(sizeof(*roots_info_cache));
12345 if (!roots_info_cache)
12347 cache_tree_init(roots_info_cache);
12350 btrfs_init_path(&path);
12352 key.type = BTRFS_EXTENT_ITEM_KEY;
12354 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12357 leaf = path.nodes[0];
12360 struct btrfs_key found_key;
12361 struct btrfs_extent_item *ei;
12362 struct btrfs_extent_inline_ref *iref;
12363 int slot = path.slots[0];
12368 struct cache_extent *entry;
12369 struct root_item_info *rii;
/* Ran off the end of the current leaf: move on and refresh leaf and slot */
12371 if (slot >= btrfs_header_nritems(leaf)) {
12372 ret = btrfs_next_leaf(info->extent_root, &path);
12379 leaf = path.nodes[0];
12380 slot = path.slots[0];
12383 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only extent items and skinny metadata items are of interest */
12385 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12386 found_key.type != BTRFS_METADATA_ITEM_KEY)
12389 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12390 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM without the TREE_BLOCK flag is not a tree block */
12392 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12393 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * Locate the first inline ref and the block level. For a METADATA_ITEM the
 * ref directly follows the extent item and the level is the key offset;
 * otherwise a btrfs_tree_block_info sits in between and carries the level.
 */
12396 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12397 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12398 level = found_key.offset;
12400 struct btrfs_tree_block_info *binfo;
12402 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12403 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12404 level = btrfs_tree_block_level(leaf, binfo);
12408 * For a root extent, it must be of the following type and the
12409 * first (and only one) iref in the item.
12411 type = btrfs_extent_inline_ref_type(leaf, iref);
12412 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* For this ref type the inline ref offset is used as the owning root id */
12415 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12416 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* New record: level (u8)-1 marks that no block has been recorded yet */
12418 rii = malloc(sizeof(struct root_item_info));
12423 rii->cache_extent.start = root_id;
12424 rii->cache_extent.size = 1;
12425 rii->level = (u8)-1;
12426 entry = &rii->cache_extent;
12427 ret = insert_cache_extent(roots_info_cache, entry);
12430 rii = container_of(entry, struct root_item_info,
12434 ASSERT(rii->cache_extent.start == root_id);
12435 ASSERT(rii->cache_extent.size == 1);
/*
 * Keep the highest-level block as the root candidate; a higher level (or the
 * very first block) replaces the record and resets node_count to 1.
 * NOTE(review): the equal-level branch presumably counts the extra candidate
 * in node_count - confirm.
 */
12437 if (level > rii->level || rii->level == (u8)-1) {
12438 rii->level = level;
12439 rii->bytenr = found_key.objectid;
12440 rii->gen = btrfs_extent_generation(leaf, ei);
12441 rii->node_count = 1;
12442 } else if (level == rii->level) {
12450 btrfs_release_path(&path);
/*
 * Compare one on-disk root item with the root node recorded for that root in
 * roots_info_cache and, unless read_only_mode is set, rewrite the bytenr,
 * level and generation of the item in place.
 *
 * @path:		points at the root item (path->nodes[0] and
 *			path->slots[0] are used)
 * @root_key:		key of the root item; its objectid is the root id
 *			that is looked up in the cache
 * @read_only_mode:	when non-zero the item is only compared and reported,
 *			never written
 */
12455 static int maybe_repair_root_item(struct btrfs_path *path,
12456 const struct btrfs_key *root_key,
12457 const int read_only_mode)
12459 const u64 root_id = root_key->objectid;
12460 struct cache_extent *entry;
12461 struct root_item_info *rii;
12462 struct btrfs_root_item ri;
12463 unsigned long offset;
/* Find the record built by build_roots_info_cache() for this root id */
12465 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12468 "Error: could not find extent items for root %llu\n",
12469 root_key->objectid);
12473 rii = container_of(entry, struct root_item_info, cache_extent);
12474 ASSERT(rii->cache_extent.start == root_id);
12475 ASSERT(rii->cache_extent.size == 1);
/* Anything other than exactly one candidate root node is reported */
12477 if (rii->node_count != 1) {
12479 "Error: could not find btree root extent for root %llu\n",
/* Work on a local copy of the root item read out of the leaf */
12484 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12485 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
/* The item is outdated if bytenr, level or generation disagree */
12487 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12488 btrfs_root_level(&ri) != rii->level ||
12489 btrfs_root_generation(&ri) != rii->gen) {
12492 * If we're in repair mode but our caller told us to not update
12493 * the root item, i.e. just check if it needs to be updated, don't
12494 * print this message, since the caller will call us again shortly
12495 * for the same root item without read only mode (the caller will
12496 * open a transaction first).
12498 if (!(read_only_mode && repair))
12500 "%sroot item for root %llu,"
12501 " current bytenr %llu, current gen %llu, current level %u,"
12502 " new bytenr %llu, new gen %llu, new level %u\n",
12503 (read_only_mode ? "" : "fixing "),
12505 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12506 btrfs_root_level(&ri),
12507 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the node found in the extent tree is reported */
12509 if (btrfs_root_generation(&ri) > rii->gen) {
12511 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12512 root_id, btrfs_root_generation(&ri), rii->gen);
/* Patch the local copy and write it back over the item in the leaf */
12516 if (!read_only_mode) {
12517 btrfs_set_root_bytenr(&ri, rii->bytenr);
12518 btrfs_set_root_level(&ri, rii->level);
12519 btrfs_set_root_generation(&ri, rii->gen);
12520 write_extent_buffer(path->nodes[0], &ri,
12521 offset, sizeof(ri));
12531 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12532 * caused read-only snapshots to be corrupted if they were created at a moment
12533 * when the source subvolume/snapshot had orphan items. The issue was that the
12534 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12535 * node instead of the post orphan cleanup root node.
12536 * So this function, and its callees, just detects and fixes those cases. Even
12537 * though the regression was for read-only snapshots, this function applies to
12538 * any snapshot/subvolume root.
12539 * This must be run before any other repair code - not doing it so, makes other
12540 * repair code delete or modify backrefs in the extent tree for example, which
12541 * will result in an inconsistent fs after repairing the root items.
12543 static int repair_root_items(struct btrfs_fs_info *info)
12545 struct btrfs_path path;
12546 struct btrfs_key key;
12547 struct extent_buffer *leaf;
12548 struct btrfs_trans_handle *trans = NULL;
12551 int need_trans = 0;
12553 btrfs_init_path(&path);
12555 ret = build_roots_info_cache(info);
12559 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12560 key.type = BTRFS_ROOT_ITEM_KEY;
12565 * Avoid opening and committing transactions if a leaf doesn't have
12566 * any root items that need to be fixed, so that we avoid rotating
12567 * backup roots unnecessarily.
12570 trans = btrfs_start_transaction(info->tree_root, 1);
12571 if (IS_ERR(trans)) {
12572 ret = PTR_ERR(trans);
12577 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12581 leaf = path.nodes[0];
12584 struct btrfs_key found_key;
12586 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12587 int no_more_keys = find_next_key(&path, &key);
12589 btrfs_release_path(&path);
12591 ret = btrfs_commit_transaction(trans,
12603 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12605 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12607 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12610 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12614 if (!trans && repair) {
12617 btrfs_release_path(&path);
12627 free_roots_info_cache();
12628 btrfs_release_path(&path);
12630 btrfs_commit_transaction(trans, info->tree_root);
12637 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12639 struct btrfs_trans_handle *trans;
12640 struct btrfs_block_group_cache *bg_cache;
12644 /* Clear all free space cache inodes and its extent data */
12646 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12649 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12652 current = bg_cache->key.objectid + bg_cache->key.offset;
12655 /* Don't forget to set cache_generation to -1 */
12656 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12657 if (IS_ERR(trans)) {
12658 error("failed to update super block cache generation");
12659 return PTR_ERR(trans);
12661 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12662 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Help text of "btrfs check", handed to usage() by cmd_check().
 * The table starts with the synopsis line and a one-line summary, followed
 * by the longer description and one entry per command line option.
 */
12667 const char * const cmd_check_usage[] = {
12668 "btrfs check [options] <device>",
12669 "Check structural integrity of a filesystem (unmounted).",
12670 "Check structural integrity of an unmounted filesystem. Verify internal",
12671 "trees' consistency and item connectivity. In the repair mode try to",
12672 "fix the problems found. ",
12673 "WARNING: the repair mode is considered dangerous",
12675 "-s|--super <superblock> use this superblock copy",
12676 "-b|--backup use the first valid backup root copy",
12677 "--repair try to repair the filesystem",
12678 "--readonly run in read-only mode (default)",
12679 "--init-csum-tree create a new CRC tree",
12680 "--init-extent-tree create a new extent tree",
12681 "--mode <MODE> allows choice of memory/IO trade-offs",
12682 " where MODE is one of:",
12683 " original - read inodes and extents to memory (requires",
12684 " more memory, does less IO)",
12685 " lowmem - try to use less memory but read blocks again",
12687 "--check-data-csum verify checksums of data blocks",
12688 "-Q|--qgroup-report print a report on qgroup consistency",
12689 "-E|--subvol-extents <subvolid>",
12690 " print subvolume extents and sharing state",
12691 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12692 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12693 "-p|--progress indicate progress",
12694 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check".
 *
 * Parses the command line, rejects incompatible option combinations and
 * mounted devices, and opens the filesystem. It handles --clear-space-cache,
 * --qgroup-report and --subvol-extents, optionally reinitializes the extent
 * and csum trees, and runs the check phases in this order: extents and
 * chunks, root items, free space cache or tree, fs roots, csums, root refs
 * and quota groups. A statistics summary is printed at the end.
 *
 * @argc, @argv: command line; exactly one positional argument (the device)
 *               is expected after the options
 */
12698 int cmd_check(int argc, char **argv)
12700 struct cache_tree root_cache;
12701 struct btrfs_root *root;
12702 struct btrfs_fs_info *info;
12705 u64 tree_root_bytenr = 0;
12706 u64 chunk_root_bytenr = 0;
12707 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12711 int init_csum_tree = 0;
12713 int clear_space_cache = 0;
12714 int qgroup_report = 0;
12715 int qgroups_repaired = 0;
12716 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/*
 * Options without a short form get getopt values starting at 257
 * (presumably to stay clear of single-character option codes - confirm).
 */
12720 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12721 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12722 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12723 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12724 static const struct option long_options[] = {
12725 { "super", required_argument, NULL, 's' },
12726 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12727 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12728 { "init-csum-tree", no_argument, NULL,
12729 GETOPT_VAL_INIT_CSUM },
12730 { "init-extent-tree", no_argument, NULL,
12731 GETOPT_VAL_INIT_EXTENT },
12732 { "check-data-csum", no_argument, NULL,
12733 GETOPT_VAL_CHECK_CSUM },
12734 { "backup", no_argument, NULL, 'b' },
12735 { "subvol-extents", required_argument, NULL, 'E' },
12736 { "qgroup-report", no_argument, NULL, 'Q' },
12737 { "tree-root", required_argument, NULL, 'r' },
12738 { "chunk-root", required_argument, NULL,
12739 GETOPT_VAL_CHUNK_TREE },
12740 { "progress", no_argument, NULL, 'p' },
12741 { "mode", required_argument, NULL,
12743 { "clear-space-cache", required_argument, NULL,
12744 GETOPT_VAL_CLEAR_SPACE_CACHE},
12745 { NULL, 0, NULL, 0}
/* Option parsing; every option that modifies the fs adds OPEN_CTREE_WRITES */
12748 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12752 case 'a': /* ignored */ break;
12754 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* The superblock copy index is validated, then turned into a byte offset */
12757 num = arg_strtou64(optarg);
12758 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12760 "super mirror should be less than %d",
12761 BTRFS_SUPER_MIRROR_MAX);
12764 bytenr = btrfs_sb_offset(((int)num));
12765 printf("using SB copy %llu, bytenr %llu\n", num,
12766 (unsigned long long)bytenr);
12772 subvolid = arg_strtou64(optarg);
12775 tree_root_bytenr = arg_strtou64(optarg);
12777 case GETOPT_VAL_CHUNK_TREE:
12778 chunk_root_bytenr = arg_strtou64(optarg);
12781 ctx.progress_enabled = true;
12785 usage(cmd_check_usage);
12786 case GETOPT_VAL_REPAIR:
12787 printf("enabling repair mode\n");
12789 ctree_flags |= OPEN_CTREE_WRITES;
12791 case GETOPT_VAL_READONLY:
12794 case GETOPT_VAL_INIT_CSUM:
12795 printf("Creating a new CRC tree\n");
12796 init_csum_tree = 1;
12798 ctree_flags |= OPEN_CTREE_WRITES;
12800 case GETOPT_VAL_INIT_EXTENT:
12801 init_extent_tree = 1;
12802 ctree_flags |= (OPEN_CTREE_WRITES |
12803 OPEN_CTREE_NO_BLOCK_GROUPS);
12806 case GETOPT_VAL_CHECK_CSUM:
12807 check_data_csum = 1;
12809 case GETOPT_VAL_MODE:
12810 check_mode = parse_check_mode(optarg);
12811 if (check_mode == CHECK_MODE_UNKNOWN) {
12812 error("unknown mode: %s", optarg);
/* clear_space_cache records the requested cache version: 1 for v1, 2 for v2 */
12816 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12817 if (strcmp(optarg, "v1") == 0) {
12818 clear_space_cache = 1;
12819 } else if (strcmp(optarg, "v2") == 0) {
12820 clear_space_cache = 2;
12821 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12824 "invalid argument to --clear-space-cache, must be v1 or v2");
12827 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument, the device, must remain */
12832 if (check_argc_exact(argc - optind, 1))
12833 usage(cmd_check_usage);
/* Set up the progress reporting task when -p was given */
12835 if (ctx.progress_enabled) {
12836 ctx.tp = TASK_NOTHING;
12837 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12840 /* This check is the only reason for --readonly to exist */
12841 if (readonly && repair) {
12842 error("repair options are not compatible with --readonly");
12847 * Not supported yet
12849 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12850 error("low memory mode doesn't support repair yet");
12855 cache_tree_init(&root_cache);
/* A failed mount status lookup and a mounted device are both reported */
12857 if((ret = check_mounted(argv[optind])) < 0) {
12858 error("could not check mount status: %s", strerror(-ret));
12862 error("%s is currently mounted, aborting", argv[optind]);
12868 /* only allow partial opening under repair mode */
12870 ctree_flags |= OPEN_CTREE_PARTIAL;
12872 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12873 chunk_root_bytenr, ctree_flags);
12875 error("cannot open file system");
12881 global_info = info;
12882 root = info->fs_root;
/*
 * --clear-space-cache handling: the v1 request is refused when the
 * FREE_SPACE_TREE compat_ro bit is set, the v2 request only prints a message
 * when that bit is not set.
 */
12883 if (clear_space_cache == 1) {
12884 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12886 "free space cache v2 detected, use --clear-space-cache v2");
12890 printf("Clearing free space cache\n");
12891 ret = clear_free_space_cache(info);
12893 error("failed to clear free space cache");
12896 printf("Free space cache cleared\n");
12899 } else if (clear_space_cache == 2) {
12900 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12901 printf("no free space cache v2 to clear\n");
12905 printf("Clear free space cache v2\n");
12906 ret = btrfs_clear_free_space_tree(info);
12908 error("failed to clear free space cache v2: %d", ret);
12911 printf("free space cache v2 cleared\n");
12917 * repair mode will force us to commit transaction which
12918 * will make us fail to load log tree when mounting.
12920 if (repair && btrfs_super_log_root(info->super_copy)) {
12921 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12927 ret = zero_log_tree(root);
12930 error("failed to zero log tree: %d", ret);
/* Report-only modes: quota group report and per-subvolume extent state */
12935 uuid_unparse(info->super_copy->fsid, uuidbuf);
12936 if (qgroup_report) {
12937 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12939 ret = qgroup_verify_all(info);
12946 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12947 subvolid, argv[optind], uuidbuf);
12948 ret = print_extent_state(info, subvolid);
12952 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* The tree root, device tree and chunk tree nodes must all be readable */
12954 if (!extent_buffer_uptodate(info->tree_root->node) ||
12955 !extent_buffer_uptodate(info->dev_root->node) ||
12956 !extent_buffer_uptodate(info->chunk_root->node)) {
12957 error("critical roots corrupted, unable to check the filesystem");
/* --init-extent-tree and --init-csum-tree share a single transaction */
12963 if (init_extent_tree || init_csum_tree) {
12964 struct btrfs_trans_handle *trans;
12966 trans = btrfs_start_transaction(info->extent_root, 0);
12967 if (IS_ERR(trans)) {
12968 error("error starting transaction");
12969 ret = PTR_ERR(trans);
12974 if (init_extent_tree) {
12975 printf("Creating a new extent tree\n");
12976 ret = reinit_extent_tree(trans, info);
12982 if (init_csum_tree) {
12983 printf("Reinitialize checksum tree\n");
12984 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12986 error("checksum tree initialization failed: %d",
12993 ret = fill_csum_tree(trans, info->csum_root,
12997 error("checksum tree refilling failed: %d", ret);
13002 * Ok now we commit and run the normal fsck, which will add
13003 * extent entries for all of the items it finds.
13005 ret = btrfs_commit_transaction(trans, info->extent_root);
13010 if (!extent_buffer_uptodate(info->extent_root->node)) {
13011 error("critical: extent_root, unable to check the filesystem");
13016 if (!extent_buffer_uptodate(info->csum_root->node)) {
13017 error("critical: csum_root, unable to check the filesystem");
/* Phase: extents and chunks, using the implementation selected by --mode */
13023 if (!ctx.progress_enabled)
13024 fprintf(stderr, "checking extents\n");
13025 if (check_mode == CHECK_MODE_LOWMEM)
13026 ret = check_chunks_and_extents_v2(root);
13028 ret = check_chunks_and_extents(root);
13032 "errors found in extent allocation tree or chunk allocation");
/* Phase: root items; a positive result is the number of affected roots */
13034 ret = repair_root_items(info);
13037 error("failed to repair root items: %s", strerror(-ret));
13041 fprintf(stderr, "Fixed %d roots.\n", ret);
13043 } else if (ret > 0) {
13045 "Found %d roots with an outdated root item.\n",
13048 "Please run a filesystem check with the option --repair to fix them.\n");
/* Phase: free space tree or free space cache, depending on the compat_ro bit */
13054 if (!ctx.progress_enabled) {
13055 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13056 fprintf(stderr, "checking free space tree\n");
13058 fprintf(stderr, "checking free space cache\n");
13060 ret = check_space_cache(root);
13063 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13064 error("errors found in free space tree");
13066 error("errors found in free space cache");
13071 * We used to have to have these hole extents in between our real
13072 * extents so if we don't have this flag set we need to make sure there
13073 * are no gaps in the file extents for inodes, otherwise we can just
13074 * ignore it when this happens.
13076 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13077 if (!ctx.progress_enabled)
13078 fprintf(stderr, "checking fs roots\n");
13079 if (check_mode == CHECK_MODE_LOWMEM)
13080 ret = check_fs_roots_v2(root->fs_info);
13082 ret = check_fs_roots(root, &root_cache);
13085 error("errors found in fs roots");
13089 fprintf(stderr, "checking csums\n");
13090 ret = check_csums(root);
13093 error("errors found in csum tree");
13097 fprintf(stderr, "checking root refs\n");
13098 /* For low memory mode, check_fs_roots_v2 handles root refs */
13099 if (check_mode != CHECK_MODE_LOWMEM) {
13100 ret = check_root_refs(root, &root_cache);
13103 error("errors found in root refs");
/* In repair mode, re-COW every extent buffer queued on recow_ebs */
13108 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13109 struct extent_buffer *eb;
13111 eb = list_first_entry(&root->fs_info->recow_ebs,
13112 struct extent_buffer, recow);
13113 list_del_init(&eb->recow);
13114 ret = recow_extent_buffer(root, eb);
13117 error("fails to fix transid errors");
/* Process the bad items that earlier phases queued on delete_items */
13122 while (!list_empty(&delete_items)) {
13123 struct bad_item *bad;
13125 bad = list_first_entry(&delete_items, struct bad_item, list);
13126 list_del_init(&bad->list);
13128 ret = delete_bad_item(root, bad);
/* Phase: quota groups, only when quotas are enabled on the filesystem */
13134 if (info->quota_enabled) {
13135 fprintf(stderr, "checking quota groups\n");
13136 ret = qgroup_verify_all(info);
13139 error("failed to check quota groups");
13143 ret = repair_qgroups(info, &qgroups_repaired);
13146 error("failed to repair quota groups");
/* Buffers still left on recow_ebs here are reported as transid errors */
13152 if (!list_empty(&root->fs_info->recow_ebs)) {
13153 error("transid errors in file system");
13158 if (found_old_backref) { /*
13159 * there was a disk format change when mixed
13160 * backref was in testing tree. The old format
13161 * existed about one week.
13163 printf("\n * Found old mixed backref format. "
13164 "The old format is not supported! *"
13165 "\n * Please mount the FS in readonly mode, "
13166 "backup data and re-format the FS. *\n\n");
13169 printf("found %llu bytes used, ",
13170 (unsigned long long)bytes_used);
13172 printf("error(s) found\n");
13174 printf("no error found\n");
13175 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13176 printf("total tree bytes: %llu\n",
13177 (unsigned long long)total_btree_bytes);
13178 printf("total fs tree bytes: %llu\n",
13179 (unsigned long long)total_fs_tree_bytes);
13180 printf("total extent tree bytes: %llu\n",
13181 (unsigned long long)total_extent_tree_bytes);
13182 printf("btree space waste bytes: %llu\n",
13183 (unsigned long long)btree_space_waste);
13184 printf("file data blocks allocated: %llu\n referenced %llu\n",
13185 (unsigned long long)data_bytes_allocated,
13186 (unsigned long long)data_bytes_referenced);
13188 free_qgroup_counts();
13189 free_root_recs_tree(&root_cache);
13193 if (ctx.progress_enabled)
13194 task_deinit(ctx.info);