 * Copyright (C) 2007 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.

#include <sys/types.h>
#include <uuid/uuid.h>
#include "print-tree.h"
#include "task-utils.h"
#include "transaction.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
#include "qgroup-verify.h"
#include "rbtree-utils.h"
#include "kernel-shared/ulist.h"

	TASK_NOTHING, /* must be the last element */

	enum task_position tp;
	struct task_info *info;

static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;

enum btrfs_check_mode {
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL

static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
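
/*
 * Common bookkeeping embedded in both data and tree backrefs: tracks
 * whether one reference of an extent was seen in the extent tree and/or
 * from the referencing side.
 */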
struct extent_backref {
	struct list_head list;
	unsigned int is_data:1;
	unsigned int found_extent_tree:1;
	unsigned int full_backref:1;
	unsigned int found_ref:1;
	unsigned int broken:1;

static inline struct extent_backref* to_extent_backref(struct list_head *entry)
	return list_entry(entry, struct extent_backref, list);

struct data_backref {
	struct extent_backref node;

#define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
#define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but does not match */
#define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but does not match */
#define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM (1<<13) /* INODE_ITEM has no reference */
#define NO_INODE_ITEM (1<<14) /* no inode_item */
#define LAST_ITEM (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but does not match */

static inline struct data_backref* to_data_backref(struct extent_backref *back)
	return container_of(back, struct data_backref, node);

/*
 * Much like data_backref, but with the undetermined members removed and
 * changed to use a list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
 */
struct orphan_data_extent {
	struct list_head list;

struct tree_backref {
	struct extent_backref node;

static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
	return container_of(back, struct tree_backref, node);

/* Explicit initialization for extent_record::flag_block_full_backref */
enum { FLAG_UNSET = 2 };

struct extent_record {
	struct list_head backrefs;
	struct list_head dups;
	struct list_head list;
	struct cache_extent cache;
	struct btrfs_disk_key parent_key;
	u64 extent_item_refs;
	u64 parent_generation;
	unsigned int flag_block_full_backref:2;
	unsigned int found_rec:1;
	unsigned int content_checked:1;
	unsigned int owner_ref_checked:1;
	unsigned int is_root:1;
	unsigned int metadata:1;
	unsigned int bad_full_backref:1;
	unsigned int crossing_stripes:1;
	unsigned int wrong_chunk_type:1;

static inline struct extent_record* to_extent_record(struct list_head *entry)
	return container_of(entry, struct extent_record, list);

struct inode_backref {
	struct list_head list;
	unsigned int found_dir_item:1;
	unsigned int found_dir_index:1;
	unsigned int found_inode_ref:1;

static inline struct inode_backref* to_inode_backref(struct list_head *entry)
	return list_entry(entry, struct inode_backref, list);

struct root_item_record {
	struct list_head list;
	struct btrfs_key drop_key;

#define REF_ERR_NO_DIR_ITEM (1 << 0)
#define REF_ERR_NO_DIR_INDEX (1 << 1)
#define REF_ERR_NO_INODE_REF (1 << 2)
#define REF_ERR_DUP_DIR_ITEM (1 << 3)
#define REF_ERR_DUP_DIR_INDEX (1 << 4)
#define REF_ERR_DUP_INODE_REF (1 << 5)
#define REF_ERR_INDEX_UNMATCH (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH (1 << 7)
#define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
#define REF_ERR_NO_ROOT_REF (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF (1 << 10)
#define REF_ERR_DUP_ROOT_REF (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF (1 << 12)

struct file_extent_hole {

struct inode_record {
	struct list_head backrefs;
	unsigned int checked:1;
	unsigned int merging:1;
	unsigned int found_inode_item:1;
	unsigned int found_dir_item:1;
	unsigned int found_file_extent:1;
	unsigned int found_csum_item:1;
	unsigned int some_csum_missing:1;
	unsigned int nodatasum:1;
	struct rb_root holes;
	struct list_head orphan_extents;

#define I_ERR_NO_INODE_ITEM (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM (1 << 1)
#define I_ERR_DUP_INODE_ITEM (1 << 2)
#define I_ERR_DUP_DIR_INDEX (1 << 3)
#define I_ERR_ODD_DIR_ITEM (1 << 4)
#define I_ERR_ODD_FILE_EXTENT (1 << 5)
#define I_ERR_BAD_FILE_EXTENT (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
#define I_ERR_DIR_ISIZE_WRONG (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
#define I_ERR_ODD_CSUM_ITEM (1 << 11)
#define I_ERR_SOME_CSUM_MISSING (1 << 12)
#define I_ERR_LINK_COUNT_WRONG (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)

struct root_backref {
	struct list_head list;
	unsigned int found_dir_item:1;
	unsigned int found_dir_index:1;
	unsigned int found_back_ref:1;
	unsigned int found_forward_ref:1;
	unsigned int reachable:1;

static inline struct root_backref* to_root_backref(struct list_head *entry)
	return list_entry(entry, struct root_backref, list);

	struct list_head backrefs;
	struct cache_extent cache;
	unsigned int found_root_item:1;

	struct cache_extent cache;

	struct cache_extent cache;
	struct cache_tree root_cache;
	struct cache_tree inode_cache;
	struct inode_record *current;

struct walk_control {
	struct cache_tree shared;
	struct shared_node *nodes[BTRFS_MAX_LEVEL];

	struct btrfs_key key;
	struct list_head list;

struct extent_entry {
	struct list_head list;

struct root_item_info {
	/* level of the root */
	/* number of nodes at this level, must be 1 for a root */
	struct cache_extent cache_extent;

/*
 * Error bits for the low memory mode check.
 *
 * Currently no caller cares about them yet; they are just used internally
 * for error classification.
 */
#define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH (1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 5) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH (1 << 6) /* Bad item size */
#define UNKNOWN_TYPE (1 << 7) /* Unknown type */
#define ACCOUNTING_MISMATCH (1 << 8) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH (1 << 9)

static void *print_status_check(void *p)
	struct task_ctx *priv = p;
	const char work_indicator[] = { '.', 'o', 'O', 'o' };
	static char *task_position_string[] = {
		"checking free space cache",

	task_period_start(priv->info, 1000 /* 1s */);

	if (priv->tp == TASK_NOTHING)

		printf("%s [%c]\r", task_position_string[priv->tp],
			work_indicator[count % 4]);

		task_period_wait(priv->info);

static int print_status_return(void *p)

static enum btrfs_check_mode parse_check_mode(const char *str)
	if (strcmp(str, "lowmem") == 0)
		return CHECK_MODE_LOWMEM;
	if (strcmp(str, "orig") == 0)
		return CHECK_MODE_ORIGINAL;
	if (strcmp(str, "original") == 0)
		return CHECK_MODE_ORIGINAL;

	return CHECK_MODE_UNKNOWN;

/* Compatibility helper to allow reuse of the old code */
static u64 first_extent_gap(struct rb_root *holes)
	struct file_extent_hole *hole;

	if (RB_EMPTY_ROOT(holes))

	hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
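
/* rb-tree comparator for file extent holes, ordered by hole start offset */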
static int compare_hole(struct rb_node *node1, struct rb_node *node2)
	struct file_extent_hole *hole1;
	struct file_extent_hole *hole2;

	hole1 = rb_entry(node1, struct file_extent_hole, node);
	hole2 = rb_entry(node2, struct file_extent_hole, node);

	if (hole1->start > hole2->start)
	if (hole1->start < hole2->start)
	/* Now hole1->start == hole2->start */
	if (hole1->len >= hole2->len)
		/*
		 * Hole 1 will be the merge center.
		 * The same hole will be merged later.
		 */
	/* Hole 2 will be the merge center */

/*
 * Add a hole to the record.
 *
 * This will do hole merging for copy_file_extent_holes(),
 * which ensures there won't be adjacent or overlapping holes left behind.
 */
static int add_file_extent_hole(struct rb_root *holes,
	struct file_extent_hole *hole;
	struct file_extent_hole *prev = NULL;
	struct file_extent_hole *next = NULL;

	hole = malloc(sizeof(*hole));

	/* Since compare will not return 0, no -EEXIST will happen */
	rb_insert(holes, &hole->node, compare_hole);

	/* simple merge with previous hole */
	if (rb_prev(&hole->node))
		prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
	if (prev && prev->start + prev->len >= hole->start) {
		hole->len = hole->start + hole->len - prev->start;
		hole->start = prev->start;
		rb_erase(&prev->node, holes);

	/* iterate merge with next holes */
		if (!rb_next(&hole->node))
		next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
		if (hole->start + hole->len >= next->start) {
			if (hole->start + hole->len <= next->start + next->len)
				hole->len = next->start + next->len -
			rb_erase(&next->node, holes);

static int compare_hole_range(struct rb_node *node, void *data)
	struct file_extent_hole *hole;

	hole = (struct file_extent_hole *)data;

	hole = rb_entry(node, struct file_extent_hole, node);
	if (start < hole->start)
	if (start >= hole->start && start < hole->start + hole->len)

/*
 * Delete a hole in the record.
 *
 * This will do the hole split and is much stricter than add.
 */
static int del_file_extent_hole(struct rb_root *holes,
	struct file_extent_hole *hole;
	struct file_extent_hole tmp;
	struct rb_node *node;

	node = rb_search(holes, &tmp, compare_hole_range, NULL);
	hole = rb_entry(node, struct file_extent_hole, node);
	if (start + len > hole->start + hole->len)

	/*
	 * Now there will be no overlap, delete the hole and re-add the
	 * split(s) if they exist.
	 */
	if (start > hole->start) {
		prev_start = hole->start;
		prev_len = start - hole->start;

	if (hole->start + hole->len > start + len) {
		next_start = start + len;
		next_len = hole->start + hole->len - start - len;

	rb_erase(node, holes);

	ret = add_file_extent_hole(holes, prev_start, prev_len);

	ret = add_file_extent_hole(holes, next_start, next_len);
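
/*
 * Copy every hole recorded in @src into @dst, merging as needed via
 * add_file_extent_hole().
 */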
static int copy_file_extent_holes(struct rb_root *dst,
	struct file_extent_hole *hole;
	struct rb_node *node;

	node = rb_first(src);
		hole = rb_entry(node, struct file_extent_hole, node);
		ret = add_file_extent_hole(dst, hole->start, hole->len);
		node = rb_next(node);

static void free_file_extent_holes(struct rb_root *holes)
	struct rb_node *node;
	struct file_extent_hole *hole;

	node = rb_first(holes);
		hole = rb_entry(node, struct file_extent_hole, node);
		rb_erase(node, holes);
		node = rb_first(holes);

static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);

static void record_root_in_trans(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
	if (root->last_trans != trans->transid) {
		root->track_dirty = 1;
		root->last_trans = trans->transid;
		root->commit_root = root->node;
		extent_buffer_get(root->node);

static u8 imode_to_type(u32 imode)
	static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
		[S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
		[S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
		[S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
		[S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
		[S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
		[S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
		[S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,

	return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];

static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
	struct device_record *rec1;
	struct device_record *rec2;

	rec1 = rb_entry(node1, struct device_record, node);
	rec2 = rb_entry(node2, struct device_record, node);
	if (rec1->devid > rec2->devid)
	else if (rec1->devid < rec2->devid)
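
/*
 * Deep-copy an inode record, including its backrefs, orphan data extents
 * and file extent holes, so that a shared record can be modified safely.
 */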
static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
	struct inode_record *rec;
	struct inode_backref *backref;
	struct inode_backref *orig;
	struct inode_backref *tmp;
	struct orphan_data_extent *src_orphan;
	struct orphan_data_extent *dst_orphan;

	rec = malloc(sizeof(*rec));
		return ERR_PTR(-ENOMEM);
	memcpy(rec, orig_rec, sizeof(*rec));

	INIT_LIST_HEAD(&rec->backrefs);
	INIT_LIST_HEAD(&rec->orphan_extents);
	rec->holes = RB_ROOT;

	list_for_each_entry(orig, &orig_rec->backrefs, list) {
		size = sizeof(*orig) + orig->namelen + 1;
		backref = malloc(size);
		memcpy(backref, orig, size);
		list_add_tail(&backref->list, &rec->backrefs);
	list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
		dst_orphan = malloc(sizeof(*dst_orphan));
		memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
		list_add_tail(&dst_orphan->list, &rec->orphan_extents);
	ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);

	rb = rb_first(&rec->holes);
		struct file_extent_hole *hole;

		hole = rb_entry(rb, struct file_extent_hole, node);

	if (!list_empty(&rec->backrefs))
		list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
			list_del(&orig->list);

	if (!list_empty(&rec->orphan_extents))
		list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
			list_del(&orig->list);

static void print_orphan_data_extents(struct list_head *orphan_extents,
	struct orphan_data_extent *orphan;

	if (list_empty(orphan_extents))
	printf("The following data extent is lost in tree %llu:\n",
	list_for_each_entry(orphan, orphan_extents, list) {
		printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
			orphan->objectid, orphan->offset, orphan->disk_bytenr,

static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
	u64 root_objectid = root->root_key.objectid;
	int errors = rec->errors;

	/* For reloc root errors, print the corresponding fs root objectid */
	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
		root_objectid = root->root_key.offset;
		fprintf(stderr, "reloc");
	fprintf(stderr, "root %llu inode %llu errors %x",
		(unsigned long long) root_objectid,
		(unsigned long long) rec->ino, rec->errors);

	if (errors & I_ERR_NO_INODE_ITEM)
		fprintf(stderr, ", no inode item");
	if (errors & I_ERR_NO_ORPHAN_ITEM)
		fprintf(stderr, ", no orphan item");
	if (errors & I_ERR_DUP_INODE_ITEM)
		fprintf(stderr, ", dup inode item");
	if (errors & I_ERR_DUP_DIR_INDEX)
		fprintf(stderr, ", dup dir index");
	if (errors & I_ERR_ODD_DIR_ITEM)
		fprintf(stderr, ", odd dir item");
	if (errors & I_ERR_ODD_FILE_EXTENT)
		fprintf(stderr, ", odd file extent");
	if (errors & I_ERR_BAD_FILE_EXTENT)
		fprintf(stderr, ", bad file extent");
	if (errors & I_ERR_FILE_EXTENT_OVERLAP)
		fprintf(stderr, ", file extent overlap");
	if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
		fprintf(stderr, ", file extent discount");
	if (errors & I_ERR_DIR_ISIZE_WRONG)
		fprintf(stderr, ", dir isize wrong");
	if (errors & I_ERR_FILE_NBYTES_WRONG)
		fprintf(stderr, ", nbytes wrong");
	if (errors & I_ERR_ODD_CSUM_ITEM)
		fprintf(stderr, ", odd csum item");
	if (errors & I_ERR_SOME_CSUM_MISSING)
		fprintf(stderr, ", some csum missing");
	if (errors & I_ERR_LINK_COUNT_WRONG)
		fprintf(stderr, ", link count wrong");
	if (errors & I_ERR_FILE_EXTENT_ORPHAN)
		fprintf(stderr, ", orphan file extent");
	fprintf(stderr, "\n");

	/* Print the orphan extents if needed */
	if (errors & I_ERR_FILE_EXTENT_ORPHAN)
		print_orphan_data_extents(&rec->orphan_extents, root->objectid);

	/* Print the holes if needed */
	if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
		struct file_extent_hole *hole;
		struct rb_node *node;

		node = rb_first(&rec->holes);
		fprintf(stderr, "Found file extent holes:\n");
			hole = rb_entry(node, struct file_extent_hole, node);
			fprintf(stderr, "\tstart: %llu, len: %llu\n",
				hole->start, hole->len);
			node = rb_next(node);
			fprintf(stderr, "\tstart: 0, len: %llu\n",
				root->fs_info->sectorsize));

static void print_ref_error(int errors)
	if (errors & REF_ERR_NO_DIR_ITEM)
		fprintf(stderr, ", no dir item");
	if (errors & REF_ERR_NO_DIR_INDEX)
		fprintf(stderr, ", no dir index");
	if (errors & REF_ERR_NO_INODE_REF)
		fprintf(stderr, ", no inode ref");
	if (errors & REF_ERR_DUP_DIR_ITEM)
		fprintf(stderr, ", dup dir item");
	if (errors & REF_ERR_DUP_DIR_INDEX)
		fprintf(stderr, ", dup dir index");
	if (errors & REF_ERR_DUP_INODE_REF)
		fprintf(stderr, ", dup inode ref");
	if (errors & REF_ERR_INDEX_UNMATCH)
		fprintf(stderr, ", index mismatch");
	if (errors & REF_ERR_FILETYPE_UNMATCH)
		fprintf(stderr, ", filetype mismatch");
	if (errors & REF_ERR_NAME_TOO_LONG)
		fprintf(stderr, ", name too long");
	if (errors & REF_ERR_NO_ROOT_REF)
		fprintf(stderr, ", no root ref");
	if (errors & REF_ERR_NO_ROOT_BACKREF)
		fprintf(stderr, ", no root backref");
	if (errors & REF_ERR_DUP_ROOT_REF)
		fprintf(stderr, ", dup root ref");
	if (errors & REF_ERR_DUP_ROOT_BACKREF)
		fprintf(stderr, ", dup root backref");
	fprintf(stderr, "\n");
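
/*
 * Find the inode record for @ino in @inode_cache. If the cached record is
 * shared and @mod is set, clone it first; allocate a fresh record when
 * nothing is cached yet.
 */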
static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
	struct ptr_node *node;
	struct cache_extent *cache;
	struct inode_record *rec = NULL;

	cache = lookup_cache_extent(inode_cache, ino, 1);
		node = container_of(cache, struct ptr_node, cache);
		if (mod && rec->refs > 1) {
			node->data = clone_inode_rec(rec);
			if (IS_ERR(node->data))
		rec = calloc(1, sizeof(*rec));
			return ERR_PTR(-ENOMEM);
		rec->extent_start = (u64)-1;

		INIT_LIST_HEAD(&rec->backrefs);
		INIT_LIST_HEAD(&rec->orphan_extents);
		rec->holes = RB_ROOT;

		node = malloc(sizeof(*node));
			return ERR_PTR(-ENOMEM);
		node->cache.start = ino;
		node->cache.size = 1;

		if (ino == BTRFS_FREE_INO_OBJECTID)

		ret = insert_cache_extent(inode_cache, &node->cache);
			return ERR_PTR(-EEXIST);

static void free_orphan_data_extents(struct list_head *orphan_extents)
	struct orphan_data_extent *orphan;

	while (!list_empty(orphan_extents)) {
		orphan = list_entry(orphan_extents->next,
				    struct orphan_data_extent, list);
		list_del(&orphan->list);

static void free_inode_rec(struct inode_record *rec)
	struct inode_backref *backref;

	while (!list_empty(&rec->backrefs)) {
		backref = to_inode_backref(rec->backrefs.next);
		list_del(&backref->list);
	free_orphan_data_extents(&rec->orphan_extents);
	free_file_extent_holes(&rec->holes);

static int can_free_inode_rec(struct inode_record *rec)
	if (!rec->errors && rec->checked && rec->found_inode_item &&
	    rec->nlink == rec->found_link && list_empty(&rec->backrefs))
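
/*
 * Finish the per-inode checks (dir size, nbytes, csums, holes) and drop the
 * record from the cache once nothing references it any more.
 */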
static void maybe_free_inode_rec(struct cache_tree *inode_cache,
				 struct inode_record *rec)
	struct cache_extent *cache;
	struct inode_backref *tmp, *backref;
	struct ptr_node *node;

	if (!rec->found_inode_item)

	filetype = imode_to_type(rec->imode);
	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
		if (backref->found_dir_item && backref->found_dir_index) {
			if (backref->filetype != filetype)
				backref->errors |= REF_ERR_FILETYPE_UNMATCH;
			if (!backref->errors && backref->found_inode_ref &&
			    rec->nlink == rec->found_link) {
				list_del(&backref->list);

	if (!rec->checked || rec->merging)

	if (S_ISDIR(rec->imode)) {
		if (rec->found_size != rec->isize)
			rec->errors |= I_ERR_DIR_ISIZE_WRONG;
		if (rec->found_file_extent)
			rec->errors |= I_ERR_ODD_FILE_EXTENT;
	} else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
		if (rec->found_dir_item)
			rec->errors |= I_ERR_ODD_DIR_ITEM;
		if (rec->found_size != rec->nbytes)
			rec->errors |= I_ERR_FILE_NBYTES_WRONG;
		if (rec->nlink > 0 && !no_holes &&
		    (rec->extent_end < rec->isize ||
		     first_extent_gap(&rec->holes) < rec->isize))
			rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;

	if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
		if (rec->found_csum_item && rec->nodatasum)
			rec->errors |= I_ERR_ODD_CSUM_ITEM;
		if (rec->some_csum_missing && !rec->nodatasum)
			rec->errors |= I_ERR_SOME_CSUM_MISSING;

	BUG_ON(rec->refs != 1);
	if (can_free_inode_rec(rec)) {
		cache = lookup_cache_extent(inode_cache, rec->ino, 1);
		node = container_of(cache, struct ptr_node, cache);
		BUG_ON(node->data != rec);
		remove_cache_extent(inode_cache, &node->cache);
		free_inode_rec(rec);

static int check_orphan_item(struct btrfs_root *root, u64 ino)
	struct btrfs_path path;
	struct btrfs_key key;

	key.objectid = BTRFS_ORPHAN_OBJECTID;
	key.type = BTRFS_ORPHAN_ITEM_KEY;

	btrfs_init_path(&path);
	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
	btrfs_release_path(&path);
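
/* Record the INODE_ITEM fields (nlink, size, nbytes, mode) for the current inode */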
static int process_inode_item(struct extent_buffer *eb,
			      int slot, struct btrfs_key *key,
			      struct shared_node *active_node)
	struct inode_record *rec;
	struct btrfs_inode_item *item;

	rec = active_node->current;
	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
	if (rec->found_inode_item) {
		rec->errors |= I_ERR_DUP_INODE_ITEM;
	item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
	rec->nlink = btrfs_inode_nlink(eb, item);
	rec->isize = btrfs_inode_size(eb, item);
	rec->nbytes = btrfs_inode_nbytes(eb, item);
	rec->imode = btrfs_inode_mode(eb, item);
	if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
	rec->found_inode_item = 1;
	if (rec->nlink == 0)
		rec->errors |= I_ERR_NO_ORPHAN_ITEM;
	maybe_free_inode_rec(&active_node->inode_cache, rec);

static struct inode_backref *get_inode_backref(struct inode_record *rec,
					       int namelen, u64 dir)
	struct inode_backref *backref;

	list_for_each_entry(backref, &rec->backrefs, list) {
		if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
		if (backref->dir != dir || backref->namelen != namelen)
		if (memcmp(name, backref->name, namelen))

	backref = malloc(sizeof(*backref) + namelen + 1);
	memset(backref, 0, sizeof(*backref));
	backref->namelen = namelen;
	memcpy(backref->name, name, namelen);
	backref->name[namelen] = '\0';
	list_add_tail(&backref->list, &rec->backrefs);
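
/*
 * Merge one DIR_ITEM / DIR_INDEX / INODE_REF sighting into the backref list
 * of inode @ino, flagging duplicates and index/filetype mismatches.
 */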
static int add_inode_backref(struct cache_tree *inode_cache,
			     u64 ino, u64 dir, u64 index,
			     const char *name, int namelen,
			     u8 filetype, u8 itemtype, int errors)
	struct inode_record *rec;
	struct inode_backref *backref;

	rec = get_inode_rec(inode_cache, ino, 1);
	BUG_ON(IS_ERR(rec));
	backref = get_inode_backref(rec, name, namelen, dir);
		backref->errors |= errors;
	if (itemtype == BTRFS_DIR_INDEX_KEY) {
		if (backref->found_dir_index)
			backref->errors |= REF_ERR_DUP_DIR_INDEX;
		if (backref->found_inode_ref && backref->index != index)
			backref->errors |= REF_ERR_INDEX_UNMATCH;
		if (backref->found_dir_item && backref->filetype != filetype)
			backref->errors |= REF_ERR_FILETYPE_UNMATCH;

		backref->index = index;
		backref->filetype = filetype;
		backref->found_dir_index = 1;
	} else if (itemtype == BTRFS_DIR_ITEM_KEY) {
		if (backref->found_dir_item)
			backref->errors |= REF_ERR_DUP_DIR_ITEM;
		if (backref->found_dir_index && backref->filetype != filetype)
			backref->errors |= REF_ERR_FILETYPE_UNMATCH;

		backref->filetype = filetype;
		backref->found_dir_item = 1;
	} else if ((itemtype == BTRFS_INODE_REF_KEY) ||
		   (itemtype == BTRFS_INODE_EXTREF_KEY)) {
		if (backref->found_inode_ref)
			backref->errors |= REF_ERR_DUP_INODE_REF;
		if (backref->found_dir_index && backref->index != index)
			backref->errors |= REF_ERR_INDEX_UNMATCH;

		backref->index = index;
		backref->ref_type = itemtype;
		backref->found_inode_ref = 1;

	maybe_free_inode_rec(inode_cache, rec);
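
/*
 * Fold the contents of @src into @dst: replay its backrefs into @dst_cache
 * and combine link counts, sizes, extent ranges and error bits.
 */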
static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
			    struct cache_tree *dst_cache)
	struct inode_backref *backref;

	list_for_each_entry(backref, &src->backrefs, list) {
		if (backref->found_dir_index) {
			add_inode_backref(dst_cache, dst->ino, backref->dir,
					  backref->index, backref->name,
					  backref->namelen, backref->filetype,
					  BTRFS_DIR_INDEX_KEY, backref->errors);
		if (backref->found_dir_item) {
			add_inode_backref(dst_cache, dst->ino,
					  backref->dir, 0, backref->name,
					  backref->namelen, backref->filetype,
					  BTRFS_DIR_ITEM_KEY, backref->errors);
		if (backref->found_inode_ref) {
			add_inode_backref(dst_cache, dst->ino,
					  backref->dir, backref->index,
					  backref->name, backref->namelen, 0,
					  backref->ref_type, backref->errors);

	if (src->found_dir_item)
		dst->found_dir_item = 1;
	if (src->found_file_extent)
		dst->found_file_extent = 1;
	if (src->found_csum_item)
		dst->found_csum_item = 1;
	if (src->some_csum_missing)
		dst->some_csum_missing = 1;
	if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
		ret = copy_file_extent_holes(&dst->holes, &src->holes);

	BUG_ON(src->found_link < dir_count);
	dst->found_link += src->found_link - dir_count;
	dst->found_size += src->found_size;
	if (src->extent_start != (u64)-1) {
		if (dst->extent_start == (u64)-1) {
			dst->extent_start = src->extent_start;
			dst->extent_end = src->extent_end;
			if (dst->extent_end > src->extent_start)
				dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
			else if (dst->extent_end < src->extent_start) {
				ret = add_file_extent_hole(&dst->holes,
					src->extent_start - dst->extent_end);
			if (dst->extent_end < src->extent_end)
				dst->extent_end = src->extent_end;

	dst->errors |= src->errors;
	if (src->found_inode_item) {
		if (!dst->found_inode_item) {
			dst->nlink = src->nlink;
			dst->isize = src->isize;
			dst->nbytes = src->nbytes;
			dst->imode = src->imode;
			dst->nodatasum = src->nodatasum;
			dst->found_inode_item = 1;
			dst->errors |= I_ERR_DUP_INODE_ITEM;
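
/*
 * Move all inode records cached on @src_node over to @dst_node, merging
 * with any conflicting records that already exist there.
 */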
static int splice_shared_node(struct shared_node *src_node,
			      struct shared_node *dst_node)
	struct cache_extent *cache;
	struct ptr_node *node, *ins;
	struct cache_tree *src, *dst;
	struct inode_record *rec, *conflict;
	u64 current_ino = 0;

	if (--src_node->refs == 0)
	if (src_node->current)
		current_ino = src_node->current->ino;

	src = &src_node->root_cache;
	dst = &dst_node->root_cache;

	cache = search_cache_extent(src, 0);
		node = container_of(cache, struct ptr_node, cache);
		cache = next_cache_extent(cache);
		remove_cache_extent(src, &node->cache);

		ins = malloc(sizeof(*ins));
		ins->cache.start = node->cache.start;
		ins->cache.size = node->cache.size;

		ret = insert_cache_extent(dst, &ins->cache);
		if (ret == -EEXIST) {
			conflict = get_inode_rec(dst, rec->ino, 1);
			BUG_ON(IS_ERR(conflict));
			merge_inode_recs(rec, conflict, dst);
				conflict->checked = 1;
				if (dst_node->current == conflict)
					dst_node->current = NULL;
			maybe_free_inode_rec(dst, conflict);
			free_inode_rec(rec);

	if (src == &src_node->root_cache) {
		src = &src_node->inode_cache;
		dst = &dst_node->inode_cache;

	if (current_ino > 0 && (!dst_node->current ||
	    current_ino > dst_node->current->ino)) {
		if (dst_node->current) {
			dst_node->current->checked = 1;
			maybe_free_inode_rec(dst, dst_node->current);
		dst_node->current = get_inode_rec(dst, current_ino, 1);
		BUG_ON(IS_ERR(dst_node->current));

static void free_inode_ptr(struct cache_extent *cache)
	struct ptr_node *node;
	struct inode_record *rec;

	node = container_of(cache, struct ptr_node, cache);
	free_inode_rec(rec);

FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);

static struct shared_node *find_shared_node(struct cache_tree *shared,
	struct cache_extent *cache;
	struct shared_node *node;

	cache = lookup_cache_extent(shared, bytenr, 1);
		node = container_of(cache, struct shared_node, cache);

static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
	struct shared_node *node;

	node = calloc(1, sizeof(*node));
	node->cache.start = bytenr;
	node->cache.size = 1;
	cache_tree_init(&node->root_cache);
	cache_tree_init(&node->inode_cache);

	ret = insert_cache_extent(shared, &node->cache);
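
/*
 * Note that the walk has entered a shared tree block: either make it the
 * active shared node or splice its cached records into the currently
 * active one.
 */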
static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
			     struct walk_control *wc, int level)
	struct shared_node *node;
	struct shared_node *dest;

	if (level == wc->active_node)

	BUG_ON(wc->active_node <= level);
	node = find_shared_node(&wc->shared, bytenr);
		ret = add_shared_node(&wc->shared, bytenr, refs);
		node = find_shared_node(&wc->shared, bytenr);
		wc->nodes[level] = node;
		wc->active_node = level;

	if (wc->root_level == wc->active_node &&
	    btrfs_root_refs(&root->root_item) == 0) {
		if (--node->refs == 0) {
			free_inode_recs_tree(&node->root_cache);
			free_inode_recs_tree(&node->inode_cache);
			remove_cache_extent(&wc->shared, &node->cache);

	dest = wc->nodes[wc->active_node];
	splice_shared_node(node, dest);
	if (node->refs == 0) {
		remove_cache_extent(&wc->shared, &node->cache);

static int leave_shared_node(struct btrfs_root *root,
			     struct walk_control *wc, int level)
	struct shared_node *node;
	struct shared_node *dest;

	if (level == wc->root_level)

	for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
	BUG_ON(i >= BTRFS_MAX_LEVEL);

	node = wc->nodes[wc->active_node];
	wc->nodes[wc->active_node] = NULL;
	wc->active_node = i;

	dest = wc->nodes[wc->active_node];
	if (wc->active_node < wc->root_level ||
	    btrfs_root_refs(&root->root_item) > 0) {
		BUG_ON(node->refs <= 1);
		splice_shared_node(node, dest);
		BUG_ON(node->refs < 2);

/*
 * 1 - if the root with id child_root_id is a child of root parent_root_id
 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
 *     has other root(s) as parent(s)
 * 2 - if the root child_root_id doesn't have any parent roots
 */
static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
	struct btrfs_path path;
	struct btrfs_key key;
	struct extent_buffer *leaf;

	btrfs_init_path(&path);

	key.objectid = parent_root_id;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = child_root_id;
	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
	btrfs_release_path(&path);

	key.objectid = child_root_id;
	key.type = BTRFS_ROOT_BACKREF_KEY;
	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,

		leaf = path.nodes[0];
		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
			leaf = path.nodes[0];

		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.objectid != child_root_id ||
		    key.type != BTRFS_ROOT_BACKREF_KEY)

		if (key.offset == parent_root_id) {
			btrfs_release_path(&path);

	btrfs_release_path(&path);
	return has_parent ? 0 : 2;
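
/*
 * Walk every DIR_ITEM/DIR_INDEX entry in the item and record a backref for
 * the inode (or subvolume root) it points to.
 */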
static int process_dir_item(struct extent_buffer *eb,
			    int slot, struct btrfs_key *key,
			    struct shared_node *active_node)
	struct btrfs_dir_item *di;
	struct inode_record *rec;
	struct cache_tree *root_cache;
	struct cache_tree *inode_cache;
	struct btrfs_key location;
	char namebuf[BTRFS_NAME_LEN];

	root_cache = &active_node->root_cache;
	inode_cache = &active_node->inode_cache;
	rec = active_node->current;
	rec->found_dir_item = 1;

	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	total = btrfs_item_size_nr(eb, slot);
	while (cur < total) {
		btrfs_dir_item_key_to_cpu(eb, di, &location);
		name_len = btrfs_dir_name_len(eb, di);
		data_len = btrfs_dir_data_len(eb, di);
		filetype = btrfs_dir_type(eb, di);

		rec->found_size += name_len;
		if (cur + sizeof(*di) + name_len > total ||
		    name_len > BTRFS_NAME_LEN) {
			error = REF_ERR_NAME_TOO_LONG;

			if (cur + sizeof(*di) > total)
			len = min_t(u32, total - cur - sizeof(*di),

		read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);

		if (location.type == BTRFS_INODE_ITEM_KEY) {
			add_inode_backref(inode_cache, location.objectid,
					  key->objectid, key->offset, namebuf,
					  len, filetype, key->type, error);
		} else if (location.type == BTRFS_ROOT_ITEM_KEY) {
			add_inode_backref(root_cache, location.objectid,
					  key->objectid, key->offset,
					  namebuf, len, filetype,
			fprintf(stderr, "invalid location in dir item %u\n",
			add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
					  key->objectid, key->offset, namebuf,
					  len, filetype, key->type, error);

		len = sizeof(*di) + name_len + data_len;
		di = (struct btrfs_dir_item *)((char *)di + len);
	if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
		rec->errors |= I_ERR_DUP_DIR_INDEX;

static int process_inode_ref(struct extent_buffer *eb,
			     int slot, struct btrfs_key *key,
			     struct shared_node *active_node)
	struct cache_tree *inode_cache;
	struct btrfs_inode_ref *ref;
	char namebuf[BTRFS_NAME_LEN];

	inode_cache = &active_node->inode_cache;

	ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
	total = btrfs_item_size_nr(eb, slot);
	while (cur < total) {
		name_len = btrfs_inode_ref_name_len(eb, ref);
		index = btrfs_inode_ref_index(eb, ref);

		/* inode_ref + namelen should not cross item boundary */
		if (cur + sizeof(*ref) + name_len > total ||
		    name_len > BTRFS_NAME_LEN) {
			if (total < cur + sizeof(*ref))

			/* Still try to read out the remaining part */
			len = min_t(u32, total - cur - sizeof(*ref),
			error = REF_ERR_NAME_TOO_LONG;

		read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
		add_inode_backref(inode_cache, key->objectid, key->offset,
				  index, namebuf, len, 0, key->type, error);

		len = sizeof(*ref) + name_len;
		ref = (struct btrfs_inode_ref *)((char *)ref + len);

static int process_inode_extref(struct extent_buffer *eb,
				int slot, struct btrfs_key *key,
				struct shared_node *active_node)
	struct cache_tree *inode_cache;
	struct btrfs_inode_extref *extref;
	char namebuf[BTRFS_NAME_LEN];

	inode_cache = &active_node->inode_cache;

	extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
	total = btrfs_item_size_nr(eb, slot);
	while (cur < total) {
		name_len = btrfs_inode_extref_name_len(eb, extref);
		index = btrfs_inode_extref_index(eb, extref);
		parent = btrfs_inode_extref_parent(eb, extref);
		if (name_len <= BTRFS_NAME_LEN) {
			len = BTRFS_NAME_LEN;
			error = REF_ERR_NAME_TOO_LONG;
		read_extent_buffer(eb, namebuf,
				   (unsigned long)(extref + 1), len);
		add_inode_backref(inode_cache, key->objectid, parent,
				  index, namebuf, len, 0, key->type, error);

		len = sizeof(*extref) + name_len;
		extref = (struct btrfs_inode_extref *)((char *)extref + len);
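
/*
 * Count how many bytes of the range [start, start + len) are covered by
 * checksum items in the csum tree; the result is returned in *found.
 */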
static int count_csum_range(struct btrfs_root *root, u64 start,
			    u64 len, u64 *found)
	struct btrfs_key key;
	struct btrfs_path path;
	struct extent_buffer *leaf;
	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);

	btrfs_init_path(&path);

	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key.type = BTRFS_EXTENT_CSUM_KEY;

	ret = btrfs_search_slot(NULL, root->fs_info->csum_root,

	if (ret > 0 && path.slots[0] > 0) {
		leaf = path.nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
		    key.type == BTRFS_EXTENT_CSUM_KEY)

		leaf = path.nodes[0];
		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
			leaf = path.nodes[0];

		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
		    key.type != BTRFS_EXTENT_CSUM_KEY)

		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
		if (key.offset >= start + len)

		if (key.offset > start)

		size = btrfs_item_size_nr(leaf, path.slots[0]);
		csum_end = key.offset + (size / csum_size) *
			   root->fs_info->sectorsize;
		if (csum_end > start) {
			size = min(csum_end - start, len);

	btrfs_release_path(&path);
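
/*
 * Validate one EXTENT_DATA item: track the covered range and holes of the
 * current inode and cross-check the csums that should back the extent.
 */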
static int process_file_extent(struct btrfs_root *root,
			       struct extent_buffer *eb,
			       int slot, struct btrfs_key *key,
			       struct shared_node *active_node)
	struct inode_record *rec;
	struct btrfs_file_extent_item *fi;
	u64 disk_bytenr = 0;
	u64 extent_offset = 0;
	u64 mask = root->fs_info->sectorsize - 1;

	rec = active_node->current;
	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
	rec->found_file_extent = 1;

	if (rec->extent_start == (u64)-1) {
		rec->extent_start = key->offset;
		rec->extent_end = key->offset;

	if (rec->extent_end > key->offset)
		rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
	else if (rec->extent_end < key->offset) {
		ret = add_file_extent_hole(&rec->holes, rec->extent_end,
					   key->offset - rec->extent_end);

	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(eb, fi);

	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		rec->found_size += num_bytes;
		num_bytes = (num_bytes + mask) & ~mask;
	} else if (extent_type == BTRFS_FILE_EXTENT_REG ||
		   extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
		num_bytes = btrfs_file_extent_num_bytes(eb, fi);
		disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
		extent_offset = btrfs_file_extent_offset(eb, fi);
		if (num_bytes == 0 || (num_bytes & mask))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		if (num_bytes + extent_offset >
		    btrfs_file_extent_ram_bytes(eb, fi))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
		    (btrfs_file_extent_compression(eb, fi) ||
		     btrfs_file_extent_encryption(eb, fi) ||
		     btrfs_file_extent_other_encoding(eb, fi)))
			rec->errors |= I_ERR_BAD_FILE_EXTENT;
		if (disk_bytenr > 0)
			rec->found_size += num_bytes;
		rec->errors |= I_ERR_BAD_FILE_EXTENT;
	rec->extent_end = key->offset + num_bytes;

	/*
	 * The data reloc tree will copy full extents into its inode and then
	 * copy the corresponding csums. Because the extent it copied could be
	 * a preallocated extent that hasn't been written to yet there may be no
	 * csums to copy, ergo we won't have csums for our file extent. This is
	 * ok so just don't bother checking csums if the inode belongs to the
	 * data reloc tree.
	 */
	if (disk_bytenr > 0 &&
	    btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
		if (btrfs_file_extent_compression(eb, fi))
			num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
			disk_bytenr += extent_offset;

		ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
		if (extent_type == BTRFS_FILE_EXTENT_REG) {
				rec->found_csum_item = 1;
			if (found < num_bytes)
				rec->some_csum_missing = 1;
		} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
				rec->errors |= I_ERR_ODD_CSUM_ITEM;
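
/*
 * Dispatch every item in an fs-tree leaf to the matching process_*()
 * helper, keeping active_node->current in sync with the inode the items
 * belong to.
 */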
static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
			    struct walk_control *wc)
	struct btrfs_key key;
	struct cache_tree *inode_cache;
	struct shared_node *active_node;

	if (wc->root_level == wc->active_node &&
	    btrfs_root_refs(&root->root_item) == 0)

	active_node = wc->nodes[wc->active_node];
	inode_cache = &active_node->inode_cache;
	nritems = btrfs_header_nritems(eb);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(eb, &key, i);

		if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
		if (key.type == BTRFS_ORPHAN_ITEM_KEY)

		if (active_node->current == NULL ||
		    active_node->current->ino < key.objectid) {
			if (active_node->current) {
				active_node->current->checked = 1;
				maybe_free_inode_rec(inode_cache,
						     active_node->current);
			active_node->current = get_inode_rec(inode_cache,
			BUG_ON(IS_ERR(active_node->current));

		case BTRFS_DIR_ITEM_KEY:
		case BTRFS_DIR_INDEX_KEY:
			ret = process_dir_item(eb, i, &key, active_node);
		case BTRFS_INODE_REF_KEY:
			ret = process_inode_ref(eb, i, &key, active_node);
		case BTRFS_INODE_EXTREF_KEY:
			ret = process_inode_extref(eb, i, &key, active_node);
		case BTRFS_INODE_ITEM_KEY:
			ret = process_inode_item(eb, i, &key, active_node);
		case BTRFS_EXTENT_DATA_KEY:
			ret = process_file_extent(root, eb, i, &key,

	u64 bytenr[BTRFS_MAX_LEVEL];
	u64 refs[BTRFS_MAX_LEVEL];
	int need_check[BTRFS_MAX_LEVEL];

static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
			     struct node_refs *nrefs, u64 level);
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
			    unsigned int ext_ref);

/*
 * Returns >0 Found error, not fatal, should continue
 * Returns <0 Fatal error, must exit the whole check
 * Returns 0 No errors found
 */
static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
			       struct node_refs *nrefs, int *level, int ext_ref)
	struct extent_buffer *cur = path->nodes[0];
	struct btrfs_key key;
	int root_level = btrfs_header_level(root->node);
	int ret = 0; /* Final return value */
	int err = 0; /* Positive error bitmap */

	cur_bytenr = cur->start;

	/* skip to first inode item or the first inode number change */
	nritems = btrfs_header_nritems(cur);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(cur, &key, i);
			first_ino = key.objectid;
		if (key.type == BTRFS_INODE_ITEM_KEY ||
		    (first_ino && first_ino != key.objectid))
		path->slots[0] = nritems;

	err |= check_inode_item(root, path, ext_ref);

	if (err & LAST_ITEM)

	/* still have inode items in this leaf */
	if (cur->start == cur_bytenr)

	/*
	 * we have switched to another leaf, above nodes may
	 * have changed, here walk down the path, if a node
	 * or leaf is shared, check whether we can skip this node or leaf.
	 */
	for (i = root_level; i >= 0; i--) {
		if (path->nodes[i]->start == nrefs->bytenr[i])
		ret = update_nodes_refs(root,
					path->nodes[i]->start,
		if (!nrefs->need_check[i]) {

	for (i = 0; i < *level; i++) {
		free_extent_buffer(path->nodes[i]);
		path->nodes[i] = NULL;

static void reada_walk_down(struct btrfs_root *root,
			    struct extent_buffer *node, int slot)
	struct btrfs_fs_info *fs_info = root->fs_info;

	level = btrfs_header_level(node);

	nritems = btrfs_header_nritems(node);
	blocksize = fs_info->nodesize;
	for (i = slot; i < nritems; i++) {
		bytenr = btrfs_node_blockptr(node, i);
		ptr_gen = btrfs_node_ptr_generation(node, i);
		readahead_tree_block(fs_info, bytenr, blocksize, ptr_gen);

/*
 * Check the child node/leaf by the following conditions:
 * 1. the first item key of the node/leaf should be the same as the one
 *    in the parent.
 * 2. block in parent node should match the child node/leaf.
 * 3. generation of parent node and child's header should be consistent.
 *
 * Otherwise, the child node/leaf pointed to by the key in the parent is not
 * valid.
 *
 * We hope to check the leaf owner too, but since subvolumes may share leaves,
 * which makes the leaf owner check not so strong, the key check should be
 * sufficient for that case.
 */
static int check_child_node(struct extent_buffer *parent, int slot,
			    struct extent_buffer *child)
	struct btrfs_key parent_key;
	struct btrfs_key child_key;

	btrfs_node_key_to_cpu(parent, &parent_key, slot);
	if (btrfs_header_level(child) == 0)
		btrfs_item_key_to_cpu(child, &child_key, 0);
		btrfs_node_key_to_cpu(child, &child_key, 0);

	if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
			"Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
			parent_key.objectid, parent_key.type, parent_key.offset,
			child_key.objectid, child_key.type, child_key.offset);
	if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
		fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
			btrfs_node_blockptr(parent, slot),
			btrfs_header_bytenr(child));
	if (btrfs_node_ptr_generation(parent, slot) !=
	    btrfs_header_generation(child)) {
		fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
			btrfs_header_generation(child),
			btrfs_node_ptr_generation(parent, slot));

/*
 * For a tree node or leaf that is shared, we do not need to iterate it in
 * every fs or file tree check. Here we find all of its root ids, and only
 * check it in the fs or file tree that has the smallest root id.
 */
static int need_check(struct btrfs_root *root, struct ulist *roots)
	struct rb_node *node;
	struct ulist_node *u;

	if (roots->nnodes == 1)

	node = rb_first(&roots->root);
	u = rb_entry(node, struct ulist_node, rb_node);
	/*
	 * The current root id is not the smallest; skip it and let it be
	 * checked in the fs or file tree that has the smallest root id.
	 */
	if (root->objectid != u->val)

/*
 * For a tree node or leaf, we record its reference count, so later if we
 * still process this node or leaf, we don't need to compute its reference
 * count again.
 */
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
			     struct node_refs *nrefs, u64 level)
	struct ulist *roots;

	if (nrefs->bytenr[level] != bytenr) {
		ret = btrfs_lookup_extent_info(NULL, root, bytenr,
					       level, 1, &refs, NULL);

		nrefs->bytenr[level] = bytenr;
		nrefs->refs[level] = refs;
			ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
			check = need_check(root, roots);
			nrefs->need_check[level] = check;
			nrefs->need_check[level] = 1;
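
/*
 * Walk down the tree from the current path position, checking extent refs
 * and block contents and entering shared nodes so that shared subtrees are
 * only processed once.
 */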
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
			  struct walk_control *wc, int *level,
			  struct node_refs *nrefs)
	enum btrfs_tree_block_status status;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *next;
	struct extent_buffer *cur;

	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);

	if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
		refs = nrefs->refs[*level];
		ret = btrfs_lookup_extent_info(NULL, root,
					       path->nodes[*level]->start,
					       *level, 1, &refs, NULL);
		nrefs->bytenr[*level] = path->nodes[*level]->start;
		nrefs->refs[*level] = refs;

		ret = enter_shared_node(root, path->nodes[*level]->start,

	while (*level >= 0) {
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
		cur = path->nodes[*level];

		if (btrfs_header_level(cur) != *level)

		if (path->slots[*level] >= btrfs_header_nritems(cur))
			ret = process_one_leaf(root, cur, wc);

		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
		blocksize = fs_info->nodesize;

		if (bytenr == nrefs->bytenr[*level - 1]) {
			refs = nrefs->refs[*level - 1];
			ret = btrfs_lookup_extent_info(NULL, root, bytenr,
						       *level - 1, 1, &refs, NULL);
			nrefs->bytenr[*level - 1] = bytenr;
			nrefs->refs[*level - 1] = refs;

			ret = enter_shared_node(root, bytenr, refs,
				path->slots[*level]++;

		next = btrfs_find_tree_block(fs_info, bytenr, blocksize);
		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
			free_extent_buffer(next);
			reada_walk_down(root, cur, path->slots[*level]);
			next = read_tree_block(root->fs_info, bytenr, blocksize,
			if (!extent_buffer_uptodate(next)) {
				struct btrfs_key node_key;

				btrfs_node_key_to_cpu(path->nodes[*level],
						      path->slots[*level]);
				btrfs_add_corrupt_extent_record(root->fs_info,
						path->nodes[*level]->start,
						root->fs_info->nodesize,

		ret = check_child_node(cur, path->slots[*level], next);
			free_extent_buffer(next);

		if (btrfs_is_leaf(next))
			status = btrfs_check_leaf(root, NULL, next);
			status = btrfs_check_node(root, NULL, next);
		if (status != BTRFS_TREE_BLOCK_CLEAN) {
			free_extent_buffer(next);

		*level = *level - 1;
		free_extent_buffer(path->nodes[*level]);
		path->nodes[*level] = next;
		path->slots[*level] = 0;
	path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);

static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
			    unsigned int ext_ref);

/*
 * Returns >0 Found error, should continue
 * Returns <0 Fatal error, must exit the whole check
 * Returns 0 No errors found
 */
static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
			     int *level, struct node_refs *nrefs, int ext_ref)
	enum btrfs_tree_block_status status;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *next;
	struct extent_buffer *cur;

	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);

	ret = update_nodes_refs(root, path->nodes[*level]->start,

	while (*level >= 0) {
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
		cur = path->nodes[*level];

		if (btrfs_header_level(cur) != *level)

		if (path->slots[*level] >= btrfs_header_nritems(cur))
		/* Don't forget to check leaf/node validity */
			ret = btrfs_check_leaf(root, NULL, cur);
			if (ret != BTRFS_TREE_BLOCK_CLEAN) {
			ret = process_one_leaf_v2(root, path, nrefs,
			ret = btrfs_check_node(root, NULL, cur);
			if (ret != BTRFS_TREE_BLOCK_CLEAN) {

		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
		blocksize = fs_info->nodesize;

		ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
		if (!nrefs->need_check[*level - 1]) {
			path->slots[*level]++;

		next = btrfs_find_tree_block(fs_info, bytenr, blocksize);
		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
			free_extent_buffer(next);
			reada_walk_down(root, cur, path->slots[*level]);
			next = read_tree_block(fs_info, bytenr, blocksize,
			if (!extent_buffer_uptodate(next)) {
				struct btrfs_key node_key;

				btrfs_node_key_to_cpu(path->nodes[*level],
						      path->slots[*level]);
				btrfs_add_corrupt_extent_record(fs_info,
						path->nodes[*level]->start,

		ret = check_child_node(cur, path->slots[*level], next);

		if (btrfs_is_leaf(next))
			status = btrfs_check_leaf(root, NULL, next);
			status = btrfs_check_node(root, NULL, next);
		if (status != BTRFS_TREE_BLOCK_CLEAN) {
			free_extent_buffer(next);

		*level = *level - 1;
		free_extent_buffer(path->nodes[*level]);
		path->nodes[*level] = next;
		path->slots[*level] = 0;

static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
			struct walk_control *wc, int *level)
	struct extent_buffer *leaf;

	for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
		leaf = path->nodes[i];
		if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
			free_extent_buffer(path->nodes[*level]);
			path->nodes[*level] = NULL;
			BUG_ON(*level > wc->active_node);
			if (*level == wc->active_node)
				leave_shared_node(root, wc, *level);

static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
	struct extent_buffer *leaf;

	for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
		leaf = path->nodes[i];
		if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
			free_extent_buffer(path->nodes[*level]);
			path->nodes[*level] = NULL;

static int check_root_dir(struct inode_record *rec)
	struct inode_backref *backref;

	if (!rec->found_inode_item || rec->errors)
	if (rec->nlink != 1 || rec->found_link != 0)
	if (list_empty(&rec->backrefs))
	backref = to_inode_backref(rec->backrefs.next);
	if (!backref->found_inode_ref)
	if (backref->index != 0 || backref->namelen != 2 ||
	    memcmp(backref->name, "..", 2))
	if (backref->found_dir_index || backref->found_dir_item)
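
/* Reset the on-disk directory inode size to the size found during the scan */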
static int repair_inode_isize(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root, struct btrfs_path *path,
			      struct inode_record *rec)
	struct btrfs_inode_item *ei;
	struct btrfs_key key;

	key.objectid = rec->ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
		if (!path->slots[0]) {

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	if (key.objectid != rec->ino) {

	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_inode_item);
	btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
	printf("reset isize for dir %Lu root %Lu\n", rec->ino,
	       root->root_key.objectid);
	btrfs_release_path(path);

static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct btrfs_path *path,
				    struct inode_record *rec)
	ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
	btrfs_release_path(path);
		rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;

static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_path *path,
			       struct inode_record *rec)
	struct btrfs_inode_item *ei;
	struct btrfs_key key;

	key.objectid = rec->ino;
	key.type = BTRFS_INODE_ITEM_KEY;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);

	/* Since ret == 0, no need to check anything */
	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_inode_item);
	btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
	printf("reset nbytes for ino %llu root %llu\n",
	       rec->ino, root->root_key.objectid);
	btrfs_release_path(path);
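
/*
 * Insert the DIR_INDEX item that a backref reported missing, then update
 * the parent directory's size bookkeeping accordingly.
 */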
2501 static int add_missing_dir_index(struct btrfs_root *root,
2502 struct cache_tree *inode_cache,
2503 struct inode_record *rec,
2504 struct inode_backref *backref)
2506 struct btrfs_path path;
2507 struct btrfs_trans_handle *trans;
2508 struct btrfs_dir_item *dir_item;
2509 struct extent_buffer *leaf;
2510 struct btrfs_key key;
2511 struct btrfs_disk_key disk_key;
2512 struct inode_record *dir_rec;
2513 unsigned long name_ptr;
2514 u32 data_size = sizeof(*dir_item) + backref->namelen;
2517 trans = btrfs_start_transaction(root, 1);
2519 return PTR_ERR(trans);
2521 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2522 (unsigned long long)rec->ino);
2524 btrfs_init_path(&path);
2525 key.objectid = backref->dir;
2526 key.type = BTRFS_DIR_INDEX_KEY;
2527 key.offset = backref->index;
2528 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2531 leaf = path.nodes[0];
2532 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2534 disk_key.objectid = cpu_to_le64(rec->ino);
2535 disk_key.type = BTRFS_INODE_ITEM_KEY;
2536 disk_key.offset = 0;
2538 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2539 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2540 btrfs_set_dir_data_len(leaf, dir_item, 0);
2541 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2542 name_ptr = (unsigned long)(dir_item + 1);
2543 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2544 btrfs_mark_buffer_dirty(leaf);
2545 btrfs_release_path(&path);
2546 btrfs_commit_transaction(trans, root);
2548 backref->found_dir_index = 1;
2549 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2550 BUG_ON(IS_ERR(dir_rec));
2553 dir_rec->found_size += backref->namelen;
2554 if (dir_rec->found_size == dir_rec->isize &&
2555 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2556 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2557 if (dir_rec->found_size != dir_rec->isize)
2558 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
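/*
 * Delete a bad DIR_INDEX entry: look it up by dir, name and index, then
 * remove either the whole item or just the matching dir name within it.
 */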
2563 static int delete_dir_index(struct btrfs_root *root,
2564 struct inode_backref *backref)
2566 struct btrfs_trans_handle *trans;
2567 struct btrfs_dir_item *di;
2568 struct btrfs_path path;
2571 trans = btrfs_start_transaction(root, 1);
2573 return PTR_ERR(trans);
2575 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2576 (unsigned long long)backref->dir,
2577 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2578 (unsigned long long)root->objectid);
2580 btrfs_init_path(&path);
2581 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2582 backref->name, backref->namelen,
2583 backref->index, -1);
2586 btrfs_release_path(&path);
2587 btrfs_commit_transaction(trans, root);
2594 ret = btrfs_del_item(trans, root, &path);
2596 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2598 btrfs_release_path(&path);
2599 btrfs_commit_transaction(trans, root);
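/*
 * Recreate a missing inode item from what was found in the tree: a found
 * dir item makes it a directory sized by found_size, otherwise it becomes
 * a regular file sized by extent_end; timestamps are set to the current
 * time.
 */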
2603 static int create_inode_item(struct btrfs_root *root,
2604 struct inode_record *rec,
2607 struct btrfs_trans_handle *trans;
2608 struct btrfs_inode_item inode_item;
2609 time_t now = time(NULL);
2612 trans = btrfs_start_transaction(root, 1);
2613 if (IS_ERR(trans)) {
2614 ret = PTR_ERR(trans);
2618 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2619 "be incomplete, please check permissions and content after "
2620 "the fsck completes.\n", (unsigned long long)root->objectid,
2621 (unsigned long long)rec->ino);
2623 memset(&inode_item, 0, sizeof(inode_item));
2624 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2626 btrfs_set_stack_inode_nlink(&inode_item, 1);
2628 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2629 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2630 if (rec->found_dir_item) {
2631 if (rec->found_file_extent)
2632 fprintf(stderr, "root %llu inode %llu has both a dir "
2633 "item and extents, unsure if it is a dir or a "
2634 "regular file so setting it as a directory\n",
2635 (unsigned long long)root->objectid,
2636 (unsigned long long)rec->ino);
2637 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2638 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2639 } else {
2640 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2641 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2643 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2644 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2645 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2646 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2647 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2648 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2649 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2650 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2652 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2654 btrfs_commit_transaction(trans, root);
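/*
 * Walk all backrefs of the inode record and repair them: delete bad dir
 * indexes, add missing ones, insert a dir index/item pair for a dangling
 * INODE_REF, and recreate the inode item when it is missing.
 * Returns the number of repairs made, or a negative error.
 */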
2658 static int repair_inode_backrefs(struct btrfs_root *root,
2659 struct inode_record *rec,
2660 struct cache_tree *inode_cache,
2663 struct inode_backref *tmp, *backref;
2664 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2668 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2669 if (!delete && rec->ino == root_dirid) {
2670 if (!rec->found_inode_item) {
2671 ret = create_inode_item(root, rec, 1);
2678 /* Index 0 for the root dir is special, don't mess with it */
2679 if (rec->ino == root_dirid && backref->index == 0)
2683 ((backref->found_dir_index && !backref->found_inode_ref) ||
2684 (backref->found_dir_index && backref->found_inode_ref &&
2685 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2686 ret = delete_dir_index(root, backref);
2690 list_del(&backref->list);
2695 if (!delete && !backref->found_dir_index &&
2696 backref->found_dir_item && backref->found_inode_ref) {
2697 ret = add_missing_dir_index(root, inode_cache, rec,
2702 if (backref->found_dir_item &&
2703 backref->found_dir_index) {
2704 if (!backref->errors &&
2705 backref->found_inode_ref) {
2706 list_del(&backref->list);
2713 if (!delete && (!backref->found_dir_index &&
2714 !backref->found_dir_item &&
2715 backref->found_inode_ref)) {
2716 struct btrfs_trans_handle *trans;
2717 struct btrfs_key location;
2719 ret = check_dir_conflict(root, backref->name,
2725 * let the nlink fixing routine handle it,
2726 * which can do it better.
2731 location.objectid = rec->ino;
2732 location.type = BTRFS_INODE_ITEM_KEY;
2733 location.offset = 0;
2735 trans = btrfs_start_transaction(root, 1);
2736 if (IS_ERR(trans)) {
2737 ret = PTR_ERR(trans);
2740 fprintf(stderr, "adding missing dir index/item pair "
2742 (unsigned long long)rec->ino);
2743 ret = btrfs_insert_dir_item(trans, root, backref->name,
2745 backref->dir, &location,
2746 imode_to_type(rec->imode),
2749 btrfs_commit_transaction(trans, root);
2753 if (!delete && (backref->found_inode_ref &&
2754 backref->found_dir_index &&
2755 backref->found_dir_item &&
2756 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2757 !rec->found_inode_item)) {
2758 ret = create_inode_item(root, rec, 0);
2765 return ret ? ret : repaired;
2769 * To determine the file type for nlink/inode_item repair
2771 * Return 0 if the file type is found and the BTRFS_FT_* value is stored into @type.
2772 * Return -ENOENT if the file type is not found.
2774 static int find_file_type(struct inode_record *rec, u8 *type)
2776 struct inode_backref *backref;
2778 /* For inode item recovered case */
2779 if (rec->found_inode_item) {
2780 *type = imode_to_type(rec->imode);
2784 list_for_each_entry(backref, &rec->backrefs, list) {
2785 if (backref->found_dir_index || backref->found_dir_item) {
2786 *type = backref->filetype;
2794 * To determine the file name for nlink repair
2796 * Return 0 if the file name is found, and set @name and @namelen.
2797 * Return -ENOENT if the file name is not found.
2799 static int find_file_name(struct inode_record *rec,
2800 char *name, int *namelen)
2802 struct inode_backref *backref;
2804 list_for_each_entry(backref, &rec->backrefs, list) {
2805 if (backref->found_dir_index || backref->found_dir_item ||
2806 backref->found_inode_ref) {
2807 memcpy(name, backref->name, backref->namelen);
2808 *namelen = backref->namelen;
2815 /* Reset the nlink of the inode to the correct one */
2816 static int reset_nlink(struct btrfs_trans_handle *trans,
2817 struct btrfs_root *root,
2818 struct btrfs_path *path,
2819 struct inode_record *rec)
2821 struct inode_backref *backref;
2822 struct inode_backref *tmp;
2823 struct btrfs_key key;
2824 struct btrfs_inode_item *inode_item;
2827 /* We don't trust the stored link count either; reset it and iterate the backrefs */
2828 rec->found_link = 0;
2830 /* Remove all backrefs, including the valid ones */
2831 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2832 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2833 backref->index, backref->name,
2834 backref->namelen, 0);
2838 /* Remove invalid backrefs so they won't be added back */
2839 if (!(backref->found_dir_index &&
2840 backref->found_dir_item &&
2841 backref->found_inode_ref)) {
2842 list_del(&backref->list);
2849 /* Set nlink to 0 */
2850 key.objectid = rec->ino;
2851 key.type = BTRFS_INODE_ITEM_KEY;
2853 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2860 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2861 struct btrfs_inode_item);
2862 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2863 btrfs_mark_buffer_dirty(path->nodes[0]);
2864 btrfs_release_path(path);
2867 * Add back the valid inode_ref/dir_item/dir_index entries;
2868 * btrfs_add_link() handles the nlink increment, so the new nlink will be correct
2870 list_for_each_entry(backref, &rec->backrefs, list) {
2871 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2872 backref->name, backref->namelen,
2873 backref->filetype, &backref->index, 1);
2878 btrfs_release_path(path);
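/*
 * Look up the highest inode number currently used in @root; the nlink
 * repair uses it to pick an objectid for the "lost+found" directory.
 */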
2882 static int get_highest_inode(struct btrfs_trans_handle *trans,
2883 struct btrfs_root *root,
2884 struct btrfs_path *path,
2887 struct btrfs_key key, found_key;
2890 btrfs_init_path(path);
2891 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2893 key.type = BTRFS_INODE_ITEM_KEY;
2894 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2896 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2897 path->slots[0] - 1);
2898 *highest_ino = found_key.objectid;
2901 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2903 btrfs_release_path(path);
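/*
 * Repair a wrong link count: rebuild nlink from the valid backrefs via
 * reset_nlink(), and if no valid link remains, move the inode into a
 * "lost+found" directory using the recovered (or generated) name and type.
 * On a name conflict the inode number is appended as a suffix, e.g. a
 * conflicting "foo" for inode 257 would become "foo.257".
 */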
2907 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2908 struct btrfs_root *root,
2909 struct btrfs_path *path,
2910 struct inode_record *rec)
2912 char *dir_name = "lost+found";
2913 char namebuf[BTRFS_NAME_LEN] = {0};
2918 int name_recovered = 0;
2919 int type_recovered = 0;
2923 * Get the file name and type first, before the invalid inode refs
2924 * are deleted by reset_nlink() below
2926 name_recovered = !find_file_name(rec, namebuf, &namelen);
2927 type_recovered = !find_file_type(rec, &type);
2929 if (!name_recovered) {
2930 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2931 rec->ino, rec->ino);
2932 namelen = count_digits(rec->ino);
2933 sprintf(namebuf, "%llu", rec->ino);
2936 if (!type_recovered) {
2937 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2939 type = BTRFS_FT_REG_FILE;
2943 ret = reset_nlink(trans, root, path, rec);
2946 "Failed to reset nlink for inode %llu: %s\n",
2947 rec->ino, strerror(-ret));
2951 if (rec->found_link == 0) {
2952 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2956 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2957 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2960 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2961 dir_name, strerror(-ret));
2964 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2965 namebuf, namelen, type, NULL, 1);
2967 * Add the ".INO" suffix several times to handle the case where
2968 * "FILENAME.INO" is already taken by another file.
2970 while (ret == -EEXIST) {
2972 * Conflicting file name, add ".INO" as a suffix; the +1 is for the '.'
2974 if (namelen + count_digits(rec->ino) + 1 >
2979 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2981 namelen += count_digits(rec->ino) + 1;
2982 ret = btrfs_add_link(trans, root, rec->ino,
2983 lost_found_ino, namebuf,
2984 namelen, type, NULL, 1);
2988 "Failed to link the inode %llu to %s dir: %s\n",
2989 rec->ino, dir_name, strerror(-ret));
2993 * Just increase the found_link, don't actually add the
2994 * backref. This keeps things simple, and the inode
2995 * record will be freed after the repair is done,
2996 * so fsck will not report a problem for this inode.
2999 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3000 namelen, namebuf, dir_name);
3002 printf("Fixed the nlink of inode %llu\n", rec->ino);
3005 * Clear the flag anyway, or we will loop forever on the same inode,
3006 * as it would never be removed from the bad inode list.
3009 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3010 btrfs_release_path(path);
3015 * Check if there is any normal (reg or prealloc) file extent for the given inode.
3017 * This is used to determine the file type when neither its dir_index/item nor
3018 * inode_item exists.
3020 * This will *NOT* report errors; if any error happens, just consider the inode
3021 * to not have any normal file extent.
3023 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3025 struct btrfs_path path;
3026 struct btrfs_key key;
3027 struct btrfs_key found_key;
3028 struct btrfs_file_extent_item *fi;
3032 btrfs_init_path(&path);
3034 key.type = BTRFS_EXTENT_DATA_KEY;
3037 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3042 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3043 ret = btrfs_next_leaf(root, &path);
3050 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3052 if (found_key.objectid != ino ||
3053 found_key.type != BTRFS_EXTENT_DATA_KEY)
3055 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3056 struct btrfs_file_extent_item);
3057 type = btrfs_file_extent_type(path.nodes[0], fi);
3058 if (type != BTRFS_FILE_EXTENT_INLINE) {
3064 btrfs_release_path(&path);
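/* Map a BTRFS_FT_* dir entry type to the matching S_IF* mode bits. */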
3068 static u32 btrfs_type_to_imode(u8 type)
3070 static u32 imode_by_btrfs_type[] = {
3071 [BTRFS_FT_REG_FILE] = S_IFREG,
3072 [BTRFS_FT_DIR] = S_IFDIR,
3073 [BTRFS_FT_CHRDEV] = S_IFCHR,
3074 [BTRFS_FT_BLKDEV] = S_IFBLK,
3075 [BTRFS_FT_FIFO] = S_IFIFO,
3076 [BTRFS_FT_SOCK] = S_IFSOCK,
3077 [BTRFS_FT_SYMLINK] = S_IFLNK,
3080 return imode_by_btrfs_type[type];
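/*
 * Rebuild a missing inode item: guess the file type from the found file
 * extents, dir items or orphan extents (falling back to a regular file),
 * create the new inode item, and set I_ERR_LINK_COUNT_WRONG so the nlink
 * repair runs afterwards.
 */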
3083 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3084 struct btrfs_root *root,
3085 struct btrfs_path *path,
3086 struct inode_record *rec)
3090 int type_recovered = 0;
3093 printf("Trying to rebuild inode: %llu\n", rec->ino);
3095 type_recovered = !find_file_type(rec, &filetype);
3098 * Try to determine the inode type if it was not found.
3100 * If a regular file extent was found, it must be a FILE.
3101 * If a dir_item/index was found, it must be a DIR.
3103 * For an undetermined one, use FILE as the fallback.
3106 * 1. If a backref to it were found (inode_index/item is already handled),
3108 * a new inode-inode ref structure would be needed to search for that.
3110 if (!type_recovered) {
3111 if (rec->found_file_extent &&
3112 find_normal_file_extent(root, rec->ino)) {
3114 filetype = BTRFS_FT_REG_FILE;
3115 } else if (rec->found_dir_item) {
3117 filetype = BTRFS_FT_DIR;
3118 } else if (!list_empty(&rec->orphan_extents)) {
3120 filetype = BTRFS_FT_REG_FILE;
3122 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3125 filetype = BTRFS_FT_REG_FILE;
3129 ret = btrfs_new_inode(trans, root, rec->ino,
3130 mode | btrfs_type_to_imode(filetype));
3135 * Here the inode rebuild is done; we only rebuild the inode item and
3136 * don't repair the nlink (like moving to lost+found).
3137 * That is the job of the nlink repair.
3139 * We just fill the record and return.
3141 rec->found_dir_item = 1;
3142 rec->imode = mode | btrfs_type_to_imode(filetype);
3144 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3145 /* Ensure the inode_nlinks repair function will be called */
3146 rec->errors |= I_ERR_LINK_COUNT_WRONG;
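/*
 * Re-insert the orphan data extents recorded for this inode as file
 * extents; conflicting extents are freed instead. File size, nbytes and
 * the file hole bookkeeping are updated along the way.
 */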
3151 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3152 struct btrfs_root *root,
3153 struct btrfs_path *path,
3154 struct inode_record *rec)
3156 struct orphan_data_extent *orphan;
3157 struct orphan_data_extent *tmp;
3160 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3162 * Check for conflicting file extents.
3164 * Here we don't know whether the extent is compressed or not,
3165 * so we can only assume it is not compressed and has no data offset,
3166 * and use its disk_len as the extent length.
3168 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3169 orphan->offset, orphan->disk_len, 0);
3170 btrfs_release_path(path);
3175 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3176 orphan->disk_bytenr, orphan->disk_len);
3177 ret = btrfs_free_extent(trans,
3178 root->fs_info->extent_root,
3179 orphan->disk_bytenr, orphan->disk_len,
3180 0, root->objectid, orphan->objectid,
3185 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3186 orphan->offset, orphan->disk_bytenr,
3187 orphan->disk_len, orphan->disk_len);
3191 /* Update file size info */
3192 rec->found_size += orphan->disk_len;
3193 if (rec->found_size == rec->nbytes)
3194 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3196 /* Update the file extent hole info too */
3197 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3201 if (RB_EMPTY_ROOT(&rec->holes))
3202 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3204 list_del(&orphan->list);
3207 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
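/*
 * Fill the recorded file extent holes of the inode by punching hole
 * extents, clearing I_ERR_FILE_EXTENT_DISCOUNT once no hole remains.
 */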
3212 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3213 struct btrfs_root *root,
3214 struct btrfs_path *path,
3215 struct inode_record *rec)
3217 struct rb_node *node;
3218 struct file_extent_hole *hole;
3222 node = rb_first(&rec->holes);
3226 hole = rb_entry(node, struct file_extent_hole, node);
3227 ret = btrfs_punch_hole(trans, root, rec->ino,
3228 hole->start, hole->len);
3231 ret = del_file_extent_hole(&rec->holes, hole->start,
3235 if (RB_EMPTY_ROOT(&rec->holes))
3236 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3237 node = rb_first(&rec->holes);
3239 /* special case for a file losing all its file extents */
3241 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3242 round_up(rec->isize,
3243 root->fs_info->sectorsize));
3247 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3248 rec->ino, root->objectid);
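/*
 * Dispatch to the individual repair helpers above according to the error
 * bits set in @rec, all within a single transaction.
 */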
3253 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3255 struct btrfs_trans_handle *trans;
3256 struct btrfs_path path;
3259 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3260 I_ERR_NO_ORPHAN_ITEM |
3261 I_ERR_LINK_COUNT_WRONG |
3262 I_ERR_NO_INODE_ITEM |
3263 I_ERR_FILE_EXTENT_ORPHAN |
3264 I_ERR_FILE_EXTENT_DISCOUNT|
3265 I_ERR_FILE_NBYTES_WRONG)))
3269 * For nlink repair, it may create a dir and add link, so
3270 * 2 for parent(256)'s dir_index and dir_item
3271 * 2 for lost+found dir's inode_item and inode_ref
3272 * 1 for the new inode_ref of the file
3273 * 2 for lost+found dir's dir_index and dir_item for the file
3275 trans = btrfs_start_transaction(root, 7);
3277 return PTR_ERR(trans);
3279 btrfs_init_path(&path);
3280 if (rec->errors & I_ERR_NO_INODE_ITEM)
3281 ret = repair_inode_no_item(trans, root, &path, rec);
3282 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3283 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3284 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3285 ret = repair_inode_discount_extent(trans, root, &path, rec);
3286 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3287 ret = repair_inode_isize(trans, root, &path, rec);
3288 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3289 ret = repair_inode_orphan_item(trans, root, &path, rec);
3290 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3291 ret = repair_inode_nlinks(trans, root, &path, rec);
3292 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3293 ret = repair_inode_nbytes(trans, root, &path, rec);
3294 btrfs_commit_transaction(trans, root);
3295 btrfs_release_path(&path);
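/*
 * Verify (and in repair mode fix) all collected inode records of a root:
 * repair the backrefs first, make sure the root dir exists, then check
 * each inode for orphan items, link counts and the other recorded errors.
 */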
3299 static int check_inode_recs(struct btrfs_root *root,
3300 struct cache_tree *inode_cache)
3302 struct cache_extent *cache;
3303 struct ptr_node *node;
3304 struct inode_record *rec;
3305 struct inode_backref *backref;
3310 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3312 if (btrfs_root_refs(&root->root_item) == 0) {
3313 if (!cache_tree_empty(inode_cache))
3314 fprintf(stderr, "warning line %d\n", __LINE__);
3319 * We need to repair backrefs first because we could change some of the
3320 * errors in the inode recs.
3322 * We also need to go through and delete invalid backrefs first and then
3323 * add the correct ones second. We do this because we may get EEXIST
3324 * when adding back the correct index if we haven't yet deleted the invalid one.
3327 * For example, if we were missing a dir index then the directory's
3328 * isize would be wrong, so if we fixed the isize to what we thought it
3329 * should be and then fixed the backref we'd still have an invalid fs, so
3330 * we need to add back the dir index and then re-check the isize.
3335 if (stage == 3 && !err)
3338 cache = search_cache_extent(inode_cache, 0);
3339 while (repair && cache) {
3340 node = container_of(cache, struct ptr_node, cache);
3342 cache = next_cache_extent(cache);
3344 /* Need to free everything up and rescan */
3346 remove_cache_extent(inode_cache, &node->cache);
3348 free_inode_rec(rec);
3352 if (list_empty(&rec->backrefs))
3355 ret = repair_inode_backrefs(root, rec, inode_cache,
3369 rec = get_inode_rec(inode_cache, root_dirid, 0);
3370 BUG_ON(IS_ERR(rec));
3372 ret = check_root_dir(rec);
3374 fprintf(stderr, "root %llu root dir %llu error\n",
3375 (unsigned long long)root->root_key.objectid,
3376 (unsigned long long)root_dirid);
3377 print_inode_error(root, rec);
3382 struct btrfs_trans_handle *trans;
3384 trans = btrfs_start_transaction(root, 1);
3385 if (IS_ERR(trans)) {
3386 err = PTR_ERR(trans);
3391 "root %llu missing its root dir, recreating\n",
3392 (unsigned long long)root->objectid);
3394 ret = btrfs_make_root_dir(trans, root, root_dirid);
3397 btrfs_commit_transaction(trans, root);
3401 fprintf(stderr, "root %llu root dir %llu not found\n",
3402 (unsigned long long)root->root_key.objectid,
3403 (unsigned long long)root_dirid);
3407 cache = search_cache_extent(inode_cache, 0);
3410 node = container_of(cache, struct ptr_node, cache);
3412 remove_cache_extent(inode_cache, &node->cache);
3414 if (rec->ino == root_dirid ||
3415 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3416 free_inode_rec(rec);
3420 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3421 ret = check_orphan_item(root, rec->ino);
3423 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3424 if (can_free_inode_rec(rec)) {
3425 free_inode_rec(rec);
3430 if (!rec->found_inode_item)
3431 rec->errors |= I_ERR_NO_INODE_ITEM;
3432 if (rec->found_link != rec->nlink)
3433 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3435 ret = try_repair_inode(root, rec);
3436 if (ret == 0 && can_free_inode_rec(rec)) {
3437 free_inode_rec(rec);
3443 if (!(repair && ret == 0))
3445 print_inode_error(root, rec);
3446 list_for_each_entry(backref, &rec->backrefs, list) {
3447 if (!backref->found_dir_item)
3448 backref->errors |= REF_ERR_NO_DIR_ITEM;
3449 if (!backref->found_dir_index)
3450 backref->errors |= REF_ERR_NO_DIR_INDEX;
3451 if (!backref->found_inode_ref)
3452 backref->errors |= REF_ERR_NO_INODE_REF;
3453 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3454 " namelen %u name %s filetype %d errors %x",
3455 (unsigned long long)backref->dir,
3456 (unsigned long long)backref->index,
3457 backref->namelen, backref->name,
3458 backref->filetype, backref->errors);
3459 print_ref_error(backref->errors);
3461 free_inode_rec(rec);
3463 return (error > 0) ? -1 : 0;
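/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a new one if it does not exist yet; returns an ERR_PTR on
 * failure.
 */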
3466 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3469 struct cache_extent *cache;
3470 struct root_record *rec = NULL;
3473 cache = lookup_cache_extent(root_cache, objectid, 1);
3475 rec = container_of(cache, struct root_record, cache);
3477 rec = calloc(1, sizeof(*rec));
3479 return ERR_PTR(-ENOMEM);
3480 rec->objectid = objectid;
3481 INIT_LIST_HEAD(&rec->backrefs);
3482 rec->cache.start = objectid;
3483 rec->cache.size = 1;
3485 ret = insert_cache_extent(root_cache, &rec->cache);
3487 return ERR_PTR(-EEXIST);
3492 static struct root_backref *get_root_backref(struct root_record *rec,
3493 u64 ref_root, u64 dir, u64 index,
3494 const char *name, int namelen)
3496 struct root_backref *backref;
3498 list_for_each_entry(backref, &rec->backrefs, list) {
3499 if (backref->ref_root != ref_root || backref->dir != dir ||
3500 backref->namelen != namelen)
3502 if (memcmp(name, backref->name, namelen))
3507 backref = calloc(1, sizeof(*backref) + namelen + 1);
3510 backref->ref_root = ref_root;
3512 backref->index = index;
3513 backref->namelen = namelen;
3514 memcpy(backref->name, name, namelen);
3515 backref->name[namelen] = '\0';
3516 list_add_tail(&backref->list, &rec->backrefs);
3520 static void free_root_record(struct cache_extent *cache)
3522 struct root_record *rec;
3523 struct root_backref *backref;
3525 rec = container_of(cache, struct root_record, cache);
3526 while (!list_empty(&rec->backrefs)) {
3527 backref = to_root_backref(rec->backrefs.next);
3528 list_del(&backref->list);
3535 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3537 static int add_root_backref(struct cache_tree *root_cache,
3538 u64 root_id, u64 ref_root, u64 dir, u64 index,
3539 const char *name, int namelen,
3540 int item_type, int errors)
3542 struct root_record *rec;
3543 struct root_backref *backref;
3545 rec = get_root_rec(root_cache, root_id);
3546 BUG_ON(IS_ERR(rec));
3547 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3550 backref->errors |= errors;
3552 if (item_type != BTRFS_DIR_ITEM_KEY) {
3553 if (backref->found_dir_index || backref->found_back_ref ||
3554 backref->found_forward_ref) {
3555 if (backref->index != index)
3556 backref->errors |= REF_ERR_INDEX_UNMATCH;
3558 backref->index = index;
3562 if (item_type == BTRFS_DIR_ITEM_KEY) {
3563 if (backref->found_forward_ref)
3565 backref->found_dir_item = 1;
3566 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3567 backref->found_dir_index = 1;
3568 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3569 if (backref->found_forward_ref)
3570 backref->errors |= REF_ERR_DUP_ROOT_REF;
3571 else if (backref->found_dir_item)
3573 backref->found_forward_ref = 1;
3574 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3575 if (backref->found_back_ref)
3576 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3577 backref->found_back_ref = 1;
3582 if (backref->found_forward_ref && backref->found_dir_item)
3583 backref->reachable = 1;
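/*
 * Move the child-root references collected while walking @root from
 * @src_cache into @dst_cache as root backrefs (DIR_ITEM/DIR_INDEX);
 * records gathered from the tree reloc tree are simply dropped.
 */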
3587 static int merge_root_recs(struct btrfs_root *root,
3588 struct cache_tree *src_cache,
3589 struct cache_tree *dst_cache)
3591 struct cache_extent *cache;
3592 struct ptr_node *node;
3593 struct inode_record *rec;
3594 struct inode_backref *backref;
3597 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3598 free_inode_recs_tree(src_cache);
3603 cache = search_cache_extent(src_cache, 0);
3606 node = container_of(cache, struct ptr_node, cache);
3608 remove_cache_extent(src_cache, &node->cache);
3611 ret = is_child_root(root, root->objectid, rec->ino);
3617 list_for_each_entry(backref, &rec->backrefs, list) {
3618 BUG_ON(backref->found_inode_ref);
3619 if (backref->found_dir_item)
3620 add_root_backref(dst_cache, rec->ino,
3621 root->root_key.objectid, backref->dir,
3622 backref->index, backref->name,
3623 backref->namelen, BTRFS_DIR_ITEM_KEY,
3625 if (backref->found_dir_index)
3626 add_root_backref(dst_cache, rec->ino,
3627 root->root_key.objectid, backref->dir,
3628 backref->index, backref->name,
3629 backref->namelen, BTRFS_DIR_INDEX_KEY,
3633 free_inode_rec(rec);
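/*
 * Check that every fs tree recorded in @root_cache is reachable from the
 * fs tree root and that its backrefs are complete, reporting unreferenced
 * trees and unresolved refs.
 */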
3640 static int check_root_refs(struct btrfs_root *root,
3641 struct cache_tree *root_cache)
3643 struct root_record *rec;
3644 struct root_record *ref_root;
3645 struct root_backref *backref;
3646 struct cache_extent *cache;
3652 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3653 BUG_ON(IS_ERR(rec));
3656 /* fixme: this cannot detect circular references */
3659 cache = search_cache_extent(root_cache, 0);
3663 rec = container_of(cache, struct root_record, cache);
3664 cache = next_cache_extent(cache);
3666 if (rec->found_ref == 0)
3669 list_for_each_entry(backref, &rec->backrefs, list) {
3670 if (!backref->reachable)
3673 ref_root = get_root_rec(root_cache,
3675 BUG_ON(IS_ERR(ref_root));
3676 if (ref_root->found_ref > 0)
3679 backref->reachable = 0;
3681 if (rec->found_ref == 0)
3687 cache = search_cache_extent(root_cache, 0);
3691 rec = container_of(cache, struct root_record, cache);
3692 cache = next_cache_extent(cache);
3694 if (rec->found_ref == 0 &&
3695 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3696 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3697 ret = check_orphan_item(root->fs_info->tree_root,
3703 * If we don't have a root item then we likely just have
3704 * a dir item in a snapshot for this root but no actual
3705 * ref key or anything so it's meaningless.
3707 if (!rec->found_root_item)
3710 fprintf(stderr, "fs tree %llu not referenced\n",
3711 (unsigned long long)rec->objectid);
3715 if (rec->found_ref > 0 && !rec->found_root_item)
3717 list_for_each_entry(backref, &rec->backrefs, list) {
3718 if (!backref->found_dir_item)
3719 backref->errors |= REF_ERR_NO_DIR_ITEM;
3720 if (!backref->found_dir_index)
3721 backref->errors |= REF_ERR_NO_DIR_INDEX;
3722 if (!backref->found_back_ref)
3723 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3724 if (!backref->found_forward_ref)
3725 backref->errors |= REF_ERR_NO_ROOT_REF;
3726 if (backref->reachable && backref->errors)
3733 fprintf(stderr, "fs tree %llu refs %u %s\n",
3734 (unsigned long long)rec->objectid, rec->found_ref,
3735 rec->found_root_item ? "" : "not found");
3737 list_for_each_entry(backref, &rec->backrefs, list) {
3738 if (!backref->reachable)
3740 if (!backref->errors && rec->found_root_item)
3742 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3743 " index %llu namelen %u name %s errors %x\n",
3744 (unsigned long long)backref->ref_root,
3745 (unsigned long long)backref->dir,
3746 (unsigned long long)backref->index,
3747 backref->namelen, backref->name,
3749 print_ref_error(backref->errors);
3752 return errors > 0 ? 1 : 0;
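/*
 * Parse a ROOT_REF/ROOT_BACKREF item, truncating over-long names with
 * REF_ERR_NAME_TOO_LONG, and record the corresponding root backref in
 * @root_cache.
 */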
3755 static int process_root_ref(struct extent_buffer *eb, int slot,
3756 struct btrfs_key *key,
3757 struct cache_tree *root_cache)
3763 struct btrfs_root_ref *ref;
3764 char namebuf[BTRFS_NAME_LEN];
3767 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3769 dirid = btrfs_root_ref_dirid(eb, ref);
3770 index = btrfs_root_ref_sequence(eb, ref);
3771 name_len = btrfs_root_ref_name_len(eb, ref);
3773 if (name_len <= BTRFS_NAME_LEN) {
3777 len = BTRFS_NAME_LEN;
3778 error = REF_ERR_NAME_TOO_LONG;
3780 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3782 if (key->type == BTRFS_ROOT_REF_KEY) {
3783 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3784 index, namebuf, len, key->type, error);
3786 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3787 index, namebuf, len, key->type, error);
3792 static void free_corrupt_block(struct cache_extent *cache)
3794 struct btrfs_corrupt_block *corrupt;
3796 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3800 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3803 * Repair the btree of the given root.
3805 * The fix is to remove the node keys recorded in the corrupt_blocks cache_tree
3806 * and rebalance the tree.
3807 * After the fix, the btree should be writeable.
3809 static int repair_btree(struct btrfs_root *root,
3810 struct cache_tree *corrupt_blocks)
3812 struct btrfs_trans_handle *trans;
3813 struct btrfs_path path;
3814 struct btrfs_corrupt_block *corrupt;
3815 struct cache_extent *cache;
3816 struct btrfs_key key;
3821 if (cache_tree_empty(corrupt_blocks))
3824 trans = btrfs_start_transaction(root, 1);
3825 if (IS_ERR(trans)) {
3826 ret = PTR_ERR(trans);
3827 fprintf(stderr, "Error starting transaction: %s\n",
3831 btrfs_init_path(&path);
3832 cache = first_cache_extent(corrupt_blocks);
3834 corrupt = container_of(cache, struct btrfs_corrupt_block,
3836 level = corrupt->level;
3837 path.lowest_level = level;
3838 key.objectid = corrupt->key.objectid;
3839 key.type = corrupt->key.type;
3840 key.offset = corrupt->key.offset;
3843 * Here we don't want to do any tree balancing, since it may
3844 * cause a balance with a corrupted sibling leaf/node,
3845 * so ins_len is set to 0 here.
3846 * Balancing will be done after all corrupt nodes/leaves are deleted.
3848 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3851 offset = btrfs_node_blockptr(path.nodes[level],
3854 /* Remove the ptr */
3855 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3859 * Remove the corresponding extent;
3860 * the return value is not checked.
3862 btrfs_release_path(&path);
3863 ret = btrfs_free_extent(trans, root, offset,
3864 root->fs_info->nodesize, 0,
3865 root->root_key.objectid, level - 1, 0);
3866 cache = next_cache_extent(cache);
3869 /* Balance the btree using btrfs_search_slot() */
3870 cache = first_cache_extent(corrupt_blocks);
3872 corrupt = container_of(cache, struct btrfs_corrupt_block,
3874 memcpy(&key, &corrupt->key, sizeof(key));
3875 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3878 /* ret will always be > 0 since it won't find the item */
3880 btrfs_release_path(&path);
3881 cache = next_cache_extent(cache);
3884 btrfs_commit_transaction(trans, root);
3885 btrfs_release_path(&path);
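/*
 * Walk one fs/subvol tree: collect inode records and root refs, verify
 * every tree block, optionally repair corrupted blocks via repair_btree(),
 * then merge the results into @root_cache and check the inode records.
 */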
3889 static int check_fs_root(struct btrfs_root *root,
3890 struct cache_tree *root_cache,
3891 struct walk_control *wc)
3897 struct btrfs_path path;
3898 struct shared_node root_node;
3899 struct root_record *rec;
3900 struct btrfs_root_item *root_item = &root->root_item;
3901 struct cache_tree corrupt_blocks;
3902 struct orphan_data_extent *orphan;
3903 struct orphan_data_extent *tmp;
3904 enum btrfs_tree_block_status status;
3905 struct node_refs nrefs;
3908 * Reuse the corrupt_block cache tree to record corrupted tree blocks.
3910 * Unlike its usage in the extent tree check, here we do it on a per
3911 * fs/subvol tree basis.
3913 cache_tree_init(&corrupt_blocks);
3914 root->fs_info->corrupt_blocks = &corrupt_blocks;
3916 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3917 rec = get_root_rec(root_cache, root->root_key.objectid);
3918 BUG_ON(IS_ERR(rec));
3919 if (btrfs_root_refs(root_item) > 0)
3920 rec->found_root_item = 1;
3923 btrfs_init_path(&path);
3924 memset(&root_node, 0, sizeof(root_node));
3925 cache_tree_init(&root_node.root_cache);
3926 cache_tree_init(&root_node.inode_cache);
3927 memset(&nrefs, 0, sizeof(nrefs));
3929 /* Move each orphan extent record to its corresponding inode_record */
3930 list_for_each_entry_safe(orphan, tmp,
3931 &root->orphan_data_extents, list) {
3932 struct inode_record *inode;
3934 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3936 BUG_ON(IS_ERR(inode));
3937 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3938 list_move(&orphan->list, &inode->orphan_extents);
3941 level = btrfs_header_level(root->node);
3942 memset(wc->nodes, 0, sizeof(wc->nodes));
3943 wc->nodes[level] = &root_node;
3944 wc->active_node = level;
3945 wc->root_level = level;
3947 /* We may not have checked the root block, let's do that now */
3948 if (btrfs_is_leaf(root->node))
3949 status = btrfs_check_leaf(root, NULL, root->node);
3951 status = btrfs_check_node(root, NULL, root->node);
3952 if (status != BTRFS_TREE_BLOCK_CLEAN)
3955 if (btrfs_root_refs(root_item) > 0 ||
3956 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3957 path.nodes[level] = root->node;
3958 extent_buffer_get(root->node);
3959 path.slots[level] = 0;
3961 struct btrfs_key key;
3962 struct btrfs_disk_key found_key;
3964 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3965 level = root_item->drop_level;
3966 path.lowest_level = level;
3967 if (level > btrfs_header_level(root->node) ||
3968 level >= BTRFS_MAX_LEVEL) {
3969 error("ignoring invalid drop level: %u", level);
3972 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3975 btrfs_node_key(path.nodes[level], &found_key,
3977 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3978 sizeof(found_key)));
3982 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3988 wret = walk_up_tree(root, &path, wc, &level);
3995 btrfs_release_path(&path);
3997 if (!cache_tree_empty(&corrupt_blocks)) {
3998 struct cache_extent *cache;
3999 struct btrfs_corrupt_block *corrupt;
4001 printf("The following tree block(s) are corrupted in tree %llu:\n",
4002 root->root_key.objectid);
4003 cache = first_cache_extent(&corrupt_blocks);
4005 corrupt = container_of(cache,
4006 struct btrfs_corrupt_block,
4008 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4009 cache->start, corrupt->level,
4010 corrupt->key.objectid, corrupt->key.type,
4011 corrupt->key.offset);
4012 cache = next_cache_extent(cache);
4015 printf("Try to repair the btree for root %llu\n",
4016 root->root_key.objectid);
4017 ret = repair_btree(root, &corrupt_blocks);
4019 fprintf(stderr, "Failed to repair btree: %s\n",
4022 printf("Btree for root %llu is fixed\n",
4023 root->root_key.objectid);
4027 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4031 if (root_node.current) {
4032 root_node.current->checked = 1;
4033 maybe_free_inode_rec(&root_node.inode_cache,
4037 err = check_inode_recs(root, &root_node.inode_cache);
4041 free_corrupt_blocks_tree(&corrupt_blocks);
4042 root->fs_info->corrupt_blocks = NULL;
4043 free_orphan_data_extents(&root->orphan_data_extents);
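/*
 * Return non-zero for tree objectids that are checked as fs trees:
 * regular subvolume trees plus the tree reloc and data reloc trees.
 */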
4047 static int fs_root_objectid(u64 objectid)
4049 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4050 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4052 return is_fstree(objectid);
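/*
 * Iterate all ROOT_ITEMs in the tree root and run check_fs_root() on each
 * fs/subvol tree, restarting when the tree root changes underneath us or a
 * check returns -EAGAIN; ROOT_REF/ROOT_BACKREF items are handed to
 * process_root_ref().
 */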
4055 static int check_fs_roots(struct btrfs_root *root,
4056 struct cache_tree *root_cache)
4058 struct btrfs_path path;
4059 struct btrfs_key key;
4060 struct walk_control wc;
4061 struct extent_buffer *leaf, *tree_node;
4062 struct btrfs_root *tmp_root;
4063 struct btrfs_root *tree_root = root->fs_info->tree_root;
4067 if (ctx.progress_enabled) {
4068 ctx.tp = TASK_FS_ROOTS;
4069 task_start(ctx.info);
4073 * Just in case we made any changes to the extent tree that weren't
4074 * reflected into the free space cache yet.
4077 reset_cached_block_groups(root->fs_info);
4078 memset(&wc, 0, sizeof(wc));
4079 cache_tree_init(&wc.shared);
4080 btrfs_init_path(&path);
4085 key.type = BTRFS_ROOT_ITEM_KEY;
4086 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4091 tree_node = tree_root->node;
4093 if (tree_node != tree_root->node) {
4094 free_root_recs_tree(root_cache);
4095 btrfs_release_path(&path);
4098 leaf = path.nodes[0];
4099 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4100 ret = btrfs_next_leaf(tree_root, &path);
4106 leaf = path.nodes[0];
4108 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4109 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4110 fs_root_objectid(key.objectid)) {
4111 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4112 tmp_root = btrfs_read_fs_root_no_cache(
4113 root->fs_info, &key);
4115 key.offset = (u64)-1;
4116 tmp_root = btrfs_read_fs_root(
4117 root->fs_info, &key);
4119 if (IS_ERR(tmp_root)) {
4123 ret = check_fs_root(tmp_root, root_cache, &wc);
4124 if (ret == -EAGAIN) {
4125 free_root_recs_tree(root_cache);
4126 btrfs_release_path(&path);
4131 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4132 btrfs_free_fs_root(tmp_root);
4133 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4134 key.type == BTRFS_ROOT_BACKREF_KEY) {
4135 process_root_ref(leaf, path.slots[0], &key,
4142 btrfs_release_path(&path);
4144 free_extent_cache_tree(&wc.shared);
4145 if (!cache_tree_empty(&wc.shared))
4146 fprintf(stderr, "warning line %d\n", __LINE__);
4148 task_stop(ctx.info);
4154 * Find the DIR_ITEM/DIR_INDEX for the given key and check that it matches
4155 * the specified INODE_REF/INODE_EXTREF.
4157 * @root: the root of the fs/file tree
4158 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4159 * @key: the key of the DIR_ITEM/DIR_INDEX
4160 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4161 * distinguish the root_dir from a normal dir/file
4162 * @name: the name in the INODE_REF/INODE_EXTREF
4163 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4164 * @mode: the st_mode of INODE_ITEM
4166 * Return 0 if no error occurred.
4167 * Return ROOT_DIR_ERROR if a DIR_ITEM/DIR_INDEX is found for the root_dir.
4168 * Return DIR_ITEM_MISSING if no DIR_ITEM/DIR_INDEX can be found for a normal dir/file.
4170 * Return DIR_ITEM_MISMATCH if the INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4171 * do not match for a normal dir/file.
4173 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4174 struct btrfs_key *key, u64 index, char *name,
4175 u32 namelen, u32 mode)
4177 struct btrfs_path path;
4178 struct extent_buffer *node;
4179 struct btrfs_dir_item *di;
4180 struct btrfs_key location;
4181 char namebuf[BTRFS_NAME_LEN] = {0};
4191 btrfs_init_path(&path);
4192 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4194 ret = DIR_ITEM_MISSING;
4198 /* Process root dir and goto out*/
4201 ret = ROOT_DIR_ERROR;
4203 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4205 ref_key->type == BTRFS_INODE_REF_KEY ?
4207 ref_key->objectid, ref_key->offset,
4208 key->type == BTRFS_DIR_ITEM_KEY ?
4209 "DIR_ITEM" : "DIR_INDEX");
4217 /* Process normal file/dir */
4219 ret = DIR_ITEM_MISSING;
4221 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4223 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4224 ref_key->objectid, ref_key->offset,
4225 key->type == BTRFS_DIR_ITEM_KEY ?
4226 "DIR_ITEM" : "DIR_INDEX",
4227 key->objectid, key->offset, namelen, name,
4228 imode_to_type(mode));
4232 /* Check whether inode_id/filetype/name match */
4233 node = path.nodes[0];
4234 slot = path.slots[0];
4235 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4236 total = btrfs_item_size_nr(node, slot);
4237 while (cur < total) {
4238 ret = DIR_ITEM_MISMATCH;
4239 name_len = btrfs_dir_name_len(node, di);
4240 data_len = btrfs_dir_data_len(node, di);
4242 btrfs_dir_item_key_to_cpu(node, di, &location);
4243 if (location.objectid != ref_key->objectid ||
4244 location.type != BTRFS_INODE_ITEM_KEY ||
4245 location.offset != 0)
4248 filetype = btrfs_dir_type(node, di);
4249 if (imode_to_type(mode) != filetype)
4252 if (cur + sizeof(*di) + name_len > total ||
4253 name_len > BTRFS_NAME_LEN) {
4254 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4256 key->type == BTRFS_DIR_ITEM_KEY ?
4257 "DIR_ITEM" : "DIR_INDEX",
4258 key->objectid, key->offset, name_len);
4260 if (cur + sizeof(*di) > total)
4262 len = min_t(u32, total - cur - sizeof(*di),
4268 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4269 if (len != namelen || strncmp(namebuf, name, len))
4275 len = sizeof(*di) + name_len + data_len;
4276 di = (struct btrfs_dir_item *)((char *)di + len);
4279 if (ret == DIR_ITEM_MISMATCH)
4281 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4283 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4284 ref_key->objectid, ref_key->offset,
4285 key->type == BTRFS_DIR_ITEM_KEY ?
4286 "DIR_ITEM" : "DIR_INDEX",
4287 key->objectid, key->offset, namelen, name,
4288 imode_to_type(mode));
4290 btrfs_release_path(&path);
4295 * Traverse the given INODE_REF and call find_dir_item() to find related
4296 * DIR_ITEM/DIR_INDEX.
4298 * @root: the root of the fs/file tree
4299 * @ref_key: the key of the INODE_REF
4300 * @refs: the count of INODE_REF
4301 * @mode: the st_mode of INODE_ITEM
4303 * Return 0 if no error occurred.
4305 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4306 struct extent_buffer *node, int slot, u64 *refs,
4309 struct btrfs_key key;
4310 struct btrfs_inode_ref *ref;
4311 char namebuf[BTRFS_NAME_LEN] = {0};
4319 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4320 total = btrfs_item_size_nr(node, slot);
4323 /* Update inode ref count */
4326 index = btrfs_inode_ref_index(node, ref);
4327 name_len = btrfs_inode_ref_name_len(node, ref);
4328 if (cur + sizeof(*ref) + name_len > total ||
4329 name_len > BTRFS_NAME_LEN) {
4330 warning("root %llu INODE_REF[%llu %llu] name too long",
4331 root->objectid, ref_key->objectid, ref_key->offset);
4333 if (total < cur + sizeof(*ref))
4335 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
4340 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4342 /* Check root dir ref name */
4343 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4344 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4345 root->objectid, ref_key->objectid, ref_key->offset,
4347 err |= ROOT_DIR_ERROR;
4350 /* Find related DIR_INDEX */
4351 key.objectid = ref_key->offset;
4352 key.type = BTRFS_DIR_INDEX_KEY;
4354 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4357 /* Find related dir_item */
4358 key.objectid = ref_key->offset;
4359 key.type = BTRFS_DIR_ITEM_KEY;
4360 key.offset = btrfs_name_hash(namebuf, len);
4361 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4364 len = sizeof(*ref) + name_len;
4365 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4375 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4376 * DIR_ITEM/DIR_INDEX.
4378 * @root: the root of the fs/file tree
4379 * @ref_key: the key of the INODE_EXTREF
4380 * @refs: the count of INODE_EXTREF
4381 * @mode: the st_mode of INODE_ITEM
4383 * Return 0 if no error occurred.
4385 static int check_inode_extref(struct btrfs_root *root,
4386 struct btrfs_key *ref_key,
4387 struct extent_buffer *node, int slot, u64 *refs,
4390 struct btrfs_key key;
4391 struct btrfs_inode_extref *extref;
4392 char namebuf[BTRFS_NAME_LEN] = {0};
4402 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4403 total = btrfs_item_size_nr(node, slot);
4406 /* update inode ref count */
4408 name_len = btrfs_inode_extref_name_len(node, extref);
4409 index = btrfs_inode_extref_index(node, extref);
4410 parent = btrfs_inode_extref_parent(node, extref);
4411 if (name_len <= BTRFS_NAME_LEN) {
4414 len = BTRFS_NAME_LEN;
4415 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4416 root->objectid, ref_key->objectid, ref_key->offset);
4418 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4420 /* Check root dir ref name */
4421 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4422 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4423 root->objectid, ref_key->objectid, ref_key->offset,
4425 err |= ROOT_DIR_ERROR;
4428 /* find related dir_index */
4429 key.objectid = parent;
4430 key.type = BTRFS_DIR_INDEX_KEY;
4432 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4435 /* find related dir_item */
4436 key.objectid = parent;
4437 key.type = BTRFS_DIR_ITEM_KEY;
4438 key.offset = btrfs_name_hash(namebuf, len);
4439 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4442 len = sizeof(*extref) + name_len;
4443 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4453 * Find the INODE_REF/INODE_EXTREF for the given key and check that it matches
4454 * the specified DIR_ITEM/DIR_INDEX.
4456 * @root: the root of the fs/file tree
4457 * @key: the key of the INODE_REF/INODE_EXTREF
4458 * @name: the name in the INODE_REF/INODE_EXTREF
4459 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4460 * @index: the index in the INODE_REF/INODE_EXTREF; for a DIR_ITEM, set index to (u64)-1
4462 * @ext_ref: the EXTENDED_IREF feature
4464 * Return 0 if no error occurred.
4465 * Return >0 for error bitmap
4467 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4468 char *name, int namelen, u64 index,
4469 unsigned int ext_ref)
4471 struct btrfs_path path;
4472 struct btrfs_inode_ref *ref;
4473 struct btrfs_inode_extref *extref;
4474 struct extent_buffer *node;
4475 char ref_namebuf[BTRFS_NAME_LEN] = {0};
4486 btrfs_init_path(&path);
4487 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4489 ret = INODE_REF_MISSING;
4493 node = path.nodes[0];
4494 slot = path.slots[0];
4496 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4497 total = btrfs_item_size_nr(node, slot);
4499 /* Iterate all entry of INODE_REF */
4500 while (cur < total) {
4501 ret = INODE_REF_MISSING;
4503 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4504 ref_index = btrfs_inode_ref_index(node, ref);
4505 if (index != (u64)-1 && index != ref_index)
4508 if (cur + sizeof(*ref) + ref_namelen > total ||
4509 ref_namelen > BTRFS_NAME_LEN) {
4510 warning("root %llu INODE %s[%llu %llu] name too long",
4512 key->type == BTRFS_INODE_REF_KEY ?
4514 key->objectid, key->offset);
4516 if (cur + sizeof(*ref) > total)
4518 len = min_t(u32, total - cur - sizeof(*ref),
4524 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4527 if (len != namelen || strncmp(ref_namebuf, name, len))
4533 len = sizeof(*ref) + ref_namelen;
4534 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4539 /* Skip if not support EXTENDED_IREF feature */
4543 btrfs_release_path(&path);
4544 btrfs_init_path(&path);
4546 dir_id = key->offset;
4547 key->type = BTRFS_INODE_EXTREF_KEY;
4548 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4550 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4552 ret = INODE_REF_MISSING;
4556 node = path.nodes[0];
4557 slot = path.slots[0];
4559 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4561 total = btrfs_item_size_nr(node, slot);
4563 /* Iterate all entry of INODE_EXTREF */
4564 while (cur < total) {
4565 ret = INODE_REF_MISSING;
4567 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4568 ref_index = btrfs_inode_extref_index(node, extref);
4569 parent = btrfs_inode_extref_parent(node, extref);
4570 if (index != (u64)-1 && index != ref_index)
4573 if (parent != dir_id)
4576 if (ref_namelen <= BTRFS_NAME_LEN) {
4579 len = BTRFS_NAME_LEN;
4580 warning("root %llu INODE %s[%llu %llu] name too long",
4582 key->type == BTRFS_INODE_REF_KEY ?
4584 key->objectid, key->offset);
4586 read_extent_buffer(node, ref_namebuf,
4587 (unsigned long)(extref + 1), len);
4589 if (len != namelen || strncmp(ref_namebuf, name, len))
4596 len = sizeof(*extref) + ref_namelen;
4597 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4602 btrfs_release_path(&path);
4607 * Traverse the given DIR_ITEM/DIR_INDEX, check the related INODE_ITEM and
4608 * call find_inode_ref() to check the related INODE_REF/INODE_EXTREF.
4610 * @root: the root of the fs/file tree
4611 * @key: the key of the DIR_ITEM/DIR_INDEX
4612 * @size: the st_size of the INODE_ITEM
4613 * @ext_ref: the EXTENDED_IREF feature
4615 * Return 0 if no error occurred.
4617 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4618 struct extent_buffer *node, int slot, u64 *size,
4619 unsigned int ext_ref)
4621 struct btrfs_dir_item *di;
4622 struct btrfs_inode_item *ii;
4623 struct btrfs_path path;
4624 struct btrfs_key location;
4625 char namebuf[BTRFS_NAME_LEN] = {0};
4638 * For DIR_ITEM, set index to (u64)-1, so that find_inode_ref()
4639 * ignores the index check.
4641 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4643 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4644 total = btrfs_item_size_nr(node, slot);
4646 while (cur < total) {
4647 data_len = btrfs_dir_data_len(node, di);
4649 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4650 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4651 "DIR_ITEM" : "DIR_INDEX",
4652 key->objectid, key->offset, data_len);
4654 name_len = btrfs_dir_name_len(node, di);
4655 if (cur + sizeof(*di) + name_len > total ||
4656 name_len > BTRFS_NAME_LEN) {
4657 warning("root %llu %s[%llu %llu] name too long",
4659 key->type == BTRFS_DIR_ITEM_KEY ?
4660 "DIR_ITEM" : "DIR_INDEX",
4661 key->objectid, key->offset);
4663 if (cur + sizeof(*di) > total)
4665 len = min_t(u32, total - cur - sizeof(*di),
4670 (*size) += name_len;
4672 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4673 filetype = btrfs_dir_type(node, di);
4675 btrfs_init_path(&path);
4676 btrfs_dir_item_key_to_cpu(node, di, &location);
4678 /* Ignore related ROOT_ITEM check */
4679 if (location.type == BTRFS_ROOT_ITEM_KEY)
4682 /* Check relative INODE_ITEM(existence/filetype) */
4683 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4685 err |= INODE_ITEM_MISSING;
4686 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4687 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4688 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4689 key->offset, location.objectid, name_len,
4694 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4695 struct btrfs_inode_item);
4696 mode = btrfs_inode_mode(path.nodes[0], ii);
4698 if (imode_to_type(mode) != filetype) {
4699 err |= INODE_ITEM_MISMATCH;
4700 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4701 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4702 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4703 key->offset, name_len, namebuf, filetype);
4706 /* Check relative INODE_REF/INODE_EXTREF */
4707 location.type = BTRFS_INODE_REF_KEY;
4708 location.offset = key->objectid;
4709 ret = find_inode_ref(root, &location, namebuf, len,
4712 if (ret & INODE_REF_MISSING)
4713 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4714 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4715 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4716 key->offset, name_len, namebuf, filetype);
4719 btrfs_release_path(&path);
4720 len = sizeof(*di) + name_len + data_len;
4721 di = (struct btrfs_dir_item *)((char *)di + len);
4724 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4725 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4726 root->objectid, key->objectid, key->offset);
4735 * Check file extent datasum/hole, update the size of the file extents,
4736 * check and update the last offset of the file extent.
4738 * @root: the root of fs/file tree.
4739 * @fkey: the key of the file extent.
4740 * @nodatasum: INODE_NODATASUM feature.
4741 * @size: the sum of all EXTENT_DATA item sizes for this inode.
4742 * @end: the offset of the last extent.
4744 * Return 0 if no error occurred.
4746 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4747 struct extent_buffer *node, int slot,
4748 unsigned int nodatasum, u64 *size, u64 *end)
4750 struct btrfs_file_extent_item *fi;
4753 u64 extent_num_bytes;
4755 u64 csum_found; /* In byte size, sectorsize aligned */
4756 u64 search_start; /* Logical range start we search for csum */
4757 u64 search_len; /* Logical range len we search for csum */
4758 unsigned int extent_type;
4759 unsigned int is_hole;
4764 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4766 /* Check inline extent */
4767 extent_type = btrfs_file_extent_type(node, fi);
4768 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4769 struct btrfs_item *e = btrfs_item_nr(slot);
4770 u32 item_inline_len;
4772 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4773 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4774 compressed = btrfs_file_extent_compression(node, fi);
4775 if (extent_num_bytes == 0) {
4777 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4778 root->objectid, fkey->objectid, fkey->offset);
4779 err |= FILE_EXTENT_ERROR;
4781 if (!compressed && extent_num_bytes != item_inline_len) {
4783 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4784 root->objectid, fkey->objectid, fkey->offset,
4785 extent_num_bytes, item_inline_len);
4786 err |= FILE_EXTENT_ERROR;
4788 *end += extent_num_bytes;
4789 *size += extent_num_bytes;
4793 /* Check extent type */
4794 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4795 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4796 err |= FILE_EXTENT_ERROR;
4797 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4798 root->objectid, fkey->objectid, fkey->offset);
4802 /* Check REG_EXTENT/PREALLOC_EXTENT */
4803 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4804 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4805 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4806 extent_offset = btrfs_file_extent_offset(node, fi);
4807 compressed = btrfs_file_extent_compression(node, fi);
4808 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4811 * Check EXTENT_DATA csum
4813 * For a plain (uncompressed) extent, we should only check the range
4814 * we're referring to, as it's possible that part of a prealloc extent
4815 * has been written and has a csum:
4817 * |<--- Original large preallocated extent A ---->|
4818 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4821 * For compressed extent, we should check the whole range.
4824 search_start = disk_bytenr + extent_offset;
4825 search_len = extent_num_bytes;
4827 search_start = disk_bytenr;
4828 search_len = disk_num_bytes;
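/*
 * count_csum_range() returns in csum_found how many bytes of the range
 * [search_start, search_start + search_len) are covered by csum items
 * (sectorsize aligned); the checks below compare that against what regular,
 * nodatasum and prealloc extents are allowed to have.
 */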
4830 ret = count_csum_range(root, search_start, search_len, &csum_found);
4831 if (csum_found > 0 && nodatasum) {
4832 err |= ODD_CSUM_ITEM;
4833 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4834 root->objectid, fkey->objectid, fkey->offset);
4835 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4836 !is_hole && (ret < 0 || csum_found < search_len)) {
4837 err |= CSUM_ITEM_MISSING;
4838 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4839 root->objectid, fkey->objectid, fkey->offset,
4840 csum_found, search_len);
4841 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4842 err |= ODD_CSUM_ITEM;
4843 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4844 root->objectid, fkey->objectid, fkey->offset, csum_found);
4847 /* Check EXTENT_DATA hole */
4848 if (!no_holes && *end != fkey->offset) {
4849 err |= FILE_EXTENT_ERROR;
4850 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4851 root->objectid, fkey->objectid, fkey->offset);
4854 *end += extent_num_bytes;
4856 *size += extent_num_bytes;
4862 * Check INODE_ITEM and related ITEMs (the same inode number)
4863 * 1. check link count
4864 * 2. check inode ref/extref
4865 * 3. check dir item/index
4867 * @ext_ref: the EXTENDED_IREF feature
4869 * Return 0 if no error occurred.
4870 * Return >0 on error or when the traversal is done (indicated via the error bitmap).
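/*
 * A minimal sketch of how the returned error bitmap could be consumed by a
 * hypothetical caller (repair_nlink() and repair_nbytes() are made-up
 * helpers, not functions in this file):
 *
 *	err = check_inode_item(root, &path, ext_ref);
 *	if (err & LINK_COUNT_ERROR)
 *		repair_nlink();
 *	if (err & NBYTES_ERROR)
 *		repair_nbytes();
 */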
4872 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4873 unsigned int ext_ref)
4875 struct extent_buffer *node;
4876 struct btrfs_inode_item *ii;
4877 struct btrfs_key key;
4886 u64 extent_size = 0;
4888 unsigned int nodatasum;
4893 node = path->nodes[0];
4894 slot = path->slots[0];
4896 btrfs_item_key_to_cpu(node, &key, slot);
4897 inode_id = key.objectid;
4899 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4900 ret = btrfs_next_item(root, path);
4906 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4907 isize = btrfs_inode_size(node, ii);
4908 nbytes = btrfs_inode_nbytes(node, ii);
4909 mode = btrfs_inode_mode(node, ii);
4910 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4911 nlink = btrfs_inode_nlink(node, ii);
4912 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
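/*
 * Walk every following item that shares this inode number and dispatch on
 * key.type (inode refs/extrefs, dir items/indexes, file extents, xattrs),
 * accumulating any problems into the 'err' bitmap.
 */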
4915 ret = btrfs_next_item(root, path);
4917 /* the 'out' label will fill 'err' using the current statistics */
4919 } else if (ret > 0) {
4924 node = path->nodes[0];
4925 slot = path->slots[0];
4926 btrfs_item_key_to_cpu(node, &key, slot);
4927 if (key.objectid != inode_id)
4931 case BTRFS_INODE_REF_KEY:
4932 ret = check_inode_ref(root, &key, node, slot, &refs,
4936 case BTRFS_INODE_EXTREF_KEY:
4937 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4938 warning("root %llu EXTREF[%llu %llu] isn't supported",
4939 root->objectid, key.objectid,
4941 ret = check_inode_extref(root, &key, node, slot, &refs,
4945 case BTRFS_DIR_ITEM_KEY:
4946 case BTRFS_DIR_INDEX_KEY:
4948 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4949 root->objectid, inode_id,
4950 imode_to_type(mode), key.objectid,
4953 ret = check_dir_item(root, &key, node, slot, &size,
4957 case BTRFS_EXTENT_DATA_KEY:
4959 warning("root %llu DIR INODE[%llu] shouldn't have EXTENT_DATA[%llu %llu]",
4960 root->objectid, inode_id, key.objectid,
4963 ret = check_file_extent(root, &key, node, slot,
4964 nodatasum, &extent_size,
4968 case BTRFS_XATTR_ITEM_KEY:
4971 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4972 key.objectid, key.type, key.offset);
4977 /* verify INODE_ITEM nlink/isize/nbytes */
4980 err |= LINK_COUNT_ERROR;
4981 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4982 root->objectid, inode_id, nlink);
4986 * Just a warning, as a dir inode's nbytes is only an
4987 * informational value.
4989 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
4990 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4991 root->objectid, inode_id,
4992 root->fs_info->nodesize);
4995 if (isize != size) {
4997 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4998 root->objectid, inode_id, isize, size);
5001 if (nlink != refs) {
5002 err |= LINK_COUNT_ERROR;
5003 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5004 root->objectid, inode_id, nlink, refs);
5005 } else if (!nlink) {
5009 if (!nbytes && !no_holes && extent_end < isize) {
5010 err |= NBYTES_ERROR;
5011 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5012 root->objectid, inode_id, isize);
5015 if (nbytes != extent_size) {
5016 err |= NBYTES_ERROR;
5017 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5018 root->objectid, inode_id, nbytes, extent_size);
5025 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5027 struct btrfs_path path;
5028 struct btrfs_key key;
5032 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5033 key.type = BTRFS_INODE_ITEM_KEY;
5036 /* For root being dropped, we don't need to check first inode */
5037 if (btrfs_root_refs(&root->root_item) == 0 &&
5038 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5042 btrfs_init_path(&path);
5044 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5049 err |= INODE_ITEM_MISSING;
5050 error("first inode item of root %llu is missing",
5054 err |= check_inode_item(root, &path, ext_ref);
5059 btrfs_release_path(&path);
5064 * Iterate over all items in the tree and call check_inode_item() to check each inode.
5066 * @root: the root of the tree to be checked.
5067 * @ext_ref: the EXTENDED_IREF feature
5069 * Return 0 if no error found.
5070 * Return <0 for error.
5072 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5074 struct btrfs_path path;
5075 struct node_refs nrefs;
5076 struct btrfs_root_item *root_item = &root->root_item;
5082 * We need to manually check the first inode item (256).
5083 * The traversal below only starts from the first inode item
5084 * found in a leaf, so if inode item 256 is missing we would
5085 * simply skip it forever.
5087 ret = check_fs_first_inode(root, ext_ref);
5091 memset(&nrefs, 0, sizeof(nrefs));
5092 level = btrfs_header_level(root->node);
5093 btrfs_init_path(&path);
5095 if (btrfs_root_refs(root_item) > 0 ||
5096 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5097 path.nodes[level] = root->node;
5098 path.slots[level] = 0;
5099 extent_buffer_get(root->node);
5101 struct btrfs_key key;
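/*
 * This root is in the middle of being dropped: resume the walk from the
 * recorded drop_progress key at drop_level instead of starting from the top
 * of the tree.
 */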
5103 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5104 level = root_item->drop_level;
5105 path.lowest_level = level;
5106 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5113 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5116 /* if ret is negative, the walk must stop */
5122 ret = walk_up_tree_v2(root, &path, &level);
5124 /* Normal exit, reset ret to err */
5131 btrfs_release_path(&path);
5136 * Find the corresponding ref for a root_ref or root_backref item.
5138 * @root: the root of the root tree.
5139 * @ref_key: the key of the root ref.
5141 * Return 0 if no error occurred.
5143 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5144 struct extent_buffer *node, int slot)
5146 struct btrfs_path path;
5147 struct btrfs_key key;
5148 struct btrfs_root_ref *ref;
5149 struct btrfs_root_ref *backref;
5150 char ref_name[BTRFS_NAME_LEN] = {0};
5151 char backref_name[BTRFS_NAME_LEN] = {0};
5157 u32 backref_namelen;
5162 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5163 ref_dirid = btrfs_root_ref_dirid(node, ref);
5164 ref_seq = btrfs_root_ref_sequence(node, ref);
5165 ref_namelen = btrfs_root_ref_name_len(node, ref);
5167 if (ref_namelen <= BTRFS_NAME_LEN) {
5170 len = BTRFS_NAME_LEN;
5171 warning("%s[%llu %llu] ref_name too long",
5172 ref_key->type == BTRFS_ROOT_REF_KEY ?
5173 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5176 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5178 /* Find the corresponding root ref/backref */
5179 key.objectid = ref_key->offset;
5180 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5181 key.offset = ref_key->objectid;
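/*
 * Since ref_key->type is either BTRFS_ROOT_REF_KEY or BTRFS_ROOT_BACKREF_KEY,
 * the sum of the two constants minus the current type yields the other one,
 * i.e. the key type of the matching ref/backref item we want to look up.
 */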
5183 btrfs_init_path(&path);
5184 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5186 err |= ROOT_REF_MISSING;
5187 error("%s[%llu %llu] couldn't find relative ref",
5188 ref_key->type == BTRFS_ROOT_REF_KEY ?
5189 "ROOT_REF" : "ROOT_BACKREF",
5190 ref_key->objectid, ref_key->offset);
5194 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5195 struct btrfs_root_ref);
5196 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5197 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5198 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5200 if (backref_namelen <= BTRFS_NAME_LEN) {
5201 len = backref_namelen;
5203 len = BTRFS_NAME_LEN;
5204 warning("%s[%llu %llu] ref_name too long",
5205 key.type == BTRFS_ROOT_REF_KEY ?
5206 "ROOT_REF" : "ROOT_BACKREF",
5207 key.objectid, key.offset);
5209 read_extent_buffer(path.nodes[0], backref_name,
5210 (unsigned long)(backref + 1), len);
5212 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5213 ref_namelen != backref_namelen ||
5214 strncmp(ref_name, backref_name, len)) {
5215 err |= ROOT_REF_MISMATCH;
5216 error("%s[%llu %llu] mismatch relative ref",
5217 ref_key->type == BTRFS_ROOT_REF_KEY ?
5218 "ROOT_REF" : "ROOT_BACKREF",
5219 ref_key->objectid, ref_key->offset);
5222 btrfs_release_path(&path);
5227 * Check all fs/file trees in low-memory mode.
5229 * 1. for each fs tree root item, call check_fs_root_v2()
5230 * 2. for each fs tree root ref/backref, call check_root_ref()
5232 * Return 0 if no error occurred.
5234 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5236 struct btrfs_root *tree_root = fs_info->tree_root;
5237 struct btrfs_root *cur_root = NULL;
5238 struct btrfs_path path;
5239 struct btrfs_key key;
5240 struct extent_buffer *node;
5241 unsigned int ext_ref;
5246 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5248 btrfs_init_path(&path);
5249 key.objectid = BTRFS_FS_TREE_OBJECTID;
5251 key.type = BTRFS_ROOT_ITEM_KEY;
5253 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5257 } else if (ret > 0) {
5263 node = path.nodes[0];
5264 slot = path.slots[0];
5265 btrfs_item_key_to_cpu(node, &key, slot);
5266 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5268 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5269 fs_root_objectid(key.objectid)) {
5270 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5271 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5274 key.offset = (u64)-1;
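/*
 * An offset of (u64)-1 is the conventional way of asking btrfs_read_fs_root()
 * for the current version of this subvolume root (the reloc tree case is
 * handled separately above).
 */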
5275 cur_root = btrfs_read_fs_root(fs_info, &key);
5278 if (IS_ERR(cur_root)) {
5279 error("Failed to read fs/subvol tree: %lld",
5285 ret = check_fs_root_v2(cur_root, ext_ref);
5288 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5289 btrfs_free_fs_root(cur_root);
5290 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5291 key.type == BTRFS_ROOT_BACKREF_KEY) {
5292 ret = check_root_ref(tree_root, &key, node, slot);
5296 ret = btrfs_next_item(tree_root, &path);
5306 btrfs_release_path(&path);
5310 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5312 struct list_head *cur = rec->backrefs.next;
5313 struct extent_backref *back;
5314 struct tree_backref *tback;
5315 struct data_backref *dback;
5319 while (cur != &rec->backrefs) {
5320 back = to_extent_backref(cur);
5322 if (!back->found_extent_tree) {
5326 if (back->is_data) {
5327 dback = to_data_backref(back);
5328 fprintf(stderr, "Backref %llu %s %llu"
5329 " owner %llu offset %llu num_refs %lu"
5330 " not found in extent tree\n",
5331 (unsigned long long)rec->start,
5332 back->full_backref ?
5334 back->full_backref ?
5335 (unsigned long long)dback->parent:
5336 (unsigned long long)dback->root,
5337 (unsigned long long)dback->owner,
5338 (unsigned long long)dback->offset,
5339 (unsigned long)dback->num_refs);
5341 tback = to_tree_backref(back);
5342 fprintf(stderr, "Backref %llu parent %llu"
5343 " root %llu not found in extent tree\n",
5344 (unsigned long long)rec->start,
5345 (unsigned long long)tback->parent,
5346 (unsigned long long)tback->root);
5349 if (!back->is_data && !back->found_ref) {
5353 tback = to_tree_backref(back);
5354 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5355 (unsigned long long)rec->start,
5356 back->full_backref ? "parent" : "root",
5357 back->full_backref ?
5358 (unsigned long long)tback->parent :
5359 (unsigned long long)tback->root, back);
5361 if (back->is_data) {
5362 dback = to_data_backref(back);
5363 if (dback->found_ref != dback->num_refs) {
5367 fprintf(stderr, "Incorrect local backref count"
5368 " on %llu %s %llu owner %llu"
5369 " offset %llu found %u wanted %u back %p\n",
5370 (unsigned long long)rec->start,
5371 back->full_backref ?
5373 back->full_backref ?
5374 (unsigned long long)dback->parent:
5375 (unsigned long long)dback->root,
5376 (unsigned long long)dback->owner,
5377 (unsigned long long)dback->offset,
5378 dback->found_ref, dback->num_refs, back);
5380 if (dback->disk_bytenr != rec->start) {
5384 fprintf(stderr, "Backref disk bytenr does not"
5385 " match extent record, bytenr=%llu, "
5386 "ref bytenr=%llu\n",
5387 (unsigned long long)rec->start,
5388 (unsigned long long)dback->disk_bytenr);
5391 if (dback->bytes != rec->nr) {
5395 fprintf(stderr, "Backref bytes do not match "
5396 "extent backref, bytenr=%llu, ref "
5397 "bytes=%llu, backref bytes=%llu\n",
5398 (unsigned long long)rec->start,
5399 (unsigned long long)rec->nr,
5400 (unsigned long long)dback->bytes);
5403 if (!back->is_data) {
5406 dback = to_data_backref(back);
5407 found += dback->found_ref;
5410 if (found != rec->refs) {
5414 fprintf(stderr, "Incorrect global backref count "
5415 "on %llu found %llu wanted %llu\n",
5416 (unsigned long long)rec->start,
5417 (unsigned long long)found,
5418 (unsigned long long)rec->refs);
5424 static int free_all_extent_backrefs(struct extent_record *rec)
5426 struct extent_backref *back;
5427 struct list_head *cur;
5428 while (!list_empty(&rec->backrefs)) {
5429 cur = rec->backrefs.next;
5430 back = to_extent_backref(cur);
5437 static void free_extent_record_cache(struct cache_tree *extent_cache)
5439 struct cache_extent *cache;
5440 struct extent_record *rec;
5443 cache = first_cache_extent(extent_cache);
5446 rec = container_of(cache, struct extent_record, cache);
5447 remove_cache_extent(extent_cache, cache);
5448 free_all_extent_backrefs(rec);
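/*
 * Drop an extent record from the cache once everything about it has checked
 * out: content and owner verified, refcounts agree, no duplicates, no
 * unresolved backrefs and no full-backref, stripe or chunk-type problems.
 */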
5453 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5454 struct extent_record *rec)
5456 if (rec->content_checked && rec->owner_ref_checked &&
5457 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5458 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5459 !rec->bad_full_backref && !rec->crossing_stripes &&
5460 !rec->wrong_chunk_type) {
5461 remove_cache_extent(extent_cache, &rec->cache);
5462 free_all_extent_backrefs(rec);
5463 list_del_init(&rec->list);
5469 static int check_owner_ref(struct btrfs_root *root,
5470 struct extent_record *rec,
5471 struct extent_buffer *buf)
5473 struct extent_backref *node;
5474 struct tree_backref *back;
5475 struct btrfs_root *ref_root;
5476 struct btrfs_key key;
5477 struct btrfs_path path;
5478 struct extent_buffer *parent;
5483 list_for_each_entry(node, &rec->backrefs, list) {
5486 if (!node->found_ref)
5488 if (node->full_backref)
5490 back = to_tree_backref(node);
5491 if (btrfs_header_owner(buf) == back->root)
5494 BUG_ON(rec->is_root);
5496 /* try to find the block by searching the corresponding fs tree */
5497 key.objectid = btrfs_header_owner(buf);
5498 key.type = BTRFS_ROOT_ITEM_KEY;
5499 key.offset = (u64)-1;
5501 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5502 if (IS_ERR(ref_root))
5505 level = btrfs_header_level(buf);
5507 btrfs_item_key_to_cpu(buf, &key, 0);
5509 btrfs_node_key_to_cpu(buf, &key, 0);
5511 btrfs_init_path(&path);
5512 path.lowest_level = level + 1;
5513 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5517 parent = path.nodes[level + 1];
5518 if (parent && buf->start == btrfs_node_blockptr(parent,
5519 path.slots[level + 1]))
5522 btrfs_release_path(&path);
5523 return found ? 0 : 1;
5526 static int is_extent_tree_record(struct extent_record *rec)
5528 struct list_head *cur = rec->backrefs.next;
5529 struct extent_backref *node;
5530 struct tree_backref *back;
5533 while (cur != &rec->backrefs) {
5534 node = to_extent_backref(cur);
5538 back = to_tree_backref(node);
5539 if (node->full_backref)
5541 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
5548 static int record_bad_block_io(struct btrfs_fs_info *info,
5549 struct cache_tree *extent_cache,
5552 struct extent_record *rec;
5553 struct cache_extent *cache;
5554 struct btrfs_key key;
5556 cache = lookup_cache_extent(extent_cache, start, len);
5560 rec = container_of(cache, struct extent_record, cache);
5561 if (!is_extent_tree_record(rec))
5564 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5565 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
5568 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5569 struct extent_buffer *buf, int slot)
5571 if (btrfs_header_level(buf)) {
5572 struct btrfs_key_ptr ptr1, ptr2;
5574 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5575 sizeof(struct btrfs_key_ptr));
5576 read_extent_buffer(buf, &ptr2,
5577 btrfs_node_key_ptr_offset(slot + 1),
5578 sizeof(struct btrfs_key_ptr));
5579 write_extent_buffer(buf, &ptr1,
5580 btrfs_node_key_ptr_offset(slot + 1),
5581 sizeof(struct btrfs_key_ptr));
5582 write_extent_buffer(buf, &ptr2,
5583 btrfs_node_key_ptr_offset(slot),
5584 sizeof(struct btrfs_key_ptr));
5586 struct btrfs_disk_key key;
5587 btrfs_node_key(buf, &key, 0);
5588 btrfs_fixup_low_keys(root, path, &key,
5589 btrfs_header_level(buf) + 1);
5592 struct btrfs_item *item1, *item2;
5593 struct btrfs_key k1, k2;
5594 char *item1_data, *item2_data;
5595 u32 item1_offset, item2_offset, item1_size, item2_size;
5597 item1 = btrfs_item_nr(slot);
5598 item2 = btrfs_item_nr(slot + 1);
5599 btrfs_item_key_to_cpu(buf, &k1, slot);
5600 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5601 item1_offset = btrfs_item_offset(buf, item1);
5602 item2_offset = btrfs_item_offset(buf, item2);
5603 item1_size = btrfs_item_size(buf, item1);
5604 item2_size = btrfs_item_size(buf, item2);
5606 item1_data = malloc(item1_size);
5609 item2_data = malloc(item2_size);
5615 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5616 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
5618 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5619 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5623 btrfs_set_item_offset(buf, item1, item2_offset);
5624 btrfs_set_item_offset(buf, item2, item1_offset);
5625 btrfs_set_item_size(buf, item1, item2_size);
5626 btrfs_set_item_size(buf, item2, item1_size);
5628 path->slots[0] = slot;
5629 btrfs_set_item_key_unsafe(root, path, &k2);
5630 path->slots[0] = slot + 1;
5631 btrfs_set_item_key_unsafe(root, path, &k1);
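/*
 * Walk the block at path->lowest_level and swap any adjacent pair of keys
 * that compare out of order so the block becomes properly sorted again.
 */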
5636 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5638 struct extent_buffer *buf;
5639 struct btrfs_key k1, k2;
5641 int level = path->lowest_level;
5644 buf = path->nodes[level];
5645 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5647 btrfs_node_key_to_cpu(buf, &k1, i);
5648 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5650 btrfs_item_key_to_cpu(buf, &k1, i);
5651 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5653 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5655 ret = swap_values(root, path, buf, i);
5658 btrfs_mark_buffer_dirty(buf);
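/*
 * Delete an item whose offset/size does not line up with its neighbours.
 * Only key types we can deal with missing (dir index, extent/metadata items
 * and tree/data ref items) are removed; anything else is left untouched.
 */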
5664 static int delete_bogus_item(struct btrfs_root *root,
5665 struct btrfs_path *path,
5666 struct extent_buffer *buf, int slot)
5668 struct btrfs_key key;
5669 int nritems = btrfs_header_nritems(buf);
5671 btrfs_item_key_to_cpu(buf, &key, slot);
5673 /* These are all the keys we can deal with missing. */
5674 if (key.type != BTRFS_DIR_INDEX_KEY &&
5675 key.type != BTRFS_EXTENT_ITEM_KEY &&
5676 key.type != BTRFS_METADATA_ITEM_KEY &&
5677 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5678 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5681 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5682 (unsigned long long)key.objectid, key.type,
5683 (unsigned long long)key.offset, slot, buf->start);
5684 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5685 btrfs_item_nr_offset(slot + 1),
5686 sizeof(struct btrfs_item) *
5687 (nritems - slot - 1));
5688 btrfs_set_header_nritems(buf, nritems - 1);
5690 struct btrfs_disk_key disk_key;
5692 btrfs_item_key(buf, &disk_key, 0);
5693 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5695 btrfs_mark_buffer_dirty(buf);
5699 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5701 struct extent_buffer *buf;
5705 /* We should only get this for leaves */
5706 BUG_ON(path->lowest_level);
5707 buf = path->nodes[0];
5709 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5710 unsigned int shift = 0, offset;
5712 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5713 BTRFS_LEAF_DATA_SIZE(root)) {
5714 if (btrfs_item_end_nr(buf, i) >
5715 BTRFS_LEAF_DATA_SIZE(root)) {
5716 ret = delete_bogus_item(root, path, buf, i);
5719 fprintf(stderr, "item is off the end of the "
5720 "leaf, can't fix\n");
5724 shift = BTRFS_LEAF_DATA_SIZE(root) -
5725 btrfs_item_end_nr(buf, i);
5726 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5727 btrfs_item_offset_nr(buf, i - 1)) {
5728 if (btrfs_item_end_nr(buf, i) >
5729 btrfs_item_offset_nr(buf, i - 1)) {
5730 ret = delete_bogus_item(root, path, buf, i);
5733 fprintf(stderr, "items overlap, can't fix\n");
5737 shift = btrfs_item_offset_nr(buf, i - 1) -
5738 btrfs_item_end_nr(buf, i);
5743 printf("Shifting item nr %d by %u bytes in block %llu\n",
5744 i, shift, (unsigned long long)buf->start);
5745 offset = btrfs_item_offset_nr(buf, i);
5746 memmove_extent_buffer(buf,
5747 btrfs_leaf_data(buf) + offset + shift,
5748 btrfs_leaf_data(buf) + offset,
5749 btrfs_item_size_nr(buf, i));
5750 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5752 btrfs_mark_buffer_dirty(buf);
5756 * We may have moved things, in which case we want to exit so we don't
5757 * write those changes out. Once we have proper abort functionality in
5758 * progs this can be changed to something nicer.
5765 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5766 * then just return -EIO.
5768 static int try_to_fix_bad_block(struct btrfs_root *root,
5769 struct extent_buffer *buf,
5770 enum btrfs_tree_block_status status)
5772 struct btrfs_trans_handle *trans;
5773 struct ulist *roots;
5774 struct ulist_node *node;
5775 struct btrfs_root *search_root;
5776 struct btrfs_path path;
5777 struct ulist_iterator iter;
5778 struct btrfs_key root_key, key;
5781 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5782 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5785 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5789 btrfs_init_path(&path);
5790 ULIST_ITER_INIT(&iter);
5791 while ((node = ulist_next(roots, &iter))) {
5792 root_key.objectid = node->val;
5793 root_key.type = BTRFS_ROOT_ITEM_KEY;
5794 root_key.offset = (u64)-1;
5796 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5803 trans = btrfs_start_transaction(search_root, 0);
5804 if (IS_ERR(trans)) {
5805 ret = PTR_ERR(trans);
5809 path.lowest_level = btrfs_header_level(buf);
5810 path.skip_check_block = 1;
5811 if (path.lowest_level)
5812 btrfs_node_key_to_cpu(buf, &key, 0);
5814 btrfs_item_key_to_cpu(buf, &key, 0);
5815 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5818 btrfs_commit_transaction(trans, search_root);
5821 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5822 ret = fix_key_order(search_root, &path);
5823 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5824 ret = fix_item_offset(search_root, &path);
5826 btrfs_commit_transaction(trans, search_root);
5829 btrfs_release_path(&path);
5830 btrfs_commit_transaction(trans, search_root);
5833 btrfs_release_path(&path);
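/*
 * Cross-check a tree block against its extent record: note the block's first
 * key, level and generation on the record, run btrfs_check_leaf() or
 * btrfs_check_node() on the buffer, and attempt an in-place fix via
 * try_to_fix_bad_block() if the block turns out to be bad.
 */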
5837 static int check_block(struct btrfs_root *root,
5838 struct cache_tree *extent_cache,
5839 struct extent_buffer *buf, u64 flags)
5841 struct extent_record *rec;
5842 struct cache_extent *cache;
5843 struct btrfs_key key;
5844 enum btrfs_tree_block_status status;
5848 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5851 rec = container_of(cache, struct extent_record, cache);
5852 rec->generation = btrfs_header_generation(buf);
5854 level = btrfs_header_level(buf);
5855 if (btrfs_header_nritems(buf) > 0) {
5858 btrfs_item_key_to_cpu(buf, &key, 0);
5860 btrfs_node_key_to_cpu(buf, &key, 0);
5862 rec->info_objectid = key.objectid;
5864 rec->info_level = level;
5866 if (btrfs_is_leaf(buf))
5867 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5869 status = btrfs_check_node(root, &rec->parent_key, buf);
5871 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5873 status = try_to_fix_bad_block(root, buf, status);
5874 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5876 fprintf(stderr, "bad block %llu\n",
5877 (unsigned long long)buf->start);
5880 * Signal to callers we need to start the scan over
5881 * again since we'll have COWed blocks.
5886 rec->content_checked = 1;
5887 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5888 rec->owner_ref_checked = 1;
5890 ret = check_owner_ref(root, rec, buf);
5892 rec->owner_ref_checked = 1;
5896 maybe_free_extent_rec(extent_cache, rec);
5900 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5901 u64 parent, u64 root)
5903 struct list_head *cur = rec->backrefs.next;
5904 struct extent_backref *node;
5905 struct tree_backref *back;
5907 while (cur != &rec->backrefs) {
5908 node = to_extent_backref(cur);
5912 back = to_tree_backref(node);
5914 if (!node->full_backref)
5916 if (parent == back->parent)
5919 if (node->full_backref)
5921 if (back->root == root)
5928 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5929 u64 parent, u64 root)
5931 struct tree_backref *ref = malloc(sizeof(*ref));
5935 memset(&ref->node, 0, sizeof(ref->node));
5937 ref->parent = parent;
5938 ref->node.full_backref = 1;
5941 ref->node.full_backref = 0;
5943 list_add_tail(&ref->node.list, &rec->backrefs);
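/*
 * Look up a data backref on @rec matching either @parent (for a full backref)
 * or the (@root, @owner, @offset) triple; when @found_ref is set, a ref that
 * was already found must also match the given disk_bytenr and byte count.
 */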
5948 static struct data_backref *find_data_backref(struct extent_record *rec,
5949 u64 parent, u64 root,
5950 u64 owner, u64 offset,
5952 u64 disk_bytenr, u64 bytes)
5954 struct list_head *cur = rec->backrefs.next;
5955 struct extent_backref *node;
5956 struct data_backref *back;
5958 while (cur != &rec->backrefs) {
5959 node = to_extent_backref(cur);
5963 back = to_data_backref(node);
5965 if (!node->full_backref)
5967 if (parent == back->parent)
5970 if (node->full_backref)
5972 if (back->root == root && back->owner == owner &&
5973 back->offset == offset) {
5974 if (found_ref && node->found_ref &&
5975 (back->bytes != bytes ||
5976 back->disk_bytenr != disk_bytenr))
5985 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5986 u64 parent, u64 root,
5987 u64 owner, u64 offset,
5990 struct data_backref *ref = malloc(sizeof(*ref));
5994 memset(&ref->node, 0, sizeof(ref->node));
5995 ref->node.is_data = 1;
5998 ref->parent = parent;
6001 ref->node.full_backref = 1;
6005 ref->offset = offset;
6006 ref->node.full_backref = 0;
6008 ref->bytes = max_size;
6011 list_add_tail(&ref->node.list, &rec->backrefs);
6012 if (max_size > rec->max_size)
6013 rec->max_size = max_size;
6017 /* Check if the type of the extent matches its chunk */
6018 static void check_extent_type(struct extent_record *rec)
6020 struct btrfs_block_group_cache *bg_cache;
6022 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6026 /* data extent, check chunk directly */
6027 if (!rec->metadata) {
6028 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6029 rec->wrong_chunk_type = 1;
6033 /* metadata extent, check the obvious case first */
6034 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6035 BTRFS_BLOCK_GROUP_METADATA))) {
6036 rec->wrong_chunk_type = 1;
6041 * Check for a SYSTEM extent: as it's also marked as metadata, we can
6042 * only tell it's a SYSTEM extent from its backref
6044 if (!list_empty(&rec->backrefs)) {
6045 struct extent_backref *node;
6046 struct tree_backref *tback;
6049 node = to_extent_backref(rec->backrefs.next);
6050 if (node->is_data) {
6051 /* tree block shouldn't have data backref */
6052 rec->wrong_chunk_type = 1;
6055 tback = container_of(node, struct tree_backref, node);
6057 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6058 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6060 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6061 if (!(bg_cache->flags & bg_type))
6062 rec->wrong_chunk_type = 1;
6067 * Allocate a new extent record, fill default values from @tmpl and insert it into
6068 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6069 * the cache, otherwise it fails.
6071 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6072 struct extent_record *tmpl)
6074 struct extent_record *rec;
6077 BUG_ON(tmpl->max_size == 0);
6078 rec = malloc(sizeof(*rec));
6081 rec->start = tmpl->start;
6082 rec->max_size = tmpl->max_size;
6083 rec->nr = max(tmpl->nr, tmpl->max_size);
6084 rec->found_rec = tmpl->found_rec;
6085 rec->content_checked = tmpl->content_checked;
6086 rec->owner_ref_checked = tmpl->owner_ref_checked;
6087 rec->num_duplicates = 0;
6088 rec->metadata = tmpl->metadata;
6089 rec->flag_block_full_backref = FLAG_UNSET;
6090 rec->bad_full_backref = 0;
6091 rec->crossing_stripes = 0;
6092 rec->wrong_chunk_type = 0;
6093 rec->is_root = tmpl->is_root;
6094 rec->refs = tmpl->refs;
6095 rec->extent_item_refs = tmpl->extent_item_refs;
6096 rec->parent_generation = tmpl->parent_generation;
6097 INIT_LIST_HEAD(&rec->backrefs);
6098 INIT_LIST_HEAD(&rec->dups);
6099 INIT_LIST_HEAD(&rec->list);
6100 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6101 rec->cache.start = tmpl->start;
6102 rec->cache.size = tmpl->nr;
6103 ret = insert_cache_extent(extent_cache, &rec->cache);
6108 bytes_used += rec->nr;
6111 rec->crossing_stripes = check_crossing_stripes(global_info,
6112 rec->start, global_info->nodesize);
6113 check_extent_type(rec);
6118 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6120 * - refs - if found, increase refs
6121 * - is_root - if found, set
6122 * - content_checked - if found, set
6123 * - owner_ref_checked - if found, set
6125 * If not found, create a new one, initialize and insert.
6127 static int add_extent_rec(struct cache_tree *extent_cache,
6128 struct extent_record *tmpl)
6130 struct extent_record *rec;
6131 struct cache_extent *cache;
6135 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6137 rec = container_of(cache, struct extent_record, cache);
6141 rec->nr = max(tmpl->nr, tmpl->max_size);
6144 * We need to make sure to reset nr to whatever the extent
6145 * record says was the real size, this way we can compare it to
6148 if (tmpl->found_rec) {
6149 if (tmpl->start != rec->start || rec->found_rec) {
6150 struct extent_record *tmp;
6153 if (list_empty(&rec->list))
6154 list_add_tail(&rec->list,
6155 &duplicate_extents);
6158 * We have to do this song and dance in case we
6159 * find an extent record that falls inside of
6160 * our current extent record but does not have
6161 * the same objectid.
6163 tmp = malloc(sizeof(*tmp));
6166 tmp->start = tmpl->start;
6167 tmp->max_size = tmpl->max_size;
6170 tmp->metadata = tmpl->metadata;
6171 tmp->extent_item_refs = tmpl->extent_item_refs;
6172 INIT_LIST_HEAD(&tmp->list);
6173 list_add_tail(&tmp->list, &rec->dups);
6174 rec->num_duplicates++;
6181 if (tmpl->extent_item_refs && !dup) {
6182 if (rec->extent_item_refs) {
6183 fprintf(stderr, "block %llu rec "
6184 "extent_item_refs %llu, passed %llu\n",
6185 (unsigned long long)tmpl->start,
6186 (unsigned long long)
6187 rec->extent_item_refs,
6188 (unsigned long long)tmpl->extent_item_refs);
6190 rec->extent_item_refs = tmpl->extent_item_refs;
6194 if (tmpl->content_checked)
6195 rec->content_checked = 1;
6196 if (tmpl->owner_ref_checked)
6197 rec->owner_ref_checked = 1;
6198 memcpy(&rec->parent_key, &tmpl->parent_key,
6199 sizeof(tmpl->parent_key));
6200 if (tmpl->parent_generation)
6201 rec->parent_generation = tmpl->parent_generation;
6202 if (rec->max_size < tmpl->max_size)
6203 rec->max_size = tmpl->max_size;
6206 * A metadata extent can't cross a stripe_len boundary, otherwise
6207 * kernel scrub won't be able to handle it.
6208 * As stripe_len is now fixed to BTRFS_STRIPE_LEN, just check
6212 rec->crossing_stripes = check_crossing_stripes(
6213 global_info, rec->start,
6214 global_info->nodesize);
6215 check_extent_type(rec);
6216 maybe_free_extent_rec(extent_cache, rec);
6220 ret = add_extent_rec_nolookup(extent_cache, tmpl);
6225 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6226 u64 parent, u64 root, int found_ref)
6228 struct extent_record *rec;
6229 struct tree_backref *back;
6230 struct cache_extent *cache;
6233 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6235 struct extent_record tmpl;
6237 memset(&tmpl, 0, sizeof(tmpl));
6238 tmpl.start = bytenr;
6243 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6247 /* really a bug in the cache_extent implementation */
6248 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6253 rec = container_of(cache, struct extent_record, cache);
6254 if (rec->start != bytenr) {
6256 * Several possible causes, from an unaligned bytenr to overlapping extents
6261 back = find_tree_backref(rec, parent, root);
6263 back = alloc_tree_backref(rec, parent, root);
6269 if (back->node.found_ref) {
6270 fprintf(stderr, "Extent back ref already exists "
6271 "for %llu parent %llu root %llu \n",
6272 (unsigned long long)bytenr,
6273 (unsigned long long)parent,
6274 (unsigned long long)root);
6276 back->node.found_ref = 1;
6278 if (back->node.found_extent_tree) {
6279 fprintf(stderr, "Extent back ref already exists "
6280 "for %llu parent %llu root %llu \n",
6281 (unsigned long long)bytenr,
6282 (unsigned long long)parent,
6283 (unsigned long long)root);
6285 back->node.found_extent_tree = 1;
6287 check_extent_type(rec);
6288 maybe_free_extent_rec(extent_cache, rec);
6292 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6293 u64 parent, u64 root, u64 owner, u64 offset,
6294 u32 num_refs, int found_ref, u64 max_size)
6296 struct extent_record *rec;
6297 struct data_backref *back;
6298 struct cache_extent *cache;
6301 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6303 struct extent_record tmpl;
6305 memset(&tmpl, 0, sizeof(tmpl));
6306 tmpl.start = bytenr;
6308 tmpl.max_size = max_size;
6310 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6314 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6319 rec = container_of(cache, struct extent_record, cache);
6320 if (rec->max_size < max_size)
6321 rec->max_size = max_size;
6324 * If found_ref is set then max_size is the real size and must match the
6325 * existing refs. So if we have already found a ref then we need to
6326 * make sure that this ref matches the existing one, otherwise we need
6327 * to add a new backref so we can notice that the backrefs don't match
6328 * and we need to figure out who is telling the truth. This is to
6329 * account for that awful fsync bug I introduced where we'd end up with
6330 * a btrfs_file_extent_item that would have its length include multiple
6331 * prealloc extents or point inside of a prealloc extent.
6333 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6336 back = alloc_data_backref(rec, parent, root, owner, offset,
6342 BUG_ON(num_refs != 1);
6343 if (back->node.found_ref)
6344 BUG_ON(back->bytes != max_size);
6345 back->node.found_ref = 1;
6346 back->found_ref += 1;
6347 back->bytes = max_size;
6348 back->disk_bytenr = bytenr;
6350 rec->content_checked = 1;
6351 rec->owner_ref_checked = 1;
6353 if (back->node.found_extent_tree) {
6354 fprintf(stderr, "Extent back ref already exists "
6355 "for %llu parent %llu root %llu "
6356 "owner %llu offset %llu num_refs %lu\n",
6357 (unsigned long long)bytenr,
6358 (unsigned long long)parent,
6359 (unsigned long long)root,
6360 (unsigned long long)owner,
6361 (unsigned long long)offset,
6362 (unsigned long)num_refs);
6364 back->num_refs = num_refs;
6365 back->node.found_extent_tree = 1;
6367 maybe_free_extent_rec(extent_cache, rec);
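/*
 * Queue a block (bytenr/size) for later reading: record it in @seen so it is
 * only queued once, then add it to @pending.
 */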
6371 static int add_pending(struct cache_tree *pending,
6372 struct cache_tree *seen, u64 bytenr, u32 size)
6375 ret = add_cache_extent(seen, bytenr, size);
6378 add_cache_extent(pending, bytenr, size);
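/*
 * Pick the next batch of block addresses to read into @bits (up to @bits_nr
 * entries): readahead targets are preferred, then tree nodes close to @last,
 * and finally anything left on the @pending queue.
 */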
6382 static int pick_next_pending(struct cache_tree *pending,
6383 struct cache_tree *reada,
6384 struct cache_tree *nodes,
6385 u64 last, struct block_info *bits, int bits_nr,
6388 unsigned long node_start = last;
6389 struct cache_extent *cache;
6392 cache = search_cache_extent(reada, 0);
6394 bits[0].start = cache->start;
6395 bits[0].size = cache->size;
6400 if (node_start > 32768)
6401 node_start -= 32768;
6403 cache = search_cache_extent(nodes, node_start);
6405 cache = search_cache_extent(nodes, 0);
6408 cache = search_cache_extent(pending, 0);
6413 bits[ret].start = cache->start;
6414 bits[ret].size = cache->size;
6415 cache = next_cache_extent(cache);
6417 } while (cache && ret < bits_nr);
6423 bits[ret].start = cache->start;
6424 bits[ret].size = cache->size;
6425 cache = next_cache_extent(cache);
6427 } while (cache && ret < bits_nr);
6429 if (bits_nr - ret > 8) {
6430 u64 lookup = bits[0].start + bits[0].size;
6431 struct cache_extent *next;
6432 next = search_cache_extent(pending, lookup);
6434 if (next->start - lookup > 32768)
6436 bits[ret].start = next->start;
6437 bits[ret].size = next->size;
6438 lookup = next->start + next->size;
6442 next = next_cache_extent(next);
6450 static void free_chunk_record(struct cache_extent *cache)
6452 struct chunk_record *rec;
6454 rec = container_of(cache, struct chunk_record, cache);
6455 list_del_init(&rec->list);
6456 list_del_init(&rec->dextents);
6460 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6462 cache_tree_free_extents(chunk_cache, free_chunk_record);
6465 static void free_device_record(struct rb_node *node)
6467 struct device_record *rec;
6469 rec = container_of(node, struct device_record, node);
6473 FREE_RB_BASED_TREE(device_cache, free_device_record);
6475 int insert_block_group_record(struct block_group_tree *tree,
6476 struct block_group_record *bg_rec)
6480 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6484 list_add_tail(&bg_rec->list, &tree->block_groups);
6488 static void free_block_group_record(struct cache_extent *cache)
6490 struct block_group_record *rec;
6492 rec = container_of(cache, struct block_group_record, cache);
6493 list_del_init(&rec->list);
6497 void free_block_group_tree(struct block_group_tree *tree)
6499 cache_tree_free_extents(&tree->tree, free_block_group_record);
6502 int insert_device_extent_record(struct device_extent_tree *tree,
6503 struct device_extent_record *de_rec)
6508 * Device extent is a bit different from the other extents, because
6509 * the extents which belong to the different devices may have the
6510 * same start and size, so we need to use the special extent cache
6511 * search/insert functions.
6513 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6517 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6518 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
6522 static void free_device_extent_record(struct cache_extent *cache)
6524 struct device_extent_record *rec;
6526 rec = container_of(cache, struct device_extent_record, cache);
6527 if (!list_empty(&rec->chunk_list))
6528 list_del_init(&rec->chunk_list);
6529 if (!list_empty(&rec->device_list))
6530 list_del_init(&rec->device_list);
6534 void free_device_extent_tree(struct device_extent_tree *tree)
6536 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6539 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6540 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6541 struct extent_buffer *leaf, int slot)
6543 struct btrfs_extent_ref_v0 *ref0;
6544 struct btrfs_key key;
6547 btrfs_item_key_to_cpu(leaf, &key, slot);
6548 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6549 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6550 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6553 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6554 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
6560 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6561 struct btrfs_key *key,
6564 struct btrfs_chunk *ptr;
6565 struct chunk_record *rec;
6568 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6569 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6571 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6573 fprintf(stderr, "memory allocation failed\n");
6577 INIT_LIST_HEAD(&rec->list);
6578 INIT_LIST_HEAD(&rec->dextents);
6581 rec->cache.start = key->offset;
6582 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6584 rec->generation = btrfs_header_generation(leaf);
6586 rec->objectid = key->objectid;
6587 rec->type = key->type;
6588 rec->offset = key->offset;
6590 rec->length = rec->cache.size;
6591 rec->owner = btrfs_chunk_owner(leaf, ptr);
6592 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6593 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6594 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6595 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6596 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6597 rec->num_stripes = num_stripes;
6598 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6600 for (i = 0; i < rec->num_stripes; ++i) {
6601 rec->stripes[i].devid =
6602 btrfs_stripe_devid_nr(leaf, ptr, i);
6603 rec->stripes[i].offset =
6604 btrfs_stripe_offset_nr(leaf, ptr, i);
6605 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6606 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
6613 static int process_chunk_item(struct cache_tree *chunk_cache,
6614 struct btrfs_key *key, struct extent_buffer *eb,
6617 struct chunk_record *rec;
6618 struct btrfs_chunk *chunk;
6621 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6623 * Do an extra check on this chunk item:
6625 * it's still possible to craft a leaf containing a CHUNK_ITEM with a
6626 * wrong owner (3) outside the chunk tree that would pass both the chunk
6627 * tree check and the owner<->key_type check.
6629 ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
6632 error("chunk(%llu, %llu) is not valid, ignore it",
6633 key->offset, btrfs_chunk_length(eb, chunk));
6636 rec = btrfs_new_chunk_record(eb, key, slot);
6637 ret = insert_cache_extent(chunk_cache, &rec->cache);
6639 fprintf(stderr, "Chunk[%llu, %llu] already exists.\n",
6640 rec->offset, rec->length);
6647 static int process_device_item(struct rb_root *dev_cache,
6648 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6650 struct btrfs_dev_item *ptr;
6651 struct device_record *rec;
6654 ptr = btrfs_item_ptr(eb,
6655 slot, struct btrfs_dev_item);
6657 rec = malloc(sizeof(*rec));
6659 fprintf(stderr, "memory allocation failed\n");
6663 rec->devid = key->offset;
6664 rec->generation = btrfs_header_generation(eb);
6666 rec->objectid = key->objectid;
6667 rec->type = key->type;
6668 rec->offset = key->offset;
6670 rec->devid = btrfs_device_id(eb, ptr);
6671 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6672 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6674 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6676 fprintf(stderr, "Device[%llu] already exists.\n", rec->devid);
6683 struct block_group_record *
6684 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6687 struct btrfs_block_group_item *ptr;
6688 struct block_group_record *rec;
6690 rec = calloc(1, sizeof(*rec));
6692 fprintf(stderr, "memory allocation failed\n");
6696 rec->cache.start = key->objectid;
6697 rec->cache.size = key->offset;
6699 rec->generation = btrfs_header_generation(leaf);
6701 rec->objectid = key->objectid;
6702 rec->type = key->type;
6703 rec->offset = key->offset;
6705 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6706 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6708 INIT_LIST_HEAD(&rec->list);
6713 static int process_block_group_item(struct block_group_tree *block_group_cache,
6714 struct btrfs_key *key,
6715 struct extent_buffer *eb, int slot)
6717 struct block_group_record *rec;
6720 rec = btrfs_new_block_group_record(eb, key, slot);
6721 ret = insert_block_group_record(block_group_cache, rec);
6723 fprintf(stderr, "Block Group[%llu, %llu] already exists.\n",
6724 rec->objectid, rec->offset);
6731 struct device_extent_record *
6732 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6733 struct btrfs_key *key, int slot)
6735 struct device_extent_record *rec;
6736 struct btrfs_dev_extent *ptr;
6738 rec = calloc(1, sizeof(*rec));
6740 fprintf(stderr, "memory allocation failed\n");
6744 rec->cache.objectid = key->objectid;
6745 rec->cache.start = key->offset;
6747 rec->generation = btrfs_header_generation(leaf);
6749 rec->objectid = key->objectid;
6750 rec->type = key->type;
6751 rec->offset = key->offset;
6753 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6754 rec->chunk_objecteid =
6755 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6757 btrfs_dev_extent_chunk_offset(leaf, ptr);
6758 rec->length = btrfs_dev_extent_length(leaf, ptr);
6759 rec->cache.size = rec->length;
6761 INIT_LIST_HEAD(&rec->chunk_list);
6762 INIT_LIST_HEAD(&rec->device_list);
6768 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6769 struct btrfs_key *key, struct extent_buffer *eb,
6772 struct device_extent_record *rec;
6775 rec = btrfs_new_device_extent_record(eb, key, slot);
6776 ret = insert_device_extent_record(dev_extent_cache, rec);
6779 "Device extent[%llu, %llu, %llu] already exists.\n",
6780 rec->objectid, rec->offset, rec->length);
6787 static int process_extent_item(struct btrfs_root *root,
6788 struct cache_tree *extent_cache,
6789 struct extent_buffer *eb, int slot)
6791 struct btrfs_extent_item *ei;
6792 struct btrfs_extent_inline_ref *iref;
6793 struct btrfs_extent_data_ref *dref;
6794 struct btrfs_shared_data_ref *sref;
6795 struct btrfs_key key;
6796 struct extent_record tmpl;
6801 u32 item_size = btrfs_item_size_nr(eb, slot);
6807 btrfs_item_key_to_cpu(eb, &key, slot);
6809 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6811 num_bytes = root->fs_info->nodesize;
6813 num_bytes = key.offset;
6816 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6817 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6818 key.objectid, root->fs_info->sectorsize);
6821 if (item_size < sizeof(*ei)) {
6822 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6823 struct btrfs_extent_item_v0 *ei0;
6824 BUG_ON(item_size != sizeof(*ei0));
6825 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6826 refs = btrfs_extent_refs_v0(eb, ei0);
6830 memset(&tmpl, 0, sizeof(tmpl));
6831 tmpl.start = key.objectid;
6832 tmpl.nr = num_bytes;
6833 tmpl.extent_item_refs = refs;
6834 tmpl.metadata = metadata;
6836 tmpl.max_size = num_bytes;
6838 return add_extent_rec(extent_cache, &tmpl);
6841 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6842 refs = btrfs_extent_refs(eb, ei);
6843 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6847 if (metadata && num_bytes != root->fs_info->nodesize) {
6848 error("ignoring invalid metadata extent, length %llu does not equal %u",
6849 num_bytes, root->fs_info->nodesize);
6852 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6853 error("ignoring invalid data extent, length %llu is not aligned to %u",
6854 num_bytes, root->fs_info->sectorsize);
6858 memset(&tmpl, 0, sizeof(tmpl));
6859 tmpl.start = key.objectid;
6860 tmpl.nr = num_bytes;
6861 tmpl.extent_item_refs = refs;
6862 tmpl.metadata = metadata;
6864 tmpl.max_size = num_bytes;
6865 add_extent_rec(extent_cache, &tmpl);
6867 ptr = (unsigned long)(ei + 1);
6868 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6869 key.type == BTRFS_EXTENT_ITEM_KEY)
6870 ptr += sizeof(struct btrfs_tree_block_info);
6872 end = (unsigned long)ei + item_size;
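/*
 * Walk the inline references that follow the extent item (and, for metadata
 * EXTENT_ITEMs, the btrfs_tree_block_info) until the end of the item,
 * recording a tree or data backref for each one.
 */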
6874 iref = (struct btrfs_extent_inline_ref *)ptr;
6875 type = btrfs_extent_inline_ref_type(eb, iref);
6876 offset = btrfs_extent_inline_ref_offset(eb, iref);
6878 case BTRFS_TREE_BLOCK_REF_KEY:
6879 ret = add_tree_backref(extent_cache, key.objectid,
6883 "add_tree_backref failed (extent items tree block): %s",
6886 case BTRFS_SHARED_BLOCK_REF_KEY:
6887 ret = add_tree_backref(extent_cache, key.objectid,
6891 "add_tree_backref failed (extent items shared block): %s",
6894 case BTRFS_EXTENT_DATA_REF_KEY:
6895 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6896 add_data_backref(extent_cache, key.objectid, 0,
6897 btrfs_extent_data_ref_root(eb, dref),
6898 btrfs_extent_data_ref_objectid(eb,
6900 btrfs_extent_data_ref_offset(eb, dref),
6901 btrfs_extent_data_ref_count(eb, dref),
6904 case BTRFS_SHARED_DATA_REF_KEY:
6905 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6906 add_data_backref(extent_cache, key.objectid, offset,
6908 btrfs_shared_data_ref_count(eb, sref),
6912 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6913 key.objectid, key.type, num_bytes);
6916 ptr += btrfs_extent_inline_ref_size(type);
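/*
 * Check that the range [offset, offset + bytes) is present in the block
 * group's free space cache, skipping any portion that overlaps a superblock
 * mirror, since superblock stripes are excluded from free space accounting.
 */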
6923 static int check_cache_range(struct btrfs_root *root,
6924 struct btrfs_block_group_cache *cache,
6925 u64 offset, u64 bytes)
6927 struct btrfs_free_space *entry;
6933 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6934 bytenr = btrfs_sb_offset(i);
6935 ret = btrfs_rmap_block(root->fs_info,
6936 cache->key.objectid, bytenr, 0,
6937 &logical, &nr, &stripe_len);
6942 if (logical[nr] + stripe_len <= offset)
6944 if (offset + bytes <= logical[nr])
6946 if (logical[nr] == offset) {
6947 if (stripe_len >= bytes) {
6951 bytes -= stripe_len;
6952 offset += stripe_len;
6953 } else if (logical[nr] < offset) {
6954 if (logical[nr] + stripe_len >=
6959 bytes = (offset + bytes) -
6960 (logical[nr] + stripe_len);
6961 offset = logical[nr] + stripe_len;
6964 * Could be tricky, the super may land in the
6965 * middle of the area we're checking. First
6966 * check the easiest case, it's at the end.
6968 if (logical[nr] + stripe_len >=
6970 bytes = logical[nr] - offset;
6974 /* Check the left side */
6975 ret = check_cache_range(root, cache,
6977 logical[nr] - offset);
6983 /* Now we continue with the right side */
6984 bytes = (offset + bytes) -
6985 (logical[nr] + stripe_len);
6986 offset = logical[nr] + stripe_len;
6993 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6995 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6996 offset, offset+bytes);
7000 if (entry->offset != offset) {
7001 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7006 if (entry->bytes != bytes) {
7007 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7008 bytes, entry->bytes, offset);
7012 unlink_free_space(cache->free_space_ctl, entry);
7017 static int verify_space_cache(struct btrfs_root *root,
7018 struct btrfs_block_group_cache *cache)
7020 struct btrfs_path path;
7021 struct extent_buffer *leaf;
7022 struct btrfs_key key;
7026 root = root->fs_info->extent_root;
7028 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7030 btrfs_init_path(&path);
7031 key.objectid = last;
7033 key.type = BTRFS_EXTENT_ITEM_KEY;
7034 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7039 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7040 ret = btrfs_next_leaf(root, &path);
7048 leaf = path.nodes[0];
7049 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7050 if (key.objectid >= cache->key.offset + cache->key.objectid)
7052 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7053 key.type != BTRFS_METADATA_ITEM_KEY) {
7058 if (last == key.objectid) {
7059 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7060 last = key.objectid + key.offset;
7062 last = key.objectid + root->fs_info->nodesize;
7067 ret = check_cache_range(root, cache, last,
7068 key.objectid - last);
7071 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7072 last = key.objectid + key.offset;
7074 last = key.objectid + root->fs_info->nodesize;
7078 if (last < cache->key.objectid + cache->key.offset)
7079 ret = check_cache_range(root, cache, last,
7080 cache->key.objectid +
7081 cache->key.offset - last);
7084 btrfs_release_path(&path);
7087 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7088 fprintf(stderr, "There are still entries left in the space "
7096 static int check_space_cache(struct btrfs_root *root)
7098 struct btrfs_block_group_cache *cache;
7099 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7103 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7104 btrfs_super_generation(root->fs_info->super_copy) !=
7105 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7106 printf("cache and super generation don't match, space cache "
7107 "will be invalidated\n");
7111 if (ctx.progress_enabled) {
7112 ctx.tp = TASK_FREE_SPACE;
7113 task_start(ctx.info);
7117 cache = btrfs_lookup_first_block_group(root->fs_info, start);
7121 start = cache->key.objectid + cache->key.offset;
7122 if (!cache->free_space_ctl) {
7123 if (btrfs_init_free_space_ctl(cache,
7124 root->fs_info->sectorsize)) {
7129 btrfs_remove_free_space_cache(cache);
7132 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7133 ret = exclude_super_stripes(root, cache);
7135 fprintf(stderr, "could not exclude super stripes: %s\n",
7140 ret = load_free_space_tree(root->fs_info, cache);
7141 free_excluded_extents(root, cache);
7143 fprintf(stderr, "could not load free space tree: %s\n",
7150 ret = load_free_space_cache(root->fs_info, cache);
7155 ret = verify_space_cache(root, cache);
7157 fprintf(stderr, "cache appears valid but isn't, block group %Lu\n",
7158 cache->key.objectid);
7163 task_stop(ctx.info);
7165 return error ? -EINVAL : 0;
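/*
 * Read back the data extent at @bytenr and verify every sector against the
 * checksums stored at @leaf_offset inside @eb, retrying with the remaining
 * mirrors when a sector's csum does not match.
 */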
7168 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7169 u64 num_bytes, unsigned long leaf_offset,
7170 struct extent_buffer *eb) {
7172 struct btrfs_fs_info *fs_info = root->fs_info;
7174 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7176 unsigned long csum_offset;
7180 u64 data_checked = 0;
7186 if (num_bytes % fs_info->sectorsize)
7189 data = malloc(num_bytes);
7193 while (offset < num_bytes) {
7196 read_len = num_bytes - offset;
7197 /* read as much data as possible at a time */
7198 ret = read_extent_data(fs_info, data + offset,
7199 bytenr + offset, &read_len, mirror);
7203 /* verify the checksum of every sector of data */
7204 while (data_checked < read_len) {
7206 tmp = offset + data_checked;
7208 csum = btrfs_csum_data((char *)data + tmp,
7209 csum, fs_info->sectorsize);
7210 btrfs_csum_final(csum, (u8 *)&csum);
7212 csum_offset = leaf_offset +
7213 tmp / fs_info->sectorsize * csum_size;
7214 read_extent_buffer(eb, (char *)&csum_expected,
7215 csum_offset, csum_size);
7216 /* try another mirror */
7217 if (csum != csum_expected) {
7218 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7219 mirror, bytenr + tmp,
7220 csum, csum_expected);
7221 num_copies = btrfs_num_copies(root->fs_info,
7223 if (mirror < num_copies - 1) {
7228 data_checked += fs_info->sectorsize;
7237 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7240 struct btrfs_path path;
7241 struct extent_buffer *leaf;
7242 struct btrfs_key key;
7245 btrfs_init_path(&path);
7246 key.objectid = bytenr;
7247 key.type = BTRFS_EXTENT_ITEM_KEY;
7248 key.offset = (u64)-1;
7251 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7254 fprintf(stderr, "Error looking up extent record %d\n", ret);
7255 btrfs_release_path(&path);
7258 if (path.slots[0] > 0) {
7261 ret = btrfs_prev_leaf(root, &path);
7264 } else if (ret > 0) {
7271 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7274 * Block group items come before extent items if they have the same
7275 * bytenr, so walk back one more just in case. Dear future traveller,
7276 * first congrats on mastering time travel. Now if it's not too much
7277 * trouble could you go back to 2006 and tell Chris to make the
7278 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7279 * EXTENT_ITEM_KEY please?
7281 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7282 if (path.slots[0] > 0) {
7285 ret = btrfs_prev_leaf(root, &path);
7288 } else if (ret > 0) {
7293 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7297 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7298 ret = btrfs_next_leaf(root, &path);
7300 fprintf(stderr, "Error going to next leaf "
7302 btrfs_release_path(&path);
7308 leaf = path.nodes[0];
7309 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7310 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
7314 if (key.objectid + key.offset < bytenr) {
7318 if (key.objectid > bytenr + num_bytes)
7321 if (key.objectid == bytenr) {
7322 if (key.offset >= num_bytes) {
7326 num_bytes -= key.offset;
7327 bytenr += key.offset;
7328 } else if (key.objectid < bytenr) {
7329 if (key.objectid + key.offset >= bytenr + num_bytes) {
7333 num_bytes = (bytenr + num_bytes) -
7334 (key.objectid + key.offset);
7335 bytenr = key.objectid + key.offset;
7337 if (key.objectid + key.offset < bytenr + num_bytes) {
7338 u64 new_start = key.objectid + key.offset;
7339 u64 new_bytes = bytenr + num_bytes - new_start;
7342 * Weird case, the extent is in the middle of
7343 * our range, we'll have to search one side
7344 * and then the other. Not sure if this happens
7345 * in real life, but no harm in coding it up
7346 * anyway just in case.
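*
* Rough sketch of the handling: the right-hand part of the range,
* [new_start, new_start + new_bytes), is checked by a recursive
* check_extent_exists() call, and this invocation then shrinks num_bytes
* so it only keeps looking at the left-hand part up to key.objectid.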
7348 btrfs_release_path(&path);
7349 ret = check_extent_exists(root, new_start,
7352 fprintf(stderr, "Right section didn't "
7356 num_bytes = key.objectid - bytenr;
7359 num_bytes = key.objectid - bytenr;
7366 if (num_bytes && !ret) {
7367 fprintf(stderr, "There are no extents for csum range "
7368 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7372 btrfs_release_path(&path);
7376 static int check_csums(struct btrfs_root *root)
7378 struct btrfs_path path;
7379 struct extent_buffer *leaf;
7380 struct btrfs_key key;
7381 u64 offset = 0, num_bytes = 0;
7382 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7386 unsigned long leaf_offset;
7388 root = root->fs_info->csum_root;
7389 if (!extent_buffer_uptodate(root->node)) {
7390 fprintf(stderr, "No valid csum tree found\n");
7394 btrfs_init_path(&path);
7395 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7396 key.type = BTRFS_EXTENT_CSUM_KEY;
7398 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7400 fprintf(stderr, "Error searching csum tree %d\n", ret);
7401 btrfs_release_path(&path);
7405 if (ret > 0 && path.slots[0])
7410 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7411 ret = btrfs_next_leaf(root, &path);
7413 fprintf(stderr, "Error going to next leaf "
7420 leaf = path.nodes[0];
7422 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7423 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
7428 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7429 csum_size) * root->fs_info->sectorsize;
7430 if (!check_data_csum)
7431 goto skip_csum_check;
7432 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7433 ret = check_extent_csums(root, key.offset, data_len,
7439 offset = key.offset;
7440 } else if (key.offset != offset + num_bytes) {
7441 ret = check_extent_exists(root, offset, num_bytes);
7443 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7444 "there is no extent record\n",
7445 offset, offset+num_bytes);
7448 offset = key.offset;
7451 num_bytes += data_len;
7455 btrfs_release_path(&path);
7459 static int is_dropped_key(struct btrfs_key *key,
7460 struct btrfs_key *drop_key) {
7461 if (key->objectid < drop_key->objectid)
7463 else if (key->objectid == drop_key->objectid) {
7464 if (key->type < drop_key->type)
7466 else if (key->type == drop_key->type) {
7467 if (key->offset < drop_key->offset)
7475 * Here are the rules for FULL_BACKREF.
7477 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7478 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7480 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7481 * if it happened after the relocation occurred since we'll have dropped the
7482 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7483 * have no real way to know for sure.
7485 * We process the blocks one root at a time, and we start from the lowest root
7486 * objectid and go to the highest. So we can just lookup the owner backref for
7487 * the record and if we don't find it then we know it doesn't exist and we have
7490 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
7491 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7492 * be set or not and then we can check later once we've gathered all the refs.
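*
* As a small example of rule 2: if the header owner of buf is some root A,
* but the only tree backrefs recorded for this extent belong to other roots,
* then the owner lookup below (find_tree_backref(rec, 0, owner)) fails and
* we treat the block as needing FULL_BACKREF.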
7494 static int calc_extent_flag(struct cache_tree *extent_cache,
7495 struct extent_buffer *buf,
7496 struct root_item_record *ri,
7499 struct extent_record *rec;
7500 struct cache_extent *cache;
7501 struct tree_backref *tback;
7504 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7505 /* we have added this extent before */
7509 rec = container_of(cache, struct extent_record, cache);
7512 * Except for the file/reloc trees, we cannot have
7515 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7520 if (buf->start == ri->bytenr)
7523 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7526 owner = btrfs_header_owner(buf);
7527 if (owner == ri->objectid)
7530 tback = find_tree_backref(rec, 0, owner);
7535 if (rec->flag_block_full_backref != FLAG_UNSET &&
7536 rec->flag_block_full_backref != 0)
7537 rec->bad_full_backref = 1;
7540 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7541 if (rec->flag_block_full_backref != FLAG_UNSET &&
7542 rec->flag_block_full_backref != 1)
7543 rec->bad_full_backref = 1;
7547 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7549 fprintf(stderr, "Invalid key type(");
7550 print_key_type(stderr, 0, key_type);
7551 fprintf(stderr, ") found in root(");
7552 print_objectid(stderr, rootid, 0);
7553 fprintf(stderr, ")\n");
7557 * Check if the key is valid with its extent buffer.
7559 * This is an early check in case an invalid key exists in an extent buffer.
7560 * It is not comprehensive yet, but should prevent a wrong key/item being passed
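*
* For example, a DEV_ITEM or CHUNK_ITEM key is only accepted when the leaf
* belongs to the chunk tree, and EXTENT_ITEM/METADATA_ITEM/BLOCK_GROUP_ITEM
* keys only when it belongs to the extent tree; a key that does not match
* its tree is reported through report_mismatch_key_root() and the item is
* then ignored by the caller.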
7563 static int check_type_with_root(u64 rootid, u8 key_type)
7566 /* Only valid in chunk tree */
7567 case BTRFS_DEV_ITEM_KEY:
7568 case BTRFS_CHUNK_ITEM_KEY:
7569 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7572 /* valid in csum and log tree */
7573 case BTRFS_CSUM_TREE_OBJECTID:
7574 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7578 case BTRFS_EXTENT_ITEM_KEY:
7579 case BTRFS_METADATA_ITEM_KEY:
7580 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7581 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7584 case BTRFS_ROOT_ITEM_KEY:
7585 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7588 case BTRFS_DEV_EXTENT_KEY:
7589 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7595 report_mismatch_key_root(key_type, rootid);
7599 static int run_next_block(struct btrfs_root *root,
7600 struct block_info *bits,
7603 struct cache_tree *pending,
7604 struct cache_tree *seen,
7605 struct cache_tree *reada,
7606 struct cache_tree *nodes,
7607 struct cache_tree *extent_cache,
7608 struct cache_tree *chunk_cache,
7609 struct rb_root *dev_cache,
7610 struct block_group_tree *block_group_cache,
7611 struct device_extent_tree *dev_extent_cache,
7612 struct root_item_record *ri)
7614 struct btrfs_fs_info *fs_info = root->fs_info;
7615 struct extent_buffer *buf;
7616 struct extent_record *rec = NULL;
7627 struct btrfs_key key;
7628 struct cache_extent *cache;
7631 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7632 bits_nr, &reada_bits);
7637 for(i = 0; i < nritems; i++) {
7638 ret = add_cache_extent(reada, bits[i].start,
7643 /* fixme, get the parent transid */
7644 readahead_tree_block(fs_info, bits[i].start,
7648 *last = bits[0].start;
7649 bytenr = bits[0].start;
7650 size = bits[0].size;
7652 cache = lookup_cache_extent(pending, bytenr, size);
7654 remove_cache_extent(pending, cache);
7657 cache = lookup_cache_extent(reada, bytenr, size);
7659 remove_cache_extent(reada, cache);
7662 cache = lookup_cache_extent(nodes, bytenr, size);
7664 remove_cache_extent(nodes, cache);
7667 cache = lookup_cache_extent(extent_cache, bytenr, size);
7669 rec = container_of(cache, struct extent_record, cache);
7670 gen = rec->parent_generation;
7673 /* fixme, get the real parent transid */
7674 buf = read_tree_block(root->fs_info, bytenr, size, gen);
7675 if (!extent_buffer_uptodate(buf)) {
7676 record_bad_block_io(root->fs_info,
7677 extent_cache, bytenr, size);
7681 nritems = btrfs_header_nritems(buf);
7684 if (!init_extent_tree) {
7685 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7686 btrfs_header_level(buf), 1, NULL,
7689 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7691 fprintf(stderr, "Couldn't calc extent flags\n");
7692 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7697 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7699 fprintf(stderr, "Couldn't calc extent flags\n");
7700 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7704 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7706 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7707 ri->objectid == btrfs_header_owner(buf)) {
7709 * Ok we got to this block from its original owner and
7710 * we have FULL_BACKREF set. Relocation can leave
7711 * converted blocks over so this is altogether possible,
7712 * however it's not possible if the generation > the
7713 * last snapshot, so check for this case.
7715 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7716 btrfs_header_generation(buf) > ri->last_snapshot) {
7717 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7718 rec->bad_full_backref = 1;
7723 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7724 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7725 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7726 rec->bad_full_backref = 1;
7730 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7731 rec->flag_block_full_backref = 1;
7735 rec->flag_block_full_backref = 0;
7737 owner = btrfs_header_owner(buf);
7740 ret = check_block(root, extent_cache, buf, flags);
7744 if (btrfs_is_leaf(buf)) {
7745 btree_space_waste += btrfs_leaf_free_space(root, buf);
7746 for (i = 0; i < nritems; i++) {
7747 struct btrfs_file_extent_item *fi;
7748 btrfs_item_key_to_cpu(buf, &key, i);
7750 * Check the key type against the leaf owner.
7751 * This could filter out quite a lot of early errors if the
7754 if (check_type_with_root(btrfs_header_owner(buf),
7756 fprintf(stderr, "ignoring invalid key\n");
7759 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7760 process_extent_item(root, extent_cache, buf,
7764 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7765 process_extent_item(root, extent_cache, buf,
7769 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7771 btrfs_item_size_nr(buf, i);
7774 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7775 process_chunk_item(chunk_cache, &key, buf, i);
7778 if (key.type == BTRFS_DEV_ITEM_KEY) {
7779 process_device_item(dev_cache, &key, buf, i);
7782 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7783 process_block_group_item(block_group_cache,
7787 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7788 process_device_extent_item(dev_extent_cache,
7793 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7794 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7795 process_extent_ref_v0(extent_cache, buf, i);
7802 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7803 ret = add_tree_backref(extent_cache,
7804 key.objectid, 0, key.offset, 0);
7807 "add_tree_backref failed (leaf tree block): %s",
7811 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7812 ret = add_tree_backref(extent_cache,
7813 key.objectid, key.offset, 0, 0);
7816 "add_tree_backref failed (leaf shared block): %s",
7820 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7821 struct btrfs_extent_data_ref *ref;
7822 ref = btrfs_item_ptr(buf, i,
7823 struct btrfs_extent_data_ref);
7824 add_data_backref(extent_cache,
7826 btrfs_extent_data_ref_root(buf, ref),
7827 btrfs_extent_data_ref_objectid(buf,
7829 btrfs_extent_data_ref_offset(buf, ref),
7830 btrfs_extent_data_ref_count(buf, ref),
7831 0, root->fs_info->sectorsize);
7834 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7835 struct btrfs_shared_data_ref *ref;
7836 ref = btrfs_item_ptr(buf, i,
7837 struct btrfs_shared_data_ref);
7838 add_data_backref(extent_cache,
7839 key.objectid, key.offset, 0, 0, 0,
7840 btrfs_shared_data_ref_count(buf, ref),
7841 0, root->fs_info->sectorsize);
7844 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7845 struct bad_item *bad;
7847 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7851 bad = malloc(sizeof(struct bad_item));
7854 INIT_LIST_HEAD(&bad->list);
7855 memcpy(&bad->key, &key,
7856 sizeof(struct btrfs_key));
7857 bad->root_id = owner;
7858 list_add_tail(&bad->list, &delete_items);
7861 if (key.type != BTRFS_EXTENT_DATA_KEY)
7863 fi = btrfs_item_ptr(buf, i,
7864 struct btrfs_file_extent_item);
7865 if (btrfs_file_extent_type(buf, fi) ==
7866 BTRFS_FILE_EXTENT_INLINE)
7868 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7871 data_bytes_allocated +=
7872 btrfs_file_extent_disk_num_bytes(buf, fi);
7873 if (data_bytes_allocated < root->fs_info->sectorsize) {
7876 data_bytes_referenced +=
7877 btrfs_file_extent_num_bytes(buf, fi);
7878 add_data_backref(extent_cache,
7879 btrfs_file_extent_disk_bytenr(buf, fi),
7880 parent, owner, key.objectid, key.offset -
7881 btrfs_file_extent_offset(buf, fi), 1, 1,
7882 btrfs_file_extent_disk_num_bytes(buf, fi));
7886 struct btrfs_key first_key;
7888 first_key.objectid = 0;
7891 btrfs_item_key_to_cpu(buf, &first_key, 0);
7892 level = btrfs_header_level(buf);
7893 for (i = 0; i < nritems; i++) {
7894 struct extent_record tmpl;
7896 ptr = btrfs_node_blockptr(buf, i);
7897 size = root->fs_info->nodesize;
7898 btrfs_node_key_to_cpu(buf, &key, i);
7900 if ((level == ri->drop_level)
7901 && is_dropped_key(&key, &ri->drop_key)) {
7906 memset(&tmpl, 0, sizeof(tmpl));
7907 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7908 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7913 tmpl.max_size = size;
7914 ret = add_extent_rec(extent_cache, &tmpl);
7918 ret = add_tree_backref(extent_cache, ptr, parent,
7922 "add_tree_backref failed (non-leaf block): %s",
7928 add_pending(nodes, seen, ptr, size);
7930 add_pending(pending, seen, ptr, size);
7933 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7934 nritems) * sizeof(struct btrfs_key_ptr);
7936 total_btree_bytes += buf->len;
7937 if (fs_root_objectid(btrfs_header_owner(buf)))
7938 total_fs_tree_bytes += buf->len;
7939 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7940 total_extent_tree_bytes += buf->len;
7941 if (!found_old_backref &&
7942 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7943 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7944 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7945 found_old_backref = 1;
7947 free_extent_buffer(buf);
7951 static int add_root_to_pending(struct extent_buffer *buf,
7952 struct cache_tree *extent_cache,
7953 struct cache_tree *pending,
7954 struct cache_tree *seen,
7955 struct cache_tree *nodes,
7958 struct extent_record tmpl;
7961 if (btrfs_header_level(buf) > 0)
7962 add_pending(nodes, seen, buf->start, buf->len);
7964 add_pending(pending, seen, buf->start, buf->len);
7966 memset(&tmpl, 0, sizeof(tmpl));
7967 tmpl.start = buf->start;
7972 tmpl.max_size = buf->len;
7973 add_extent_rec(extent_cache, &tmpl);
7975 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7976 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7977 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7980 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7985 /* as we fix the tree, we might be deleting blocks that
7986 * we're tracking for repair. This hook makes sure we
7987 * remove any backrefs for blocks as we are fixing them.
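*
* Roughly: when the transaction drops refs_to_drop references on an extent
* we have cached, the matching data or tree backref in the cache gets its
* found_ref / num_refs counters (and the record's refs / extent_item_refs)
* decremented to match, so we do not later try to "repair" a reference we
* deleted ourselves.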
7989 static int free_extent_hook(struct btrfs_trans_handle *trans,
7990 struct btrfs_root *root,
7991 u64 bytenr, u64 num_bytes, u64 parent,
7992 u64 root_objectid, u64 owner, u64 offset,
7995 struct extent_record *rec;
7996 struct cache_extent *cache;
7998 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
8000 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
8001 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8005 rec = container_of(cache, struct extent_record, cache);
8007 struct data_backref *back;
8008 back = find_data_backref(rec, parent, root_objectid, owner,
8009 offset, 1, bytenr, num_bytes);
8012 if (back->node.found_ref) {
8013 back->found_ref -= refs_to_drop;
8015 rec->refs -= refs_to_drop;
8017 if (back->node.found_extent_tree) {
8018 back->num_refs -= refs_to_drop;
8019 if (rec->extent_item_refs)
8020 rec->extent_item_refs -= refs_to_drop;
8022 if (back->found_ref == 0)
8023 back->node.found_ref = 0;
8024 if (back->num_refs == 0)
8025 back->node.found_extent_tree = 0;
8027 if (!back->node.found_extent_tree && back->node.found_ref) {
8028 list_del(&back->node.list);
8032 struct tree_backref *back;
8033 back = find_tree_backref(rec, parent, root_objectid);
8036 if (back->node.found_ref) {
8039 back->node.found_ref = 0;
8041 if (back->node.found_extent_tree) {
8042 if (rec->extent_item_refs)
8043 rec->extent_item_refs--;
8044 back->node.found_extent_tree = 0;
8046 if (!back->node.found_extent_tree && back->node.found_ref) {
8047 list_del(&back->node.list);
8051 maybe_free_extent_rec(extent_cache, rec);
8056 static int delete_extent_records(struct btrfs_trans_handle *trans,
8057 struct btrfs_root *root,
8058 struct btrfs_path *path,
8061 struct btrfs_key key;
8062 struct btrfs_key found_key;
8063 struct extent_buffer *leaf;
8068 key.objectid = bytenr;
8070 key.offset = (u64)-1;
8073 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8080 if (path->slots[0] == 0)
8086 leaf = path->nodes[0];
8087 slot = path->slots[0];
8089 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8090 if (found_key.objectid != bytenr)
8093 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8094 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8095 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8096 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8097 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8098 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8099 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8100 btrfs_release_path(path);
8101 if (found_key.type == 0) {
8102 if (found_key.offset == 0)
8104 key.offset = found_key.offset - 1;
8105 key.type = found_key.type;
8107 key.type = found_key.type - 1;
8108 key.offset = (u64)-1;
8112 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8113 found_key.objectid, found_key.type, found_key.offset);
8115 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8118 btrfs_release_path(path);
8120 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8121 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8122 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8123 found_key.offset : root->fs_info->nodesize;
8125 ret = btrfs_update_block_group(trans, root, bytenr,
8132 btrfs_release_path(path);
8137 * for a single backref, this will allocate a new extent
8138 * and add the backref to it.
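*
* In outline: when the extent item itself is missing, a fresh EXTENT_ITEM of
* rec->max_size is inserted (plus a btrfs_tree_block_info for metadata) and
* the block group counters are updated; after that btrfs_inc_extent_ref() is
* called once per data reference actually found, or once for a tree backref,
* so the extent tree ends up with exactly the refs the scan saw.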
8140 static int record_extent(struct btrfs_trans_handle *trans,
8141 struct btrfs_fs_info *info,
8142 struct btrfs_path *path,
8143 struct extent_record *rec,
8144 struct extent_backref *back,
8145 int allocated, u64 flags)
8148 struct btrfs_root *extent_root = info->extent_root;
8149 struct extent_buffer *leaf;
8150 struct btrfs_key ins_key;
8151 struct btrfs_extent_item *ei;
8152 struct data_backref *dback;
8153 struct btrfs_tree_block_info *bi;
8156 rec->max_size = max_t(u64, rec->max_size,
8160 u32 item_size = sizeof(*ei);
8163 item_size += sizeof(*bi);
8165 ins_key.objectid = rec->start;
8166 ins_key.offset = rec->max_size;
8167 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8169 ret = btrfs_insert_empty_item(trans, extent_root, path,
8170 &ins_key, item_size);
8174 leaf = path->nodes[0];
8175 ei = btrfs_item_ptr(leaf, path->slots[0],
8176 struct btrfs_extent_item);
8178 btrfs_set_extent_refs(leaf, ei, 0);
8179 btrfs_set_extent_generation(leaf, ei, rec->generation);
8181 if (back->is_data) {
8182 btrfs_set_extent_flags(leaf, ei,
8183 BTRFS_EXTENT_FLAG_DATA);
8185 struct btrfs_disk_key copy_key;
8187 bi = (struct btrfs_tree_block_info *)(ei + 1);
8188 memset_extent_buffer(leaf, 0, (unsigned long)bi,
8191 btrfs_set_disk_key_objectid(&copy_key,
8192 rec->info_objectid);
8193 btrfs_set_disk_key_type(&copy_key, 0);
8194 btrfs_set_disk_key_offset(&copy_key, 0);
8196 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8197 btrfs_set_tree_block_key(leaf, bi, &copy_key);
8199 btrfs_set_extent_flags(leaf, ei,
8200 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8203 btrfs_mark_buffer_dirty(leaf);
8204 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8205 rec->max_size, 1, 0);
8208 btrfs_release_path(path);
8211 if (back->is_data) {
8215 dback = to_data_backref(back);
8216 if (back->full_backref)
8217 parent = dback->parent;
8221 for (i = 0; i < dback->found_ref; i++) {
8222 /* if parent != 0, we're doing a full backref
8223 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8224 * just makes the backref allocator create a data
8227 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8228 rec->start, rec->max_size,
8232 BTRFS_FIRST_FREE_OBJECTID :
8238 fprintf(stderr, "adding new data backref"
8239 " on %llu %s %llu owner %llu"
8240 " offset %llu found %d\n",
8241 (unsigned long long)rec->start,
8242 back->full_backref ?
8244 back->full_backref ?
8245 (unsigned long long)parent :
8246 (unsigned long long)dback->root,
8247 (unsigned long long)dback->owner,
8248 (unsigned long long)dback->offset,
8252 struct tree_backref *tback;
8254 tback = to_tree_backref(back);
8255 if (back->full_backref)
8256 parent = tback->parent;
8260 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8261 rec->start, rec->max_size,
8262 parent, tback->root, 0, 0);
8263 fprintf(stderr, "adding new tree backref on "
8264 "start %llu len %llu parent %llu root %llu\n",
8265 rec->start, rec->max_size, parent, tback->root);
8268 btrfs_release_path(path);
8272 static struct extent_entry *find_entry(struct list_head *entries,
8273 u64 bytenr, u64 bytes)
8275 struct extent_entry *entry = NULL;
8277 list_for_each_entry(entry, entries, list) {
8278 if (entry->bytenr == bytenr && entry->bytes == bytes)
8285 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8287 struct extent_entry *entry, *best = NULL, *prev = NULL;
8289 list_for_each_entry(entry, entries, list) {
8291 * If there are as many broken entries as entries then we know
8292 * not to trust this particular entry.
8294 if (entry->broken == entry->count)
8298 * Special case, when there are only two entries and 'best' is
8308 * If our current entry == best then we can't be sure our best
8309 * is really the best, so we need to keep searching.
8311 if (best && best->count == entry->count) {
8317 /* Prev == entry, not good enough, have to keep searching */
8318 if (!prev->broken && prev->count == entry->count)
8322 best = (prev->count > entry->count) ? prev : entry;
8323 else if (best->count < entry->count)
8331 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8332 struct data_backref *dback, struct extent_entry *entry)
8334 struct btrfs_trans_handle *trans;
8335 struct btrfs_root *root;
8336 struct btrfs_file_extent_item *fi;
8337 struct extent_buffer *leaf;
8338 struct btrfs_key key;
8342 key.objectid = dback->root;
8343 key.type = BTRFS_ROOT_ITEM_KEY;
8344 key.offset = (u64)-1;
8345 root = btrfs_read_fs_root(info, &key);
8347 fprintf(stderr, "Couldn't find root for our ref\n");
8352 * The backref points to the original offset of the extent if it was
8353 * split, so we need to search down to the offset we have and then walk
8354 * forward until we find the backref we're looking for.
8356 key.objectid = dback->owner;
8357 key.type = BTRFS_EXTENT_DATA_KEY;
8358 key.offset = dback->offset;
8359 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8361 fprintf(stderr, "Error looking up ref %d\n", ret);
8366 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8367 ret = btrfs_next_leaf(root, path);
8369 fprintf(stderr, "Couldn't find our ref, next\n");
8373 leaf = path->nodes[0];
8374 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8375 if (key.objectid != dback->owner ||
8376 key.type != BTRFS_EXTENT_DATA_KEY) {
8377 fprintf(stderr, "Couldn't find our ref, search\n");
8380 fi = btrfs_item_ptr(leaf, path->slots[0],
8381 struct btrfs_file_extent_item);
8382 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8383 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8385 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8390 btrfs_release_path(path);
8392 trans = btrfs_start_transaction(root, 1);
8394 return PTR_ERR(trans);
8397 * Ok we have the key of the file extent we want to fix, now we can cow
8398 * down to the thing and fix it.
8400 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8402 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8403 key.objectid, key.type, key.offset, ret);
8407 fprintf(stderr, "Well that's odd, we just found this key "
8408 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8413 leaf = path->nodes[0];
8414 fi = btrfs_item_ptr(leaf, path->slots[0],
8415 struct btrfs_file_extent_item);
8417 if (btrfs_file_extent_compression(leaf, fi) &&
8418 dback->disk_bytenr != entry->bytenr) {
8419 fprintf(stderr, "Ref doesn't match the record start and is "
8420 "compressed, please take a btrfs-image of this file "
8421 "system and send it to a btrfs developer so they can "
8422 "complete this functionality for bytenr %Lu\n",
8423 dback->disk_bytenr);
8428 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8429 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8430 } else if (dback->disk_bytenr > entry->bytenr) {
8431 u64 off_diff, offset;
8433 off_diff = dback->disk_bytenr - entry->bytenr;
8434 offset = btrfs_file_extent_offset(leaf, fi);
8435 if (dback->disk_bytenr + offset +
8436 btrfs_file_extent_num_bytes(leaf, fi) >
8437 entry->bytenr + entry->bytes) {
8438 fprintf(stderr, "Ref is past the entry end, please "
8439 "take a btrfs-image of this file system and "
8440 "send it to a btrfs developer, ref %Lu\n",
8441 dback->disk_bytenr);
8446 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8447 btrfs_set_file_extent_offset(leaf, fi, offset);
8448 } else if (dback->disk_bytenr < entry->bytenr) {
8451 offset = btrfs_file_extent_offset(leaf, fi);
8452 if (dback->disk_bytenr + offset < entry->bytenr) {
8453 fprintf(stderr, "Ref is before the entry start, please"
8454 " take a btrfs-image of this file system and "
8455 "send it to a btrfs developer, ref %Lu\n",
8456 dback->disk_bytenr);
8461 offset += dback->disk_bytenr;
8462 offset -= entry->bytenr;
8463 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8464 btrfs_set_file_extent_offset(leaf, fi, offset);
8467 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8470 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8471 * only do this if we aren't using compression, otherwise it's a
8474 if (!btrfs_file_extent_compression(leaf, fi))
8475 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8477 printf("ram bytes may be wrong?\n");
8478 btrfs_mark_buffer_dirty(leaf);
8480 err = btrfs_commit_transaction(trans, root);
8481 btrfs_release_path(path);
8482 return ret ? ret : err;
8485 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8486 struct extent_record *rec)
8488 struct extent_backref *back;
8489 struct data_backref *dback;
8490 struct extent_entry *entry, *best = NULL;
8493 int broken_entries = 0;
8498 * Metadata is easy and the backrefs should always agree on bytenr and
8499 * size, if not we've got bigger issues.
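*
* For data extents the code below effectively takes a vote: every backref
* contributes its (disk_bytenr, bytes) pair to the entries list,
* find_most_right_entry() picks the pair most backrefs agree on (using the
* extent record itself as a tie breaker), and repair_ref() then rewrites any
* file extent item that disagrees with the winner.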
8504 list_for_each_entry(back, &rec->backrefs, list) {
8505 if (back->full_backref || !back->is_data)
8508 dback = to_data_backref(back);
8511 * We only pay attention to backrefs that we found a real
8514 if (dback->found_ref == 0)
8518 * For now we only catch when the bytes don't match, not the
8519 * bytenr. We can easily do this at the same time, but I want
8520 * to have a fs image to test on before we just add repair
8521 * functionality willy-nilly so we know we won't screw up the
8525 entry = find_entry(&entries, dback->disk_bytenr,
8528 entry = malloc(sizeof(struct extent_entry));
8533 memset(entry, 0, sizeof(*entry));
8534 entry->bytenr = dback->disk_bytenr;
8535 entry->bytes = dback->bytes;
8536 list_add_tail(&entry->list, &entries);
8541 * If we only have one entry we may think the entries agree when
8542 * in reality they don't so we have to do some extra checking.
8544 if (dback->disk_bytenr != rec->start ||
8545 dback->bytes != rec->nr || back->broken)
8556 /* Yay all the backrefs agree, carry on good sir */
8557 if (nr_entries <= 1 && !mismatch)
8560 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8561 "%Lu\n", rec->start);
8564 * First we want to see if the backrefs can agree amongst themselves who
8565 * is right, so figure out which one of the entries has the highest
8568 best = find_most_right_entry(&entries);
8571 * Ok so we may have an even split between what the backrefs think, so
8572 * this is where we use the extent ref to see what it thinks.
8575 entry = find_entry(&entries, rec->start, rec->nr);
8576 if (!entry && (!broken_entries || !rec->found_rec)) {
8577 fprintf(stderr, "Backrefs don't agree with each other "
8578 "and extent record doesn't agree with anybody,"
8579 " so we can't fix bytenr %Lu bytes %Lu\n",
8580 rec->start, rec->nr);
8583 } else if (!entry) {
8585 * Ok our backrefs were broken, we'll assume this is the
8586 * correct value and add an entry for this range.
8588 entry = malloc(sizeof(struct extent_entry));
8593 memset(entry, 0, sizeof(*entry));
8594 entry->bytenr = rec->start;
8595 entry->bytes = rec->nr;
8596 list_add_tail(&entry->list, &entries);
8600 best = find_most_right_entry(&entries);
8602 fprintf(stderr, "Backrefs and extent record evenly "
8603 "split on who is right, this is going to "
8604 "require user input to fix bytenr %Lu bytes "
8605 "%Lu\n", rec->start, rec->nr);
8612 * I don't think this can happen currently as we'll abort() if we catch
8613 * this case higher up, but in case somebody removes that we still can't
8614 * deal with it properly here yet, so just bail out if that's the case.
8616 if (best->bytenr != rec->start) {
8617 fprintf(stderr, "Extent start and backref starts don't match, "
8618 "please use btrfs-image on this file system and send "
8619 "it to a btrfs developer so they can make fsck fix "
8620 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8621 rec->start, rec->nr);
8627 * Ok great we all agreed on an extent record, let's go find the real
8628 * references and fix up the ones that don't match.
8630 list_for_each_entry(back, &rec->backrefs, list) {
8631 if (back->full_backref || !back->is_data)
8634 dback = to_data_backref(back);
8637 * Still ignoring backrefs that don't have a real ref attached
8640 if (dback->found_ref == 0)
8643 if (dback->bytes == best->bytes &&
8644 dback->disk_bytenr == best->bytenr)
8647 ret = repair_ref(info, path, dback, best);
8653 * Ok we messed with the actual refs, which means we need to drop our
8654 * entire cache and go back and rescan. I know this is a huge pain and
8655 * adds a lot of extra work, but it's the only way to be safe. Once all
8656 * the backrefs agree we may not need to do anything to the extent
8661 while (!list_empty(&entries)) {
8662 entry = list_entry(entries.next, struct extent_entry, list);
8663 list_del_init(&entry->list);
8669 static int process_duplicates(struct cache_tree *extent_cache,
8670 struct extent_record *rec)
8672 struct extent_record *good, *tmp;
8673 struct cache_extent *cache;
8677 * If we found an extent record for this extent then return, or if we
8678 * have more than one duplicate we are likely going to need to delete
8681 if (rec->found_rec || rec->num_duplicates > 1)
8684 /* Shouldn't happen but just in case */
8685 BUG_ON(!rec->num_duplicates);
8688 * So this happens if we end up with a backref that doesn't match the
8689 * actual extent entry. So either the backref is bad or the extent
8690 * entry is bad. Either way we want to have the extent_record actually
8691 * reflect what we found in the extent_tree, so we need to take the
8692 * duplicate out and use that as the extent_record since the only way we
8693 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8695 remove_cache_extent(extent_cache, &rec->cache);
8697 good = to_extent_record(rec->dups.next);
8698 list_del_init(&good->list);
8699 INIT_LIST_HEAD(&good->backrefs);
8700 INIT_LIST_HEAD(&good->dups);
8701 good->cache.start = good->start;
8702 good->cache.size = good->nr;
8703 good->content_checked = 0;
8704 good->owner_ref_checked = 0;
8705 good->num_duplicates = 0;
8706 good->refs = rec->refs;
8707 list_splice_init(&rec->backrefs, &good->backrefs);
8709 cache = lookup_cache_extent(extent_cache, good->start,
8713 tmp = container_of(cache, struct extent_record, cache);
8716 * If we find another overlapping extent and its found_rec is
8717 * set then it's a duplicate and we need to try and delete
8720 if (tmp->found_rec || tmp->num_duplicates > 0) {
8721 if (list_empty(&good->list))
8722 list_add_tail(&good->list,
8723 &duplicate_extents);
8724 good->num_duplicates += tmp->num_duplicates + 1;
8725 list_splice_init(&tmp->dups, &good->dups);
8726 list_del_init(&tmp->list);
8727 list_add_tail(&tmp->list, &good->dups);
8728 remove_cache_extent(extent_cache, &tmp->cache);
8733 * Ok we have another extent rec not backed by an extent item, so let's
8734 * just add it to this extent and carry on like we did above.
8736 good->refs += tmp->refs;
8737 list_splice_init(&tmp->backrefs, &good->backrefs);
8738 remove_cache_extent(extent_cache, &tmp->cache);
8741 ret = insert_cache_extent(extent_cache, &good->cache);
8744 return good->num_duplicates ? 0 : 1;
8747 static int delete_duplicate_records(struct btrfs_root *root,
8748 struct extent_record *rec)
8750 struct btrfs_trans_handle *trans;
8751 LIST_HEAD(delete_list);
8752 struct btrfs_path path;
8753 struct extent_record *tmp, *good, *n;
8756 struct btrfs_key key;
8758 btrfs_init_path(&path);
8761 /* Find the record that covers all of the duplicates. */
8762 list_for_each_entry(tmp, &rec->dups, list) {
8763 if (good->start < tmp->start)
8765 if (good->nr > tmp->nr)
8768 if (tmp->start + tmp->nr < good->start + good->nr) {
8769 fprintf(stderr, "Ok we have overlapping extents that "
8770 "aren't completely covered by each other, this "
8771 "is going to require more careful thought. "
8772 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8773 tmp->start, tmp->nr, good->start, good->nr);
8780 list_add_tail(&rec->list, &delete_list);
8782 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8785 list_move_tail(&tmp->list, &delete_list);
8788 root = root->fs_info->extent_root;
8789 trans = btrfs_start_transaction(root, 1);
8790 if (IS_ERR(trans)) {
8791 ret = PTR_ERR(trans);
8795 list_for_each_entry(tmp, &delete_list, list) {
8796 if (tmp->found_rec == 0)
8798 key.objectid = tmp->start;
8799 key.type = BTRFS_EXTENT_ITEM_KEY;
8800 key.offset = tmp->nr;
8802 /* Shouldn't happen but just in case */
8803 if (tmp->metadata) {
8804 fprintf(stderr, "Well this shouldn't happen, extent "
8805 "record overlaps but is metadata? "
8806 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8810 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8816 ret = btrfs_del_item(trans, root, &path);
8819 btrfs_release_path(&path);
8822 err = btrfs_commit_transaction(trans, root);
8826 while (!list_empty(&delete_list)) {
8827 tmp = to_extent_record(delete_list.next);
8828 list_del_init(&tmp->list);
8834 while (!list_empty(&rec->dups)) {
8835 tmp = to_extent_record(rec->dups.next);
8836 list_del_init(&tmp->list);
8840 btrfs_release_path(&path);
8842 if (!ret && !nr_del)
8843 rec->num_duplicates = 0;
8845 return ret ? ret : nr_del;
8848 static int find_possible_backrefs(struct btrfs_fs_info *info,
8849 struct btrfs_path *path,
8850 struct cache_tree *extent_cache,
8851 struct extent_record *rec)
8853 struct btrfs_root *root;
8854 struct extent_backref *back;
8855 struct data_backref *dback;
8856 struct cache_extent *cache;
8857 struct btrfs_file_extent_item *fi;
8858 struct btrfs_key key;
8862 list_for_each_entry(back, &rec->backrefs, list) {
8863 /* Don't care about full backrefs (poor unloved backrefs) */
8864 if (back->full_backref || !back->is_data)
8867 dback = to_data_backref(back);
8869 /* We found this one, we don't need to do a lookup */
8870 if (dback->found_ref)
8873 key.objectid = dback->root;
8874 key.type = BTRFS_ROOT_ITEM_KEY;
8875 key.offset = (u64)-1;
8877 root = btrfs_read_fs_root(info, &key);
8879 /* No root, definitely a bad ref, skip */
8880 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8882 /* Other err, exit */
8884 return PTR_ERR(root);
8886 key.objectid = dback->owner;
8887 key.type = BTRFS_EXTENT_DATA_KEY;
8888 key.offset = dback->offset;
8889 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8891 btrfs_release_path(path);
8894 /* Didn't find it, we can carry on */
8899 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8900 struct btrfs_file_extent_item);
8901 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8902 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8903 btrfs_release_path(path);
8904 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8906 struct extent_record *tmp;
8907 tmp = container_of(cache, struct extent_record, cache);
8910 * If we found an extent record for the bytenr for this
8911 * particular backref then we can't add it to our
8912 * current extent record. We only want to add backrefs
8913 * that don't have a corresponding extent item in the
8914 * extent tree since they likely belong to this record
8915 * and we need to fix it if it doesn't match bytenrs.
8921 dback->found_ref += 1;
8922 dback->disk_bytenr = bytenr;
8923 dback->bytes = bytes;
8926 * Set this so the verify backref code knows not to trust the
8927 * values in this backref.
8936 * Record orphan data ref into corresponding root.
8938 * Return 0 if the extent item contains a data ref and it was recorded.
8939 * Return 1 if the extent item contains no useful data ref.
8940 *   In that case, it may contain only a shared_dataref or metadata backref,
8941 *   or the file extent already exists (this should be handled by the extent bytenr
8943 * Return <0 if something goes wrong.
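*
* An orphan data extent here is a data backref that exists in the extent
* tree but has no matching file extent item in its fs tree. For each one we
* remember (root, objectid, offset, disk_bytenr, disk_len) on the destination
* root's orphan_data_extents list so later inode/file extent repair can
* recreate the missing item.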
8945 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8946 struct extent_record *rec)
8948 struct btrfs_key key;
8949 struct btrfs_root *dest_root;
8950 struct extent_backref *back;
8951 struct data_backref *dback;
8952 struct orphan_data_extent *orphan;
8953 struct btrfs_path path;
8954 int recorded_data_ref = 0;
8959 btrfs_init_path(&path);
8960 list_for_each_entry(back, &rec->backrefs, list) {
8961 if (back->full_backref || !back->is_data ||
8962 !back->found_extent_tree)
8964 dback = to_data_backref(back);
8965 if (dback->found_ref)
8967 key.objectid = dback->root;
8968 key.type = BTRFS_ROOT_ITEM_KEY;
8969 key.offset = (u64)-1;
8971 dest_root = btrfs_read_fs_root(fs_info, &key);
8973 /* For a non-existent root we just skip it */
8974 if (IS_ERR(dest_root) || !dest_root)
8977 key.objectid = dback->owner;
8978 key.type = BTRFS_EXTENT_DATA_KEY;
8979 key.offset = dback->offset;
8981 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8982 btrfs_release_path(&path);
8984 * For ret < 0, it's OK since the fs-tree may be corrupted,
8985 * we need to record it for inode/file extent rebuild.
8986 * For ret > 0, we record it only for file extent rebuild.
8987 * For ret == 0, the file extent exists but only the bytenr
8988 * mismatches; let the original bytenr fix routine handle it,
8994 orphan = malloc(sizeof(*orphan));
8999 INIT_LIST_HEAD(&orphan->list);
9000 orphan->root = dback->root;
9001 orphan->objectid = dback->owner;
9002 orphan->offset = dback->offset;
9003 orphan->disk_bytenr = rec->cache.start;
9004 orphan->disk_len = rec->cache.size;
9005 list_add(&dest_root->orphan_data_extents, &orphan->list);
9006 recorded_data_ref = 1;
9009 btrfs_release_path(&path);
9011 return !recorded_data_ref;
9017 * when an incorrect extent item is found, this will delete
9018 * all of the existing entries for it and recreate them
9019 * based on what the tree scan found.
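*
* The repair sequence below is: make the cached backrefs agree with each
* other (find_possible_backrefs() + verify_backrefs()), delete every existing
* extent tree item for this extent (delete_extent_records()), and then
* re-insert the extent item with one backref per reference that was really
* found (record_extent()), skipping blocks recorded as corrupt.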
9021 static int fixup_extent_refs(struct btrfs_fs_info *info,
9022 struct cache_tree *extent_cache,
9023 struct extent_record *rec)
9025 struct btrfs_trans_handle *trans = NULL;
9027 struct btrfs_path path;
9028 struct list_head *cur = rec->backrefs.next;
9029 struct cache_extent *cache;
9030 struct extent_backref *back;
9034 if (rec->flag_block_full_backref)
9035 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9037 btrfs_init_path(&path);
9038 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9040 * Sometimes the backrefs themselves are so broken they don't
9041 * get attached to any meaningful rec, so first go back and
9042 * check any of our backrefs that we couldn't find and throw
9043 * them into the list if we find the backref so that
9044 * verify_backrefs can figure out what to do.
9046 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9051 /* step one, make sure all of the backrefs agree */
9052 ret = verify_backrefs(info, &path, rec);
9056 trans = btrfs_start_transaction(info->extent_root, 1);
9057 if (IS_ERR(trans)) {
9058 ret = PTR_ERR(trans);
9062 /* step two, delete all the existing records */
9063 ret = delete_extent_records(trans, info->extent_root, &path,
9069 /* was this block corrupt? If so, don't add references to it */
9070 cache = lookup_cache_extent(info->corrupt_blocks,
9071 rec->start, rec->max_size);
9077 /* step three, recreate all the refs we did find */
9078 while(cur != &rec->backrefs) {
9079 back = to_extent_backref(cur);
9083 * if we didn't find any references, don't create a
9086 if (!back->found_ref)
9089 rec->bad_full_backref = 0;
9090 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9098 int err = btrfs_commit_transaction(trans, info->extent_root);
9104 fprintf(stderr, "Repaired extent references for %llu\n",
9105 (unsigned long long)rec->start);
9107 btrfs_release_path(&path);
9111 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9112 struct extent_record *rec)
9114 struct btrfs_trans_handle *trans;
9115 struct btrfs_root *root = fs_info->extent_root;
9116 struct btrfs_path path;
9117 struct btrfs_extent_item *ei;
9118 struct btrfs_key key;
9122 key.objectid = rec->start;
9123 if (rec->metadata) {
9124 key.type = BTRFS_METADATA_ITEM_KEY;
9125 key.offset = rec->info_level;
9127 key.type = BTRFS_EXTENT_ITEM_KEY;
9128 key.offset = rec->max_size;
9131 trans = btrfs_start_transaction(root, 0);
9133 return PTR_ERR(trans);
9135 btrfs_init_path(&path);
9136 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9138 btrfs_release_path(&path);
9139 btrfs_commit_transaction(trans, root);
9142 fprintf(stderr, "Didn't find extent for %llu\n",
9143 (unsigned long long)rec->start);
9144 btrfs_release_path(&path);
9145 btrfs_commit_transaction(trans, root);
9149 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9150 struct btrfs_extent_item);
9151 flags = btrfs_extent_flags(path.nodes[0], ei);
9152 if (rec->flag_block_full_backref) {
9153 fprintf(stderr, "setting full backref on %llu\n",
9154 (unsigned long long)key.objectid);
9155 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9157 fprintf(stderr, "clearing full backref on %llu\n",
9158 (unsigned long long)key.objectid);
9159 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9161 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9162 btrfs_mark_buffer_dirty(path.nodes[0]);
9163 btrfs_release_path(&path);
9164 ret = btrfs_commit_transaction(trans, root);
9166 fprintf(stderr, "Repaired extent flags for %llu\n",
9167 (unsigned long long)rec->start);
9172 /* right now we only prune from the extent allocation tree */
9173 static int prune_one_block(struct btrfs_trans_handle *trans,
9174 struct btrfs_fs_info *info,
9175 struct btrfs_corrupt_block *corrupt)
9178 struct btrfs_path path;
9179 struct extent_buffer *eb;
9183 int level = corrupt->level + 1;
9185 btrfs_init_path(&path);
9187 /* we want to stop at the parent of our busted block */
9188 path.lowest_level = level;
9190 ret = btrfs_search_slot(trans, info->extent_root,
9191 &corrupt->key, &path, -1, 1);
9196 eb = path.nodes[level];
9203 * hopefully the search gave us the block we want to prune,
9204 * let's try that first
9206 slot = path.slots[level];
9207 found = btrfs_node_blockptr(eb, slot);
9208 if (found == corrupt->cache.start)
9211 nritems = btrfs_header_nritems(eb);
9213 /* the search failed, let's scan this node and hope we find it */
9214 for (slot = 0; slot < nritems; slot++) {
9215 found = btrfs_node_blockptr(eb, slot);
9216 if (found == corrupt->cache.start)
9220 * we couldn't find the bad block. TODO, search all the nodes for pointers
9223 if (eb == info->extent_root->node) {
9228 btrfs_release_path(&path);
9233 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9234 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9237 btrfs_release_path(&path);
9241 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9243 struct btrfs_trans_handle *trans = NULL;
9244 struct cache_extent *cache;
9245 struct btrfs_corrupt_block *corrupt;
9248 cache = search_cache_extent(info->corrupt_blocks, 0);
9252 trans = btrfs_start_transaction(info->extent_root, 1);
9254 return PTR_ERR(trans);
9256 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9257 prune_one_block(trans, info, corrupt);
9258 remove_cache_extent(info->corrupt_blocks, cache);
9261 return btrfs_commit_transaction(trans, info->extent_root);
9265 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9267 struct btrfs_block_group_cache *cache;
9272 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9273 &start, &end, EXTENT_DIRTY);
9276 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9281 cache = btrfs_lookup_first_block_group(fs_info, start);
9286 start = cache->key.objectid + cache->key.offset;
9290 static int check_extent_refs(struct btrfs_root *root,
9291 struct cache_tree *extent_cache)
9293 struct extent_record *rec;
9294 struct cache_extent *cache;
9300 * if we're doing a repair, we have to make sure
9301 * we don't allocate from the problem extents.
9302 * In the worst case, this will be all the
9305 cache = search_cache_extent(extent_cache, 0);
9307 rec = container_of(cache, struct extent_record, cache);
9308 set_extent_dirty(root->fs_info->excluded_extents,
9310 rec->start + rec->max_size - 1);
9311 cache = next_cache_extent(cache);
9314 /* pin down all the corrupted blocks too */
9315 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9317 set_extent_dirty(root->fs_info->excluded_extents,
9319 cache->start + cache->size - 1);
9320 cache = next_cache_extent(cache);
9322 prune_corrupt_blocks(root->fs_info);
9323 reset_cached_block_groups(root->fs_info);
9326 reset_cached_block_groups(root->fs_info);
9329 * We need to delete any duplicate entries we find first otherwise we
9330 * could mess up the extent tree when we have backrefs that actually
9331 * belong to a different extent item and not the weird duplicate one.
9333 while (repair && !list_empty(&duplicate_extents)) {
9334 rec = to_extent_record(duplicate_extents.next);
9335 list_del_init(&rec->list);
9337 /* Sometimes we can find a backref before we find an actual
9338 * extent, so we need to process it a little bit to see if there
9339 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9340 * if this is a backref screwup. If we need to delete stuff
9341 * process_duplicates() will return 0, otherwise it will return
9344 if (process_duplicates(extent_cache, rec))
9346 ret = delete_duplicate_records(root, rec);
9350 * delete_duplicate_records will return the number of entries
9351 * deleted, so if it's greater than 0 then we know we actually
9352 * did something and we need to remove.
9365 cache = search_cache_extent(extent_cache, 0);
9368 rec = container_of(cache, struct extent_record, cache);
9369 if (rec->num_duplicates) {
9370 fprintf(stderr, "extent item %llu has multiple extent "
9371 "items\n", (unsigned long long)rec->start);
9375 if (rec->refs != rec->extent_item_refs) {
9376 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9377 (unsigned long long)rec->start,
9378 (unsigned long long)rec->nr);
9379 fprintf(stderr, "extent item %llu, found %llu\n",
9380 (unsigned long long)rec->extent_item_refs,
9381 (unsigned long long)rec->refs);
9382 ret = record_orphan_data_extents(root->fs_info, rec);
9388 if (all_backpointers_checked(rec, 1)) {
9389 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9390 (unsigned long long)rec->start,
9391 (unsigned long long)rec->nr);
9395 if (!rec->owner_ref_checked) {
9396 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9397 (unsigned long long)rec->start,
9398 (unsigned long long)rec->nr);
9403 if (repair && fix) {
9404 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9410 if (rec->bad_full_backref) {
9411 fprintf(stderr, "bad full backref, on [%llu]\n",
9412 (unsigned long long)rec->start);
9414 ret = fixup_extent_flags(root->fs_info, rec);
9422 * Although it's not an extent ref's problem, we reuse this
9423 * routine for error reporting.
9424 * No repair function yet.
9426 if (rec->crossing_stripes) {
9428 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9429 rec->start, rec->start + rec->max_size);
9433 if (rec->wrong_chunk_type) {
9435 "bad extent [%llu, %llu), type mismatch with chunk\n",
9436 rec->start, rec->start + rec->max_size);
9440 remove_cache_extent(extent_cache, cache);
9441 free_all_extent_backrefs(rec);
9442 if (!init_extent_tree && repair && (!cur_err || fix))
9443 clear_extent_dirty(root->fs_info->excluded_extents,
9445 rec->start + rec->max_size - 1);
9450 if (ret && ret != -EAGAIN) {
9451 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9454 struct btrfs_trans_handle *trans;
9456 root = root->fs_info->extent_root;
9457 trans = btrfs_start_transaction(root, 1);
9458 if (IS_ERR(trans)) {
9459 ret = PTR_ERR(trans);
9463 btrfs_fix_block_accounting(trans, root);
9464 ret = btrfs_commit_transaction(trans, root);
9473 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9477 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9478 stripe_size = length;
9479 stripe_size /= num_stripes;
9480 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9481 stripe_size = length * 2;
9482 stripe_size /= num_stripes;
9483 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9484 stripe_size = length;
9485 stripe_size /= (num_stripes - 1);
9486 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9487 stripe_size = length;
9488 stripe_size /= (num_stripes - 2);
9490 stripe_size = length;
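/*
 * Worked example of the stripe math above: a RAID10 chunk with logical
 * length 4GiB and num_stripes == 4 keeps two copies of every byte, so each
 * device holds length * 2 / 4 = 2GiB; a RAID5 chunk of the same length with
 * 4 stripes loses one stripe to parity, so each device holds 4GiB / 3.
 */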
9496 * Check the chunk against its block group and dev extent list refs:
9497 * Return 0 if all refs seem valid.
9498 * Return 1 if only part of the refs seem valid; a later pass is then needed
9499 * to rebuild the missing refs (e.g. a missing block group) from the extent tree.
9500 * Return -1 if essential refs are missing and unable to rebuild.
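*
* The check is pure cross-referencing: the chunk must match a block group
* record on (offset, length, type flags), and each stripe must match a dev
* extent record keyed by (devid, offset) whose chunk_offset points back at
* this chunk and whose length equals calc_stripe_length() for the chunk.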
9502 static int check_chunk_refs(struct chunk_record *chunk_rec,
9503 struct block_group_tree *block_group_cache,
9504 struct device_extent_tree *dev_extent_cache,
9507 struct cache_extent *block_group_item;
9508 struct block_group_record *block_group_rec;
9509 struct cache_extent *dev_extent_item;
9510 struct device_extent_record *dev_extent_rec;
9514 int metadump_v2 = 0;
9518 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9521 if (block_group_item) {
9522 block_group_rec = container_of(block_group_item,
9523 struct block_group_record,
9525 if (chunk_rec->length != block_group_rec->offset ||
9526 chunk_rec->offset != block_group_rec->objectid ||
9528 chunk_rec->type_flags != block_group_rec->flags)) {
9531 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9532 chunk_rec->objectid,
9537 chunk_rec->type_flags,
9538 block_group_rec->objectid,
9539 block_group_rec->type,
9540 block_group_rec->offset,
9541 block_group_rec->offset,
9542 block_group_rec->objectid,
9543 block_group_rec->flags);
9546 list_del_init(&block_group_rec->list);
9547 chunk_rec->bg_rec = block_group_rec;
9552 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9553 chunk_rec->objectid,
9558 chunk_rec->type_flags);
9565 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9566 chunk_rec->num_stripes);
9567 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9568 devid = chunk_rec->stripes[i].devid;
9569 offset = chunk_rec->stripes[i].offset;
9570 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9571 devid, offset, length);
9572 if (dev_extent_item) {
9573 dev_extent_rec = container_of(dev_extent_item,
9574 struct device_extent_record,
9576 if (dev_extent_rec->objectid != devid ||
9577 dev_extent_rec->offset != offset ||
9578 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9579 dev_extent_rec->length != length) {
9582 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9583 chunk_rec->objectid,
9586 chunk_rec->stripes[i].devid,
9587 chunk_rec->stripes[i].offset,
9588 dev_extent_rec->objectid,
9589 dev_extent_rec->offset,
9590 dev_extent_rec->length);
9593 list_move(&dev_extent_rec->chunk_list,
9594 &chunk_rec->dextents);
9599 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9600 chunk_rec->objectid,
9603 chunk_rec->stripes[i].devid,
9604 chunk_rec->stripes[i].offset);
9611 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9612 int check_chunks(struct cache_tree *chunk_cache,
9613 struct block_group_tree *block_group_cache,
9614 struct device_extent_tree *dev_extent_cache,
9615 struct list_head *good, struct list_head *bad,
9616 struct list_head *rebuild, int silent)
9618 struct cache_extent *chunk_item;
9619 struct chunk_record *chunk_rec;
9620 struct block_group_record *bg_rec;
9621 struct device_extent_record *dext_rec;
9625 chunk_item = first_cache_extent(chunk_cache);
9626 while (chunk_item) {
9627 chunk_rec = container_of(chunk_item, struct chunk_record,
9629 err = check_chunk_refs(chunk_rec, block_group_cache,
9630 dev_extent_cache, silent);
9633 if (err == 0 && good)
9634 list_add_tail(&chunk_rec->list, good);
9635 if (err > 0 && rebuild)
9636 list_add_tail(&chunk_rec->list, rebuild);
9638 list_add_tail(&chunk_rec->list, bad);
9639 chunk_item = next_cache_extent(chunk_item);
9642 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9645 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9653 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9657 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
9668 static int check_device_used(struct device_record *dev_rec,
9669 struct device_extent_tree *dext_cache)
9671 struct cache_extent *cache;
9672 struct device_extent_record *dev_extent_rec;
9675 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9677 dev_extent_rec = container_of(cache,
9678 struct device_extent_record,
9680 if (dev_extent_rec->objectid != dev_rec->devid)
9683 list_del_init(&dev_extent_rec->device_list);
9684 total_byte += dev_extent_rec->length;
9685 cache = next_cache_extent(cache);
9688 if (total_byte != dev_rec->byte_used) {
9690 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9691 total_byte, dev_rec->byte_used, dev_rec->objectid,
9692 dev_rec->type, dev_rec->offset);
9699 /* check btrfs_dev_item -> btrfs_dev_extent */
9700 static int check_devices(struct rb_root *dev_cache,
9701 struct device_extent_tree *dev_extent_cache)
9703 struct rb_node *dev_node;
9704 struct device_record *dev_rec;
9705 struct device_extent_record *dext_rec;
9709 dev_node = rb_first(dev_cache);
9711 dev_rec = container_of(dev_node, struct device_record, node);
9712 err = check_device_used(dev_rec, dev_extent_cache);
9716 dev_node = rb_next(dev_node);
9718 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9721 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9722 dext_rec->objectid, dext_rec->offset, dext_rec->length);
9729 static int add_root_item_to_list(struct list_head *head,
9730 u64 objectid, u64 bytenr, u64 last_snapshot,
9731 u8 level, u8 drop_level,
9732 int level_size, struct btrfs_key *drop_key)
9735 struct root_item_record *ri_rec;
9736 ri_rec = malloc(sizeof(*ri_rec));
9739 ri_rec->bytenr = bytenr;
9740 ri_rec->objectid = objectid;
9741 ri_rec->level = level;
9742 ri_rec->level_size = level_size;
9743 ri_rec->drop_level = drop_level;
9744 ri_rec->last_snapshot = last_snapshot;
9746 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9747 list_add_tail(&ri_rec->list, head);
9752 static void free_root_item_list(struct list_head *list)
9754 struct root_item_record *ri_rec;
9756 while (!list_empty(list)) {
9757 ri_rec = list_first_entry(list, struct root_item_record,
9759 list_del_init(&ri_rec->list);
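/*
 * Process every queued root: read its root node, add it to the pending
 * blocks and keep calling run_next_block() so all reachable blocks are
 * fed into the extent/chunk/device caches.
 */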
9764 static int deal_root_from_list(struct list_head *list,
9765 struct btrfs_root *root,
9766 struct block_info *bits,
9768 struct cache_tree *pending,
9769 struct cache_tree *seen,
9770 struct cache_tree *reada,
9771 struct cache_tree *nodes,
9772 struct cache_tree *extent_cache,
9773 struct cache_tree *chunk_cache,
9774 struct rb_root *dev_cache,
9775 struct block_group_tree *block_group_cache,
9776 struct device_extent_tree *dev_extent_cache)
9781 while (!list_empty(list)) {
9782 struct root_item_record *rec;
9783 struct extent_buffer *buf;
9784 rec = list_entry(list->next,
9785 struct root_item_record, list);
9787 buf = read_tree_block(root->fs_info,
9788 rec->bytenr, rec->level_size, 0);
9789 if (!extent_buffer_uptodate(buf)) {
9790 free_extent_buffer(buf);
9794 ret = add_root_to_pending(buf, extent_cache, pending,
9795 seen, nodes, rec->objectid);
9799 * To rebuild the extent tree, we need to deal with each snapshot
9800 * one by one; otherwise we deal with nodes first, which
9801 * maximizes readahead.
9804 ret = run_next_block(root, bits, bits_nr, &last,
9805 pending, seen, reada, nodes,
9806 extent_cache, chunk_cache,
9807 dev_cache, block_group_cache,
9808 dev_extent_cache, rec);
9812 free_extent_buffer(buf);
9813 list_del(&rec->list);
9819 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9820 reada, nodes, extent_cache, chunk_cache,
9821 dev_cache, block_group_cache,
9822 dev_extent_cache, NULL);
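/*
 * Original mode check of chunks, extents and devices: walk all trees to
 * build the various caches, then cross check chunks against block
 * groups/dev extents, verify extent refs and device usage.
 */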
9832 static int check_chunks_and_extents(struct btrfs_root *root)
9834 struct rb_root dev_cache;
9835 struct cache_tree chunk_cache;
9836 struct block_group_tree block_group_cache;
9837 struct device_extent_tree dev_extent_cache;
9838 struct cache_tree extent_cache;
9839 struct cache_tree seen;
9840 struct cache_tree pending;
9841 struct cache_tree reada;
9842 struct cache_tree nodes;
9843 struct extent_io_tree excluded_extents;
9844 struct cache_tree corrupt_blocks;
9845 struct btrfs_path path;
9846 struct btrfs_key key;
9847 struct btrfs_key found_key;
9849 struct block_info *bits;
9851 struct extent_buffer *leaf;
9853 struct btrfs_root_item ri;
9854 struct list_head dropping_trees;
9855 struct list_head normal_trees;
9856 struct btrfs_root *root1;
9861 dev_cache = RB_ROOT;
9862 cache_tree_init(&chunk_cache);
9863 block_group_tree_init(&block_group_cache);
9864 device_extent_tree_init(&dev_extent_cache);
9866 cache_tree_init(&extent_cache);
9867 cache_tree_init(&seen);
9868 cache_tree_init(&pending);
9869 cache_tree_init(&nodes);
9870 cache_tree_init(&reada);
9871 cache_tree_init(&corrupt_blocks);
9872 extent_io_tree_init(&excluded_extents);
9873 INIT_LIST_HEAD(&dropping_trees);
9874 INIT_LIST_HEAD(&normal_trees);
9877 root->fs_info->excluded_extents = &excluded_extents;
9878 root->fs_info->fsck_extent_cache = &extent_cache;
9879 root->fs_info->free_extent_hook = free_extent_hook;
9880 root->fs_info->corrupt_blocks = &corrupt_blocks;
9884 bits = malloc(bits_nr * sizeof(struct block_info));
9890 if (ctx.progress_enabled) {
9891 ctx.tp = TASK_EXTENTS;
9892 task_start(ctx.info);
9896 root1 = root->fs_info->tree_root;
9897 level = btrfs_header_level(root1->node);
9898 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9899 root1->node->start, 0, level, 0,
9900 root1->fs_info->nodesize, NULL);
9903 root1 = root->fs_info->chunk_root;
9904 level = btrfs_header_level(root1->node);
9905 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9906 root1->node->start, 0, level, 0,
9907 root1->fs_info->nodesize, NULL);
9910 btrfs_init_path(&path);
9913 key.type = BTRFS_ROOT_ITEM_KEY;
9914 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9919 leaf = path.nodes[0];
9920 slot = path.slots[0];
9921 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9922 ret = btrfs_next_leaf(root, &path);
9925 leaf = path.nodes[0];
9926 slot = path.slots[0];
9928 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9929 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9930 unsigned long offset;
9933 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9934 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9935 last_snapshot = btrfs_root_last_snapshot(&ri);
9936 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9937 level = btrfs_root_level(&ri);
9938 level_size = root->fs_info->nodesize;
9939 ret = add_root_item_to_list(&normal_trees,
9941 btrfs_root_bytenr(&ri),
9942 last_snapshot, level,
9943 0, level_size, NULL);
9947 level = btrfs_root_level(&ri);
9948 level_size = root->fs_info->nodesize;
9949 objectid = found_key.objectid;
9950 btrfs_disk_key_to_cpu(&found_key,
9952 ret = add_root_item_to_list(&dropping_trees,
9954 btrfs_root_bytenr(&ri),
9955 last_snapshot, level,
9957 level_size, &found_key);
9964 btrfs_release_path(&path);
9967 * check_block can return -EAGAIN if it fixes something, please keep
9968 * this in mind when dealing with return values from these functions, if
9969 * we get -EAGAIN we want to fall through and restart the loop.
9971 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9972 &seen, &reada, &nodes, &extent_cache,
9973 &chunk_cache, &dev_cache, &block_group_cache,
9980 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9981 &pending, &seen, &reada, &nodes,
9982 &extent_cache, &chunk_cache, &dev_cache,
9983 &block_group_cache, &dev_extent_cache);
9990 ret = check_chunks(&chunk_cache, &block_group_cache,
9991 &dev_extent_cache, NULL, NULL, NULL, 0);
9998 ret = check_extent_refs(root, &extent_cache);
10000 if (ret == -EAGAIN)
10005 ret = check_devices(&dev_cache, &dev_extent_cache);
10010 task_stop(ctx.info);
10012 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10013 extent_io_tree_cleanup(&excluded_extents);
10014 root->fs_info->fsck_extent_cache = NULL;
10015 root->fs_info->free_extent_hook = NULL;
10016 root->fs_info->corrupt_blocks = NULL;
10017 root->fs_info->excluded_extents = NULL;
10020 free_chunk_cache_tree(&chunk_cache);
10021 free_device_cache_tree(&dev_cache);
10022 free_block_group_tree(&block_group_cache);
10023 free_device_extent_tree(&dev_extent_cache);
10024 free_extent_cache_tree(&seen);
10025 free_extent_cache_tree(&pending);
10026 free_extent_cache_tree(&reada);
10027 free_extent_cache_tree(&nodes);
10028 free_root_item_list(&normal_trees);
10029 free_root_item_list(&dropping_trees);
10032 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10033 free_extent_cache_tree(&seen);
10034 free_extent_cache_tree(&pending);
10035 free_extent_cache_tree(&reada);
10036 free_extent_cache_tree(&nodes);
10037 free_chunk_cache_tree(&chunk_cache);
10038 free_block_group_tree(&block_group_cache);
10039 free_device_cache_tree(&dev_cache);
10040 free_device_extent_tree(&dev_extent_cache);
10041 free_extent_record_cache(&extent_cache);
10042 free_root_item_list(&normal_trees);
10043 free_root_item_list(&dropping_trees);
10044 extent_io_tree_cleanup(&excluded_extents);
10049 * Check backrefs of a tree block given by @bytenr or @eb.
10051 * @root: the root containing the @bytenr or @eb
10052 * @eb: tree block extent buffer, can be NULL
10053 * @bytenr: bytenr of the tree block to search
10054 * @level: tree level of the tree block
10055 * @owner: owner of the tree block
10057 * Return >0 for any error found and output error message
10058 * Return 0 for no error found
10060 static int check_tree_block_ref(struct btrfs_root *root,
10061 struct extent_buffer *eb, u64 bytenr,
10062 int level, u64 owner)
10064 struct btrfs_key key;
10065 struct btrfs_root *extent_root = root->fs_info->extent_root;
10066 struct btrfs_path path;
10067 struct btrfs_extent_item *ei;
10068 struct btrfs_extent_inline_ref *iref;
10069 struct extent_buffer *leaf;
10075 u32 nodesize = root->fs_info->nodesize;
10078 int tree_reloc_root = 0;
10083 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10084 btrfs_header_bytenr(root->node) == bytenr)
10085 tree_reloc_root = 1;
10087 btrfs_init_path(&path);
10088 key.objectid = bytenr;
10089 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10090 key.type = BTRFS_METADATA_ITEM_KEY;
10092 key.type = BTRFS_EXTENT_ITEM_KEY;
10093 key.offset = (u64)-1;
10095 /* Search for the backref in extent tree */
10096 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10098 err |= BACKREF_MISSING;
10101 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10103 err |= BACKREF_MISSING;
10107 leaf = path.nodes[0];
10108 slot = path.slots[0];
10109 btrfs_item_key_to_cpu(leaf, &key, slot);
10111 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10113 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10114 skinny_level = (int)key.offset;
10115 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10117 struct btrfs_tree_block_info *info;
10119 info = (struct btrfs_tree_block_info *)(ei + 1);
10120 skinny_level = btrfs_tree_block_level(leaf, info);
10121 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10128 if (!(btrfs_extent_flags(leaf, ei) &
10129 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10131 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10132 key.objectid, nodesize,
10133 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10134 err = BACKREF_MISMATCH;
10136 header_gen = btrfs_header_generation(eb);
10137 extent_gen = btrfs_extent_generation(leaf, ei);
10138 if (header_gen != extent_gen) {
10140 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10141 key.objectid, nodesize, header_gen,
10143 err = BACKREF_MISMATCH;
10145 if (level != skinny_level) {
10147 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10148 key.objectid, nodesize, level, skinny_level);
10149 err = BACKREF_MISMATCH;
10151 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10153 "extent[%llu %u] is referred by other roots than %llu",
10154 key.objectid, nodesize, root->objectid);
10155 err = BACKREF_MISMATCH;
10160 * Iterate the extent/metadata item to find the exact backref
10162 item_size = btrfs_item_size_nr(leaf, slot);
10163 ptr = (unsigned long)iref;
10164 end = (unsigned long)ei + item_size;
10165 while (ptr < end) {
10166 iref = (struct btrfs_extent_inline_ref *)ptr;
10167 type = btrfs_extent_inline_ref_type(leaf, iref);
10168 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10170 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10171 (offset == root->objectid || offset == owner)) {
10173 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10175 * Backref of tree reloc root points to itself, no need
10176 * to check backref any more.
10178 if (tree_reloc_root)
10181 /* Check if the backref points to valid referencer */
10182 found_ref = !check_tree_block_ref(root, NULL,
10183 offset, level + 1, owner);
10188 ptr += btrfs_extent_inline_ref_size(type);
10192 * Inlined extent item doesn't have what we need, check
10193 * TREE_BLOCK_REF_KEY
10196 btrfs_release_path(&path);
10197 key.objectid = bytenr;
10198 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10199 key.offset = root->objectid;
10201 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10206 err |= BACKREF_MISSING;
10208 btrfs_release_path(&path);
10209 if (eb && (err & BACKREF_MISSING))
10210 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10211 bytenr, nodesize, owner, level);
10216 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10218 * Return >0 for any error found and output error message
10219 * Return 0 for no error found
10221 static int check_extent_data_item(struct btrfs_root *root,
10222 struct extent_buffer *eb, int slot)
10224 struct btrfs_file_extent_item *fi;
10225 struct btrfs_path path;
10226 struct btrfs_root *extent_root = root->fs_info->extent_root;
10227 struct btrfs_key fi_key;
10228 struct btrfs_key dbref_key;
10229 struct extent_buffer *leaf;
10230 struct btrfs_extent_item *ei;
10231 struct btrfs_extent_inline_ref *iref;
10232 struct btrfs_extent_data_ref *dref;
10235 u64 disk_num_bytes;
10236 u64 extent_num_bytes;
10243 int found_dbackref = 0;
10247 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10248 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10250 /* Nothing to check for hole and inline data extents */
10251 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10252 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10255 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10256 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10257 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10259 /* Check unaligned disk_num_bytes and num_bytes */
10260 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10262 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10263 fi_key.objectid, fi_key.offset, disk_num_bytes,
10264 root->fs_info->sectorsize);
10265 err |= BYTES_UNALIGNED;
10267 data_bytes_allocated += disk_num_bytes;
10269 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10271 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10272 fi_key.objectid, fi_key.offset, extent_num_bytes,
10273 root->fs_info->sectorsize);
10274 err |= BYTES_UNALIGNED;
10276 data_bytes_referenced += extent_num_bytes;
10278 owner = btrfs_header_owner(eb);
10280 /* Check the extent item of the file extent in extent tree */
10281 btrfs_init_path(&path);
10282 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10283 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10284 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10286 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
10290 leaf = path.nodes[0];
10291 slot = path.slots[0];
10292 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10294 extent_flags = btrfs_extent_flags(leaf, ei);
10296 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10298 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10299 disk_bytenr, disk_num_bytes,
10300 BTRFS_EXTENT_FLAG_DATA);
10301 err |= BACKREF_MISMATCH;
10304 /* Check data backref inside that extent item */
10305 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10306 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10307 ptr = (unsigned long)iref;
10308 end = (unsigned long)ei + item_size;
10309 while (ptr < end) {
10310 iref = (struct btrfs_extent_inline_ref *)ptr;
10311 type = btrfs_extent_inline_ref_type(leaf, iref);
10312 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10314 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10315 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10316 if (ref_root == owner || ref_root == root->objectid)
10317 found_dbackref = 1;
10318 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10319 found_dbackref = !check_tree_block_ref(root, NULL,
10320 btrfs_extent_inline_ref_offset(leaf, iref),
10324 if (found_dbackref)
10326 ptr += btrfs_extent_inline_ref_size(type);
10329 if (!found_dbackref) {
10330 btrfs_release_path(&path);
10332 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10333 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10334 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10335 dbref_key.offset = hash_extent_data_ref(root->objectid,
10336 fi_key.objectid, fi_key.offset);
10338 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10339 &dbref_key, &path, 0, 0);
10341 found_dbackref = 1;
10345 btrfs_release_path(&path);
10348 * Neither inlined nor EXTENT_DATA_REF found, try
10349 * SHARED_DATA_REF as a last resort.
10351 dbref_key.objectid = disk_bytenr;
10352 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10353 dbref_key.offset = eb->start;
10355 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10356 &dbref_key, &path, 0, 0);
10358 found_dbackref = 1;
10364 if (!found_dbackref)
10365 err |= BACKREF_MISSING;
10366 btrfs_release_path(&path);
10367 if (err & BACKREF_MISSING) {
10368 error("data extent[%llu %llu] backref lost",
10369 disk_bytenr, disk_num_bytes);
10375 * Get the real tree block level, for cases like shared blocks
10376 * Return >= 0 as tree level
10377 * Return <0 for error
10379 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10381 struct extent_buffer *eb;
10382 struct btrfs_path path;
10383 struct btrfs_key key;
10384 struct btrfs_extent_item *ei;
10387 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10392 /* Search extent tree for extent generation and level */
10393 key.objectid = bytenr;
10394 key.type = BTRFS_METADATA_ITEM_KEY;
10395 key.offset = (u64)-1;
10397 btrfs_init_path(&path);
10398 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10401 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10409 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10410 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10411 struct btrfs_extent_item);
10412 flags = btrfs_extent_flags(path.nodes[0], ei);
10413 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10418 /* Get transid for later read_tree_block() check */
10419 transid = btrfs_extent_generation(path.nodes[0], ei);
10421 /* Get backref level as one source */
10422 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10423 backref_level = key.offset;
10425 struct btrfs_tree_block_info *info;
10427 info = (struct btrfs_tree_block_info *)(ei + 1);
10428 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10430 btrfs_release_path(&path);
10432 /* Get level from tree block as an alternative source */
10433 eb = read_tree_block(fs_info, bytenr, nodesize, transid);
10434 if (!extent_buffer_uptodate(eb)) {
10435 free_extent_buffer(eb);
10438 header_level = btrfs_header_level(eb);
10439 free_extent_buffer(eb);
10441 if (header_level != backref_level)
10443 return header_level;
10446 btrfs_release_path(&path);
10451 * Check if a tree block backref is valid (points to a valid tree block)
10452 * if level == -1, level will be resolved
10453 * Return >0 for any error found and print error message
10455 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10456 u64 bytenr, int level)
10458 struct btrfs_root *root;
10459 struct btrfs_key key;
10460 struct btrfs_path path;
10461 struct extent_buffer *eb;
10462 struct extent_buffer *node;
10463 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10467 /* Query level for level == -1 special case */
10469 level = query_tree_block_level(fs_info, bytenr);
10471 err |= REFERENCER_MISSING;
10475 key.objectid = root_id;
10476 key.type = BTRFS_ROOT_ITEM_KEY;
10477 key.offset = (u64)-1;
10479 root = btrfs_read_fs_root(fs_info, &key);
10480 if (IS_ERR(root)) {
10481 err |= REFERENCER_MISSING;
10485 /* Read out the tree block to get item/node key */
10486 eb = read_tree_block(fs_info, bytenr, root->fs_info->nodesize, 0);
10487 if (!extent_buffer_uptodate(eb)) {
10488 err |= REFERENCER_MISSING;
10489 free_extent_buffer(eb);
10493 /* Empty tree, no need to check key */
10494 if (!btrfs_header_nritems(eb) && !level) {
10495 free_extent_buffer(eb);
10500 btrfs_node_key_to_cpu(eb, &key, 0);
10502 btrfs_item_key_to_cpu(eb, &key, 0);
10504 free_extent_buffer(eb);
10506 btrfs_init_path(&path);
10507 path.lowest_level = level;
10508 /* Search with the first key, to ensure we can reach it */
10509 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10511 err |= REFERENCER_MISSING;
10515 node = path.nodes[level];
10516 if (btrfs_header_bytenr(node) != bytenr) {
10518 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10519 bytenr, nodesize, bytenr,
10520 btrfs_header_bytenr(node));
10521 err |= REFERENCER_MISMATCH;
10523 if (btrfs_header_level(node) != level) {
10525 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10526 bytenr, nodesize, level,
10527 btrfs_header_level(node));
10528 err |= REFERENCER_MISMATCH;
10532 btrfs_release_path(&path);
10534 if (err & REFERENCER_MISSING) {
10536 error("extent [%llu %d] lost referencer (owner: %llu)",
10537 bytenr, nodesize, root_id);
10540 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10541 bytenr, nodesize, root_id, level);
10548 * Check if tree block @eb is tree reloc root.
10549 * Return 0 if it's not, or if any problem happens
10550 * Return 1 if it's a tree reloc root
10552 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10553 struct extent_buffer *eb)
10555 struct btrfs_root *tree_reloc_root;
10556 struct btrfs_key key;
10557 u64 bytenr = btrfs_header_bytenr(eb);
10558 u64 owner = btrfs_header_owner(eb);
10561 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10562 key.offset = owner;
10563 key.type = BTRFS_ROOT_ITEM_KEY;
10565 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10566 if (IS_ERR(tree_reloc_root))
10569 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10571 btrfs_free_fs_root(tree_reloc_root);
10576 * Check referencer for shared block backref
10577 * If level == -1, this function will resolve the level.
10579 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10580 u64 parent, u64 bytenr, int level)
10582 struct extent_buffer *eb;
10583 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10585 int found_parent = 0;
10588 eb = read_tree_block(fs_info, parent, nodesize, 0);
10589 if (!extent_buffer_uptodate(eb))
10593 level = query_tree_block_level(fs_info, bytenr);
10597 /* It's possible it's a tree reloc root */
10598 if (parent == bytenr) {
10599 if (is_tree_reloc_root(fs_info, eb))
10604 if (level + 1 != btrfs_header_level(eb))
10607 nr = btrfs_header_nritems(eb);
10608 for (i = 0; i < nr; i++) {
10609 if (bytenr == btrfs_node_blockptr(eb, i)) {
10615 free_extent_buffer(eb);
10616 if (!found_parent) {
10618 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10619 bytenr, nodesize, parent, level);
10620 return REFERENCER_MISSING;
10626 * Check referencer for normal (inlined) data ref
10627 * If len == 0, it will be resolved by searching in extent tree
10629 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10630 u64 root_id, u64 objectid, u64 offset,
10631 u64 bytenr, u64 len, u32 count)
10633 struct btrfs_root *root;
10634 struct btrfs_root *extent_root = fs_info->extent_root;
10635 struct btrfs_key key;
10636 struct btrfs_path path;
10637 struct extent_buffer *leaf;
10638 struct btrfs_file_extent_item *fi;
10639 u32 found_count = 0;
10644 key.objectid = bytenr;
10645 key.type = BTRFS_EXTENT_ITEM_KEY;
10646 key.offset = (u64)-1;
10648 btrfs_init_path(&path);
10649 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10652 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10655 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10656 if (key.objectid != bytenr ||
10657 key.type != BTRFS_EXTENT_ITEM_KEY)
10660 btrfs_release_path(&path);
10662 key.objectid = root_id;
10663 key.type = BTRFS_ROOT_ITEM_KEY;
10664 key.offset = (u64)-1;
10665 btrfs_init_path(&path);
10667 root = btrfs_read_fs_root(fs_info, &key);
10671 key.objectid = objectid;
10672 key.type = BTRFS_EXTENT_DATA_KEY;
10674 * This can be nasty: the data backref offset is
10675 * file offset - file extent offset, which is smaller than or
10676 * equal to the original backref offset. The only special case is
10677 * overflow, so we need a special check and a further search.
10679 key.offset = offset & (1ULL << 63) ? 0 : offset;
10681 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10686 * Search forward to get the correct one.
10687 * NOTE: As we must do a comprehensive check on the data backref to
10688 * make sure the dref count also matches, we must iterate all file
10689 * extents for that inode.
10692 leaf = path.nodes[0];
10693 slot = path.slots[0];
10695 if (slot >= btrfs_header_nritems(leaf))
10697 btrfs_item_key_to_cpu(leaf, &key, slot);
10698 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10700 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10702 * Besides the normal disk bytenr and disk num bytes, we still
10703 * need an extra check on the dbackref offset, as
10704 * dbackref offset = file_offset - file_extent_offset
10706 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10707 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10708 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10713 ret = btrfs_next_item(root, &path);
10718 btrfs_release_path(&path);
10719 if (found_count != count) {
10721 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10722 bytenr, len, root_id, objectid, offset, count, found_count);
10723 return REFERENCER_MISSING;
10729 * Check if the referencer of a shared data backref exists
10731 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10732 u64 parent, u64 bytenr)
10734 struct extent_buffer *eb;
10735 struct btrfs_key key;
10736 struct btrfs_file_extent_item *fi;
10737 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10739 int found_parent = 0;
10742 eb = read_tree_block(fs_info, parent, nodesize, 0);
10743 if (!extent_buffer_uptodate(eb))
10746 nr = btrfs_header_nritems(eb);
10747 for (i = 0; i < nr; i++) {
10748 btrfs_item_key_to_cpu(eb, &key, i);
10749 if (key.type != BTRFS_EXTENT_DATA_KEY)
10752 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10753 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10756 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10763 free_extent_buffer(eb);
10764 if (!found_parent) {
10765 error("shared extent %llu referencer lost (parent: %llu)",
10767 return REFERENCER_MISSING;
10773 * This function will check a given extent item, including its backref and
10774 * itself (like crossing stripe boundary and type)
10776 * Since we don't use extent_record anymore, introduce a new error bit
10778 static int check_extent_item(struct btrfs_fs_info *fs_info,
10779 struct extent_buffer *eb, int slot)
10781 struct btrfs_extent_item *ei;
10782 struct btrfs_extent_inline_ref *iref;
10783 struct btrfs_extent_data_ref *dref;
10787 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10788 u32 item_size = btrfs_item_size_nr(eb, slot);
10793 struct btrfs_key key;
10797 btrfs_item_key_to_cpu(eb, &key, slot);
10798 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10799 bytes_used += key.offset;
10801 bytes_used += nodesize;
10803 if (item_size < sizeof(*ei)) {
10805 * COMPAT_EXTENT_TREE_V0 case, but that is an ancient format
10806 * from when the on-disk layout was still undetermined.
10807 * No need to care about it anymore.
10809 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10813 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10814 flags = btrfs_extent_flags(eb, ei);
10816 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10818 if (metadata && check_crossing_stripes(global_info, key.objectid,
10820 error("bad metadata [%llu, %llu) crossing stripe boundary",
10821 key.objectid, key.objectid + nodesize);
10822 err |= CROSSING_STRIPE_BOUNDARY;
10825 ptr = (unsigned long)(ei + 1);
10827 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10828 /* Old EXTENT_ITEM metadata */
10829 struct btrfs_tree_block_info *info;
10831 info = (struct btrfs_tree_block_info *)ptr;
10832 level = btrfs_tree_block_level(eb, info);
10833 ptr += sizeof(struct btrfs_tree_block_info);
10835 /* New METADATA_ITEM */
10836 level = key.offset;
10838 end = (unsigned long)ei + item_size;
10841 /* Reached extent item end normally */
10845 /* Beyond extent item end, wrong item size */
10847 err |= ITEM_SIZE_MISMATCH;
10848 error("extent item at bytenr %llu slot %d has wrong size",
10853 /* Now check every backref in this extent item */
10854 iref = (struct btrfs_extent_inline_ref *)ptr;
10855 type = btrfs_extent_inline_ref_type(eb, iref);
10856 offset = btrfs_extent_inline_ref_offset(eb, iref);
10858 case BTRFS_TREE_BLOCK_REF_KEY:
10859 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10863 case BTRFS_SHARED_BLOCK_REF_KEY:
10864 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10868 case BTRFS_EXTENT_DATA_REF_KEY:
10869 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10870 ret = check_extent_data_backref(fs_info,
10871 btrfs_extent_data_ref_root(eb, dref),
10872 btrfs_extent_data_ref_objectid(eb, dref),
10873 btrfs_extent_data_ref_offset(eb, dref),
10874 key.objectid, key.offset,
10875 btrfs_extent_data_ref_count(eb, dref));
10878 case BTRFS_SHARED_DATA_REF_KEY:
10879 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10883 error("extent[%llu %d %llu] has unknown ref type: %d",
10884 key.objectid, key.type, key.offset, type);
10885 err |= UNKNOWN_TYPE;
10889 ptr += btrfs_extent_inline_ref_size(type);
10897 * Check if a dev extent item is referenced correctly by its chunk
10899 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10900 struct extent_buffer *eb, int slot)
10902 struct btrfs_root *chunk_root = fs_info->chunk_root;
10903 struct btrfs_dev_extent *ptr;
10904 struct btrfs_path path;
10905 struct btrfs_key chunk_key;
10906 struct btrfs_key devext_key;
10907 struct btrfs_chunk *chunk;
10908 struct extent_buffer *l;
10912 int found_chunk = 0;
10915 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10916 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10917 length = btrfs_dev_extent_length(eb, ptr);
10919 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10920 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10921 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10923 btrfs_init_path(&path);
10924 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10929 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10930 ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
10935 if (btrfs_stripe_length(fs_info, l, chunk) != length)
10938 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10939 for (i = 0; i < num_stripes; i++) {
10940 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10941 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10943 if (devid == devext_key.objectid &&
10944 offset == devext_key.offset) {
10950 btrfs_release_path(&path);
10951 if (!found_chunk) {
10953 "device extent[%llu, %llu, %llu] did not find the related chunk",
10954 devext_key.objectid, devext_key.offset, length);
10955 return REFERENCER_MISSING;
10961 * Check if the used space recorded in the dev item matches its dev extents
10963 static int check_dev_item(struct btrfs_fs_info *fs_info,
10964 struct extent_buffer *eb, int slot)
10966 struct btrfs_root *dev_root = fs_info->dev_root;
10967 struct btrfs_dev_item *dev_item;
10968 struct btrfs_path path;
10969 struct btrfs_key key;
10970 struct btrfs_dev_extent *ptr;
10976 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10977 dev_id = btrfs_device_id(eb, dev_item);
10978 used = btrfs_device_bytes_used(eb, dev_item);
10980 key.objectid = dev_id;
10981 key.type = BTRFS_DEV_EXTENT_KEY;
10984 btrfs_init_path(&path);
10985 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
10987 btrfs_item_key_to_cpu(eb, &key, slot);
10988 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10989 key.objectid, key.type, key.offset);
10990 btrfs_release_path(&path);
10991 return REFERENCER_MISSING;
10994 /* Iterate dev_extents to calculate the used space of a device */
10996 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10999 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11000 if (key.objectid > dev_id)
11002 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11005 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11006 struct btrfs_dev_extent);
11007 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11009 ret = btrfs_next_item(dev_root, &path);
11013 btrfs_release_path(&path);
11015 if (used != total) {
11016 btrfs_item_key_to_cpu(eb, &key, slot);
11018 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11019 total, used, BTRFS_ROOT_TREE_OBJECTID,
11020 BTRFS_DEV_EXTENT_KEY, dev_id);
11021 return ACCOUNTING_MISMATCH;
11027 * Check a block group item with its referencer (chunk) and its used space
11028 * with extent/metadata item
11030 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11031 struct extent_buffer *eb, int slot)
11033 struct btrfs_root *extent_root = fs_info->extent_root;
11034 struct btrfs_root *chunk_root = fs_info->chunk_root;
11035 struct btrfs_block_group_item *bi;
11036 struct btrfs_block_group_item bg_item;
11037 struct btrfs_path path;
11038 struct btrfs_key bg_key;
11039 struct btrfs_key chunk_key;
11040 struct btrfs_key extent_key;
11041 struct btrfs_chunk *chunk;
11042 struct extent_buffer *leaf;
11043 struct btrfs_extent_item *ei;
11044 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11052 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11053 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11054 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11055 used = btrfs_block_group_used(&bg_item);
11056 bg_flags = btrfs_block_group_flags(&bg_item);
11058 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11059 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11060 chunk_key.offset = bg_key.objectid;
11062 btrfs_init_path(&path);
11063 /* Search for the referencer chunk */
11064 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11067 "block group[%llu %llu] did not find the related chunk item",
11068 bg_key.objectid, bg_key.offset);
11069 err |= REFERENCER_MISSING;
11071 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11072 struct btrfs_chunk);
11073 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11076 "block group[%llu %llu] related chunk item length does not match",
11077 bg_key.objectid, bg_key.offset);
11078 err |= REFERENCER_MISMATCH;
11081 btrfs_release_path(&path);
11083 /* Search from the block group bytenr */
11084 extent_key.objectid = bg_key.objectid;
11085 extent_key.type = 0;
11086 extent_key.offset = 0;
11088 btrfs_init_path(&path);
11089 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11093 /* Iterate extent tree to account used space */
11095 leaf = path.nodes[0];
11097 /* Search slot can point to the last item beyond leaf nritems */
11098 if (path.slots[0] >= btrfs_header_nritems(leaf))
11101 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
11102 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11105 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11106 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11108 if (extent_key.objectid < bg_key.objectid)
11111 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11114 total += extent_key.offset;
11116 ei = btrfs_item_ptr(leaf, path.slots[0],
11117 struct btrfs_extent_item);
11118 flags = btrfs_extent_flags(leaf, ei);
11119 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11120 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11122 "bad extent[%llu, %llu) type mismatch with chunk",
11123 extent_key.objectid,
11124 extent_key.objectid + extent_key.offset);
11125 err |= CHUNK_TYPE_MISMATCH;
11127 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11128 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11129 BTRFS_BLOCK_GROUP_METADATA))) {
11131 "bad extent[%llu, %llu) type mismatch with chunk",
11132 extent_key.objectid,
11133 extent_key.objectid + nodesize);
11134 err |= CHUNK_TYPE_MISMATCH;
11138 ret = btrfs_next_item(extent_root, &path);
11144 btrfs_release_path(&path);
11146 if (total != used) {
11148 "block group[%llu %llu] used %llu but extent items used %llu",
11149 bg_key.objectid, bg_key.offset, used, total);
11150 err |= ACCOUNTING_MISMATCH;
11156 * Check a chunk item.
11157 * Including checking all referred dev extents and the block group item
11159 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11160 struct extent_buffer *eb, int slot)
11162 struct btrfs_root *extent_root = fs_info->extent_root;
11163 struct btrfs_root *dev_root = fs_info->dev_root;
11164 struct btrfs_path path;
11165 struct btrfs_key chunk_key;
11166 struct btrfs_key bg_key;
11167 struct btrfs_key devext_key;
11168 struct btrfs_chunk *chunk;
11169 struct extent_buffer *leaf;
11170 struct btrfs_block_group_item *bi;
11171 struct btrfs_block_group_item bg_item;
11172 struct btrfs_dev_extent *ptr;
11184 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11185 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11186 length = btrfs_chunk_length(eb, chunk);
11187 chunk_end = chunk_key.offset + length;
11188 ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
11191 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11193 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11196 type = btrfs_chunk_type(eb, chunk);
11198 bg_key.objectid = chunk_key.offset;
11199 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11200 bg_key.offset = length;
11202 btrfs_init_path(&path);
11203 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11206 "chunk[%llu %llu) did not find the related block group item",
11207 chunk_key.offset, chunk_end);
11208 err |= REFERENCER_MISSING;
11210 leaf = path.nodes[0];
11211 bi = btrfs_item_ptr(leaf, path.slots[0],
11212 struct btrfs_block_group_item);
11213 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11215 if (btrfs_block_group_flags(&bg_item) != type) {
11217 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11218 chunk_key.offset, chunk_end, type,
11219 btrfs_block_group_flags(&bg_item));
11220 err |= REFERENCER_MISSING;
11224 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11225 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
11226 for (i = 0; i < num_stripes; i++) {
11227 btrfs_release_path(&path);
11228 btrfs_init_path(&path);
11229 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11230 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11231 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11233 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11236 goto not_match_dev;
11238 leaf = path.nodes[0];
11239 ptr = btrfs_item_ptr(leaf, path.slots[0],
11240 struct btrfs_dev_extent);
11241 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11242 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11243 if (objectid != chunk_key.objectid ||
11244 offset != chunk_key.offset ||
11245 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11246 goto not_match_dev;
11249 err |= BACKREF_MISSING;
11251 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11252 chunk_key.objectid, chunk_end, i);
11255 btrfs_release_path(&path);
11261 * Main entry function to check known items and update related accounting info
11263 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11265 struct btrfs_fs_info *fs_info = root->fs_info;
11266 struct btrfs_key key;
11269 struct btrfs_extent_data_ref *dref;
11274 btrfs_item_key_to_cpu(eb, &key, slot);
11278 case BTRFS_EXTENT_DATA_KEY:
11279 ret = check_extent_data_item(root, eb, slot);
11282 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11283 ret = check_block_group_item(fs_info, eb, slot);
11286 case BTRFS_DEV_ITEM_KEY:
11287 ret = check_dev_item(fs_info, eb, slot);
11290 case BTRFS_CHUNK_ITEM_KEY:
11291 ret = check_chunk_item(fs_info, eb, slot);
11294 case BTRFS_DEV_EXTENT_KEY:
11295 ret = check_dev_extent_item(fs_info, eb, slot);
11298 case BTRFS_EXTENT_ITEM_KEY:
11299 case BTRFS_METADATA_ITEM_KEY:
11300 ret = check_extent_item(fs_info, eb, slot);
11303 case BTRFS_EXTENT_CSUM_KEY:
11304 total_csum_bytes += btrfs_item_size_nr(eb, slot);
11306 case BTRFS_TREE_BLOCK_REF_KEY:
11307 ret = check_tree_block_backref(fs_info, key.offset,
11311 case BTRFS_EXTENT_DATA_REF_KEY:
11312 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11313 ret = check_extent_data_backref(fs_info,
11314 btrfs_extent_data_ref_root(eb, dref),
11315 btrfs_extent_data_ref_objectid(eb, dref),
11316 btrfs_extent_data_ref_offset(eb, dref),
11318 btrfs_extent_data_ref_count(eb, dref));
11321 case BTRFS_SHARED_BLOCK_REF_KEY:
11322 ret = check_shared_block_backref(fs_info, key.offset,
11326 case BTRFS_SHARED_DATA_REF_KEY:
11327 ret = check_shared_data_backref(fs_info, key.offset,
11335 if (++slot < btrfs_header_nritems(eb))
11342 * Helper function for the later fs/subvol tree check, to determine if a
11343 * tree block should be checked.
11344 * This function ensures that only the direct referencer with the lowest
11345 * rootid checks a given fs/subvolume tree block.
11347 * The backref check in the extent tree will detect errors like a missing
11348 * subvolume tree, so we can check aggressively to reduce duplicated checks.
11350 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11352 struct btrfs_root *extent_root = root->fs_info->extent_root;
11353 struct btrfs_key key;
11354 struct btrfs_path path;
11355 struct extent_buffer *leaf;
11357 struct btrfs_extent_item *ei;
11363 struct btrfs_extent_inline_ref *iref;
11366 btrfs_init_path(&path);
11367 key.objectid = btrfs_header_bytenr(eb);
11368 key.type = BTRFS_METADATA_ITEM_KEY;
11369 key.offset = (u64)-1;
11372 * Any failure in backref resolving means we can't determine
11373 * which root the tree block belongs to.
11374 * So in that case, we need to check that tree block.
11376 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11380 ret = btrfs_previous_extent_item(extent_root, &path,
11381 btrfs_header_bytenr(eb));
11385 leaf = path.nodes[0];
11386 slot = path.slots[0];
11387 btrfs_item_key_to_cpu(leaf, &key, slot);
11388 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11390 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11391 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11393 struct btrfs_tree_block_info *info;
11395 info = (struct btrfs_tree_block_info *)(ei + 1);
11396 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11399 item_size = btrfs_item_size_nr(leaf, slot);
11400 ptr = (unsigned long)iref;
11401 end = (unsigned long)ei + item_size;
11402 while (ptr < end) {
11403 iref = (struct btrfs_extent_inline_ref *)ptr;
11404 type = btrfs_extent_inline_ref_type(leaf, iref);
11405 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11408 * We only check the tree block if the current root is
11409 * its lowest referencer.
11411 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11412 offset < root->objectid) {
11413 btrfs_release_path(&path);
11417 ptr += btrfs_extent_inline_ref_size(type);
11420 * Normally we should also check the keyed tree block refs, but that may be
11421 * very time consuming. The inlined refs should already let us skip a lot
11422 * of refs, so skip searching the keyed tree block refs.
11426 btrfs_release_path(&path);
11431 * Traversal function for tree block. We will do:
11432 * 1) Skip shared fs/subvolume tree blocks
11433 * 2) Update related bytes accounting
11434 * 3) Pre-order traversal
11436 static int traverse_tree_block(struct btrfs_root *root,
11437 struct extent_buffer *node)
11439 struct extent_buffer *eb;
11440 struct btrfs_key key;
11441 struct btrfs_key drop_key;
11449 * Skip shared fs/subvolume tree blocks; they will
11450 * be checked by the referencer with the lowest rootid.
11452 if (is_fstree(root->objectid) && !should_check(root, node))
11455 /* Update bytes accounting */
11456 total_btree_bytes += node->len;
11457 if (fs_root_objectid(btrfs_header_owner(node)))
11458 total_fs_tree_bytes += node->len;
11459 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11460 total_extent_tree_bytes += node->len;
11461 if (!found_old_backref &&
11462 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11463 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11464 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11465 found_old_backref = 1;
11467 /* pre-order traversal, check itself first */
11468 level = btrfs_header_level(node);
11469 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11470 btrfs_header_level(node),
11471 btrfs_header_owner(node));
11475 "check %s failed root %llu bytenr %llu level %d, force continue check",
11476 level ? "node":"leaf", root->objectid,
11477 btrfs_header_bytenr(node), btrfs_header_level(node));
11480 btree_space_waste += btrfs_leaf_free_space(root, node);
11481 ret = check_leaf_items(root, node);
11486 nr = btrfs_header_nritems(node);
11487 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11488 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11489 sizeof(struct btrfs_key_ptr);
11491 /* Then check all its children */
11492 for (i = 0; i < nr; i++) {
11493 u64 blocknr = btrfs_node_blockptr(node, i);
11495 btrfs_node_key_to_cpu(node, &key, i);
11496 if (level == root->root_item.drop_level &&
11497 is_dropped_key(&key, &drop_key))
11501 * As a btrfs tree has at most 8 levels (0..7), it's quite safe
11502 * for the function to call itself recursively.
11504 eb = read_tree_block(root->fs_info, blocknr,
11505 root->fs_info->nodesize, 0);
11506 if (extent_buffer_uptodate(eb)) {
11507 ret = traverse_tree_block(root, eb);
11510 free_extent_buffer(eb);
11517 * Low memory usage version of check_chunks_and_extents.
11519 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11521 struct btrfs_path path;
11522 struct btrfs_key key;
11523 struct btrfs_root *root1;
11524 struct btrfs_root *cur_root;
11528 root1 = root->fs_info->chunk_root;
11529 ret = traverse_tree_block(root1, root1->node);
11532 root1 = root->fs_info->tree_root;
11533 ret = traverse_tree_block(root1, root1->node);
11536 btrfs_init_path(&path);
11537 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11539 key.type = BTRFS_ROOT_ITEM_KEY;
11541 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
11543 error("cannot find extent treet in tree_root");
11548 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11549 if (key.type != BTRFS_ROOT_ITEM_KEY)
11551 key.offset = (u64)-1;
11553 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11554 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11557 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11558 if (IS_ERR(cur_root) || !cur_root) {
11559 error("failed to read tree: %lld", key.objectid);
11563 ret = traverse_tree_block(cur_root, cur_root->node);
11566 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11567 btrfs_free_fs_root(cur_root);
11569 ret = btrfs_next_item(root1, &path);
11575 btrfs_release_path(&path);
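/*
 * Replace the node of @root with a freshly initialized empty block (or
 * reuse the current one when @overwrite is set) and update the root item
 * to point at it.
 */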
11579 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11580 struct btrfs_root *root, int overwrite)
11582 struct extent_buffer *c;
11583 struct extent_buffer *old = root->node;
11586 struct btrfs_disk_key disk_key = {0,0,0};
11592 extent_buffer_get(c);
11595 c = btrfs_alloc_free_block(trans, root,
11596 root->fs_info->nodesize,
11597 root->root_key.objectid,
11598 &disk_key, level, 0, 0);
11601 extent_buffer_get(c);
11605 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11606 btrfs_set_header_level(c, level);
11607 btrfs_set_header_bytenr(c, c->start);
11608 btrfs_set_header_generation(c, trans->transid);
11609 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11610 btrfs_set_header_owner(c, root->root_key.objectid);
11612 write_extent_buffer(c, root->fs_info->fsid,
11613 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11615 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11616 btrfs_header_chunk_tree_uuid(c),
11619 btrfs_mark_buffer_dirty(c);
11621 * This can happen in the following cases:
11623 * 1. Overwriting the previous root.
11625 * 2. Reinitializing the reloc data root, because we skipped pinning
11626 * down the reloc data tree before, which means we can allocate the
11627 * same block bytenr here.
11629 if (old->start == c->start) {
11630 btrfs_set_root_generation(&root->root_item,
11632 root->root_item.level = btrfs_header_level(root->node);
11633 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11634 &root->root_key, &root->root_item);
11636 free_extent_buffer(c);
11640 free_extent_buffer(old);
11642 add_root_to_dirty_list(root);
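/*
 * Recursively pin every metadata block reachable from @eb so that
 * re-initializing the extent tree will not allocate over it.  When
 * walking the tree root, also descend into the trees referenced by the
 * root items found in its leaves.
 */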
11646 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11647 struct extent_buffer *eb, int tree_root)
11649 struct extent_buffer *tmp;
11650 struct btrfs_root_item *ri;
11651 struct btrfs_key key;
11654 int level = btrfs_header_level(eb);
11660 * If we have pinned this block before, don't pin it again.
11661 * This not only avoids an endless loop on a broken filesystem
11662 * but also gives us some speedup.
11664 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11665 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11668 btrfs_pin_extent(fs_info, eb->start, eb->len);
11670 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11671 nritems = btrfs_header_nritems(eb);
11672 for (i = 0; i < nritems; i++) {
11674 btrfs_item_key_to_cpu(eb, &key, i);
11675 if (key.type != BTRFS_ROOT_ITEM_KEY)
11677 /* Skip the extent root and reloc roots */
11678 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11679 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11680 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11682 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11683 bytenr = btrfs_disk_root_bytenr(eb, ri);
11686 * If at any point we start needing the real root we
11687 * will have to build a stump root for the root we are
11688 * in, but for now this doesn't actually use the root so
11689 * just pass in extent_root.
11691 tmp = read_tree_block(fs_info, bytenr, nodesize, 0);
11692 if (!extent_buffer_uptodate(tmp)) {
11693 fprintf(stderr, "Error reading root block\n");
11696 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11697 free_extent_buffer(tmp);
11701 bytenr = btrfs_node_blockptr(eb, i);
11703 /* If we aren't the tree root don't read the block */
11704 if (level == 1 && !tree_root) {
11705 btrfs_pin_extent(fs_info, bytenr, nodesize);
11709 tmp = read_tree_block(fs_info, bytenr,
11711 if (!extent_buffer_uptodate(tmp)) {
11712 fprintf(stderr, "Error reading tree block\n");
11715 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11716 free_extent_buffer(tmp);
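/*
 * Pin all metadata blocks reachable from the chunk root and the tree
 * root (and, through the tree root, every other tree).
 */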
11725 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11729 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11733 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
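/*
 * Rebuild the in-memory block groups from the chunk tree and mark each
 * chunk range as free space, so allocations work again after the old
 * block groups have been dropped.
 */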
11736 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11738 struct btrfs_block_group_cache *cache;
11739 struct btrfs_path path;
11740 struct extent_buffer *leaf;
11741 struct btrfs_chunk *chunk;
11742 struct btrfs_key key;
11746 btrfs_init_path(&path);
11748 key.type = BTRFS_CHUNK_ITEM_KEY;
11750 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11752 btrfs_release_path(&path);
11757 * We do this in case the block groups were screwed up and had alloc
11758 * bits that aren't actually set on the chunks. This happens with
11759 * restored images every time and could happen in real life I guess.
11761 fs_info->avail_data_alloc_bits = 0;
11762 fs_info->avail_metadata_alloc_bits = 0;
11763 fs_info->avail_system_alloc_bits = 0;
11765 /* First we need to create the in-memory block groups */
11767 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11768 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11770 btrfs_release_path(&path);
11778 leaf = path.nodes[0];
11779 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11780 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11785 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11786 btrfs_add_block_group(fs_info, 0,
11787 btrfs_chunk_type(leaf, chunk),
11788 key.objectid, key.offset,
11789 btrfs_chunk_length(leaf, chunk));
11790 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11791 key.offset + btrfs_chunk_length(leaf, chunk));
11796 cache = btrfs_lookup_first_block_group(fs_info, start);
11800 start = cache->key.objectid + cache->key.offset;
11803 btrfs_release_path(&path);
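/*
 * Delete any pending balance item and the tree reloc root items from the
 * root tree, then reinitialize the data reloc tree.
 */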
11807 static int reset_balance(struct btrfs_trans_handle *trans,
11808 struct btrfs_fs_info *fs_info)
11810 struct btrfs_root *root = fs_info->tree_root;
11811 struct btrfs_path path;
11812 struct extent_buffer *leaf;
11813 struct btrfs_key key;
11814 int del_slot, del_nr = 0;
11818 btrfs_init_path(&path);
11819 key.objectid = BTRFS_BALANCE_OBJECTID;
11820 key.type = BTRFS_BALANCE_ITEM_KEY;
11822 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11827 goto reinit_data_reloc;
11832 ret = btrfs_del_item(trans, root, &path);
11835 btrfs_release_path(&path);
11837 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11838 key.type = BTRFS_ROOT_ITEM_KEY;
11840 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11844 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11849 ret = btrfs_del_items(trans, root, &path,
11856 btrfs_release_path(&path);
11859 ret = btrfs_search_slot(trans, root, &key, &path,
11866 leaf = path.nodes[0];
11867 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11868 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11870 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11875 del_slot = path.slots[0];
11884 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11888 btrfs_release_path(&path);
11891 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11892 key.type = BTRFS_ROOT_ITEM_KEY;
11893 key.offset = (u64)-1;
11894 root = btrfs_read_fs_root(fs_info, &key);
11895 if (IS_ERR(root)) {
11896 fprintf(stderr, "Error reading data reloc tree\n");
11897 ret = PTR_ERR(root);
11900 record_root_in_trans(trans, root);
11901 ret = btrfs_fsck_reinit_root(trans, root, 0);
11904 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11906 btrfs_release_path(&path);
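/*
 * Re-initialize the extent tree: pin all metadata currently in use,
 * rebuild the in-memory block groups, reinit the extent root, re-insert
 * the block group items and finally reset any pending balance state.
 */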
11910 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11911 struct btrfs_fs_info *fs_info)
11917 * The only reason we don't do this is that right now we're just
11918 * walking the trees we find and pinning down their bytes; we don't look
11919 * at any of the leaves. In order to do mixed groups we'd have to check
11920 * the leaves of any fs roots and pin down the bytes for any file
11921 * extents we find. Not hard but why do it if we don't have to?
11923 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11924 fprintf(stderr, "We don't support re-initing the extent tree "
11925 "for mixed block groups yet, please notify a btrfs "
11926 "developer you want to do this so they can add this "
11927 "functionality.\n");
11932 * first we need to walk all of the trees except the extent tree and pin
11933 * down the bytes that are in use so we don't overwrite any existing
11936 ret = pin_metadata_blocks(fs_info);
11938 fprintf(stderr, "error pinning down used bytes\n");
11943 * Need to drop all the block groups since we're going to recreate all
11946 btrfs_free_block_groups(fs_info);
11947 ret = reset_block_groups(fs_info);
11949 fprintf(stderr, "error resetting the block groups\n");
11953 /* Ok we can allocate now, reinit the extent root */
11954 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11956 fprintf(stderr, "extent root initialization failed\n");
11958 * When the transaction code is updated we should end the
11959 * transaction, but for now progs only knows about commit so
11960 * just return an error.
11966 * Now we have all the in-memory block groups set up so we can make
11967 * allocations properly, and the metadata we care about is safe since we
11968 * pinned all of it above.
11971 struct btrfs_block_group_cache *cache;
11973 cache = btrfs_lookup_first_block_group(fs_info, start);
11976 start = cache->key.objectid + cache->key.offset;
11977 ret = btrfs_insert_item(trans, fs_info->extent_root,
11978 &cache->key, &cache->item,
11979 sizeof(cache->item));
11981 fprintf(stderr, "Error adding block group\n");
11984 btrfs_extent_post_op(trans, fs_info->extent_root);
11987 ret = reset_balance(trans, fs_info);
11989 fprintf(stderr, "error resetting the pending balance\n");
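/*
 * Force a COW of the given metadata block by searching down to it in its
 * owning root within a new transaction.
 */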
11994 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11996 struct btrfs_path path;
11997 struct btrfs_trans_handle *trans;
11998 struct btrfs_key key;
12001 printf("Recowing metadata block %llu\n", eb->start);
12002 key.objectid = btrfs_header_owner(eb);
12003 key.type = BTRFS_ROOT_ITEM_KEY;
12004 key.offset = (u64)-1;
12006 root = btrfs_read_fs_root(root->fs_info, &key);
12007 if (IS_ERR(root)) {
12008 fprintf(stderr, "Couldn't find owner root %llu\n",
12010 return PTR_ERR(root);
12013 trans = btrfs_start_transaction(root, 1);
12015 return PTR_ERR(trans);
12017 btrfs_init_path(&path);
12018 path.lowest_level = btrfs_header_level(eb);
12019 if (path.lowest_level)
12020 btrfs_node_key_to_cpu(eb, &key, 0);
12022 btrfs_item_key_to_cpu(eb, &key, 0);
12024 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
12025 btrfs_commit_transaction(trans, root);
12026 btrfs_release_path(&path);
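/*
 * Delete a previously recorded bad item from the root it belongs to.
 */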
12030 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12032 struct btrfs_path path;
12033 struct btrfs_trans_handle *trans;
12034 struct btrfs_key key;
12037 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12038 bad->key.type, bad->key.offset);
12039 key.objectid = bad->root_id;
12040 key.type = BTRFS_ROOT_ITEM_KEY;
12041 key.offset = (u64)-1;
12043 root = btrfs_read_fs_root(root->fs_info, &key);
12044 if (IS_ERR(root)) {
12045 fprintf(stderr, "Couldn't find owner root %llu\n",
12047 return PTR_ERR(root);
12050 trans = btrfs_start_transaction(root, 1);
12052 return PTR_ERR(trans);
12054 btrfs_init_path(&path);
12055 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12061 ret = btrfs_del_item(trans, root, &path);
12063 btrfs_commit_transaction(trans, root);
12064 btrfs_release_path(&path);
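/*
 * Discard the log tree by clearing log_root and log_root_level in the
 * superblock and committing the change.
 */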
12068 static int zero_log_tree(struct btrfs_root *root)
12070 struct btrfs_trans_handle *trans;
12073 trans = btrfs_start_transaction(root, 1);
12074 if (IS_ERR(trans)) {
12075 ret = PTR_ERR(trans);
12078 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12079 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12080 ret = btrfs_commit_transaction(trans, root);
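/*
 * Compute checksums for the data extent at [start, start + len): read
 * the data back sector by sector and add each sector's checksum to the
 * csum tree via btrfs_csum_file_block().
 */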
12084 static int populate_csum(struct btrfs_trans_handle *trans,
12085 struct btrfs_root *csum_root, char *buf, u64 start,
12088 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12093 while (offset < len) {
12094 sectorsize = fs_info->sectorsize;
12095 ret = read_extent_data(fs_info, buf, start + offset,
12099 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12100 start + offset, buf, sectorsize);
12103 offset += sectorsize;
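/*
 * Walk a single fs/subvolume tree and populate checksums for every
 * regular (BTRFS_FILE_EXTENT_REG) file extent it references.
 */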
12108 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12109 struct btrfs_root *csum_root,
12110 struct btrfs_root *cur_root)
12112 struct btrfs_path path;
12113 struct btrfs_key key;
12114 struct extent_buffer *node;
12115 struct btrfs_file_extent_item *fi;
12122 buf = malloc(cur_root->fs_info->sectorsize);
12126 btrfs_init_path(&path);
12130 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12133 /* Iterate all regular file extents and fill its csum */
12135 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12137 if (key.type != BTRFS_EXTENT_DATA_KEY)
12139 node = path.nodes[0];
12140 slot = path.slots[0];
12141 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12142 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
12144 start = btrfs_file_extent_disk_bytenr(node, fi);
12145 len = btrfs_file_extent_disk_num_bytes(node, fi);
12147 ret = populate_csum(trans, csum_root, buf, start, len);
12148 if (ret == -EEXIST)
12154 * TODO: if next leaf is corrupted, jump to nearest next valid
12157 ret = btrfs_next_item(cur_root, &path);
12167 btrfs_release_path(&path);
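/*
 * Iterate all fs/subvolume root items in the tree root and fill the
 * csum tree from their file extents. This path is used when the extent
 * tree cannot be trusted, e.g. right after an extent tree re-init.
 */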
12172 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12173 struct btrfs_root *csum_root)
12175 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12176 struct btrfs_path path;
12177 struct btrfs_root *tree_root = fs_info->tree_root;
12178 struct btrfs_root *cur_root;
12179 struct extent_buffer *node;
12180 struct btrfs_key key;
12184 btrfs_init_path(&path);
12185 key.objectid = BTRFS_FS_TREE_OBJECTID;
12187 key.type = BTRFS_ROOT_ITEM_KEY;
12188 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12197 node = path.nodes[0];
12198 slot = path.slots[0];
12199 btrfs_item_key_to_cpu(node, &key, slot);
12200 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12202 if (key.type != BTRFS_ROOT_ITEM_KEY)
12204 if (!is_fstree(key.objectid))
12206 key.offset = (u64)-1;
12208 cur_root = btrfs_read_fs_root(fs_info, &key);
12209 if (IS_ERR(cur_root) || !cur_root) {
12210 fprintf(stderr, "Failed to read fs/subvol tree: %lld\n",
12214 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12219 ret = btrfs_next_item(tree_root, &path);
12229 btrfs_release_path(&path);
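/*
 * Iterate the extent tree and recompute checksums for every extent
 * item that carries the BTRFS_EXTENT_FLAG_DATA flag.
 */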
12233 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12234 struct btrfs_root *csum_root)
12236 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12237 struct btrfs_path path;
12238 struct btrfs_extent_item *ei;
12239 struct extent_buffer *leaf;
12241 struct btrfs_key key;
12244 btrfs_init_path(&path);
12246 key.type = BTRFS_EXTENT_ITEM_KEY;
12248 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12250 btrfs_release_path(&path);
12254 buf = malloc(csum_root->fs_info->sectorsize);
12256 btrfs_release_path(&path);
12261 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12262 ret = btrfs_next_leaf(extent_root, &path);
12270 leaf = path.nodes[0];
12272 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12273 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12278 ei = btrfs_item_ptr(leaf, path.slots[0],
12279 struct btrfs_extent_item);
12280 if (!(btrfs_extent_flags(leaf, ei) &
12281 BTRFS_EXTENT_FLAG_DATA)) {
12286 ret = populate_csum(trans, csum_root, buf, key.objectid,
12293 btrfs_release_path(&path);
12299 * Recalculate the csums and put them into the csum tree.
12301 * Extent tree init will wipe out all the extent info, so in that case we
12302 * can't depend on the extent tree and must use the fs trees instead. If
12303 * search_fs_tree is set, we will use fs/subvol trees to init the csum tree.
12305 static int fill_csum_tree(struct btrfs_trans_handle *trans,
12306 struct btrfs_root *csum_root,
12307 int search_fs_tree)
12309 if (search_fs_tree)
12310 return fill_csum_tree_from_fs(trans, csum_root);
12312 return fill_csum_tree_from_extent(trans, csum_root);
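/* Free all cached root_item_info entries and the cache tree itself. */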
12315 static void free_roots_info_cache(void)
12317 if (!roots_info_cache)
12320 while (!cache_tree_empty(roots_info_cache)) {
12321 struct cache_extent *entry;
12322 struct root_item_info *rii;
12324 entry = first_cache_extent(roots_info_cache);
12327 remove_cache_extent(roots_info_cache, entry);
12328 rii = container_of(entry, struct root_item_info, cache_extent);
12332 free(roots_info_cache);
12333 roots_info_cache = NULL;
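/*
 * Scan the extent tree for tree block references and record, for each
 * subvolume root id, the highest-level node found along with its
 * bytenr and generation. maybe_repair_root_item() later compares this
 * information against the on-disk root items.
 */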
12336 static int build_roots_info_cache(struct btrfs_fs_info *info)
12339 struct btrfs_key key;
12340 struct extent_buffer *leaf;
12341 struct btrfs_path path;
12343 if (!roots_info_cache) {
12344 roots_info_cache = malloc(sizeof(*roots_info_cache));
12345 if (!roots_info_cache)
12347 cache_tree_init(roots_info_cache);
12350 btrfs_init_path(&path);
12352 key.type = BTRFS_EXTENT_ITEM_KEY;
12354 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12357 leaf = path.nodes[0];
12360 struct btrfs_key found_key;
12361 struct btrfs_extent_item *ei;
12362 struct btrfs_extent_inline_ref *iref;
12363 int slot = path.slots[0];
12368 struct cache_extent *entry;
12369 struct root_item_info *rii;
12371 if (slot >= btrfs_header_nritems(leaf)) {
12372 ret = btrfs_next_leaf(info->extent_root, &path);
12379 leaf = path.nodes[0];
12380 slot = path.slots[0];
12383 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12385 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12386 found_key.type != BTRFS_METADATA_ITEM_KEY)
12389 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12390 flags = btrfs_extent_flags(leaf, ei);
12392 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12393 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
12396 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12397 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12398 level = found_key.offset;
12400 struct btrfs_tree_block_info *binfo;
12402 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12403 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12404 level = btrfs_tree_block_level(leaf, binfo);
12408 * For a root extent, it must be of type BTRFS_TREE_BLOCK_REF_KEY and
12409 * be the first (and only) inline ref in the item.
12411 type = btrfs_extent_inline_ref_type(leaf, iref);
12412 if (type != BTRFS_TREE_BLOCK_REF_KEY)
12415 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12416 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12418 rii = malloc(sizeof(struct root_item_info));
12423 rii->cache_extent.start = root_id;
12424 rii->cache_extent.size = 1;
12425 rii->level = (u8)-1;
12426 entry = &rii->cache_extent;
12427 ret = insert_cache_extent(roots_info_cache, entry);
12430 rii = container_of(entry, struct root_item_info,
12434 ASSERT(rii->cache_extent.start == root_id);
12435 ASSERT(rii->cache_extent.size == 1);
12437 if (level > rii->level || rii->level == (u8)-1) {
12438 rii->level = level;
12439 rii->bytenr = found_key.objectid;
12440 rii->gen = btrfs_extent_generation(leaf, ei);
12441 rii->node_count = 1;
12442 } else if (level == rii->level) {
12450 btrfs_release_path(&path);
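/*
 * Check one ROOT_ITEM against the data collected by
 * build_roots_info_cache() and, unless read_only_mode is set, rewrite
 * its bytenr, level and generation in place when they disagree.
 */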
12455 static int maybe_repair_root_item(struct btrfs_path *path,
12456 const struct btrfs_key *root_key,
12457 const int read_only_mode)
12459 const u64 root_id = root_key->objectid;
12460 struct cache_extent *entry;
12461 struct root_item_info *rii;
12462 struct btrfs_root_item ri;
12463 unsigned long offset;
12465 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12468 "Error: could not find extent items for root %llu\n",
12469 root_key->objectid);
12473 rii = container_of(entry, struct root_item_info, cache_extent);
12474 ASSERT(rii->cache_extent.start == root_id);
12475 ASSERT(rii->cache_extent.size == 1);
12477 if (rii->node_count != 1) {
12479 "Error: could not find btree root extent for root %llu\n",
12484 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12485 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
12487 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12488 btrfs_root_level(&ri) != rii->level ||
12489 btrfs_root_generation(&ri) != rii->gen) {
12492 * If we're in repair mode but our caller told us not to update
12493 * the root item, i.e. just check if it needs to be updated, don't
12494 * print this message, since the caller will call us again shortly
12495 * for the same root item without read-only mode (the caller will
12496 * open a transaction first).
12498 if (!(read_only_mode && repair))
12500 "%sroot item for root %llu,"
12501 " current bytenr %llu, current gen %llu, current level %u,"
12502 " new bytenr %llu, new gen %llu, new level %u\n",
12503 (read_only_mode ? "" : "fixing "),
12505 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12506 btrfs_root_level(&ri),
12507 rii->bytenr, rii->gen, rii->level);
12509 if (btrfs_root_generation(&ri) > rii->gen) {
12511 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12512 root_id, btrfs_root_generation(&ri), rii->gen);
12516 if (!read_only_mode) {
12517 btrfs_set_root_bytenr(&ri, rii->bytenr);
12518 btrfs_set_root_level(&ri, rii->level);
12519 btrfs_set_root_generation(&ri, rii->gen);
12520 write_extent_buffer(path->nodes[0], &ri,
12521 offset, sizeof(ri));
12531 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2)
12532 * caused read-only snapshots to be corrupted if they were created at a moment
12533 * when the source subvolume/snapshot had orphan items. The issue was that the
12534 * on-disk root items became incorrect, referring to the pre-orphan-cleanup root
12535 * node instead of the post-orphan-cleanup root node.
12536 * So this function, and its callees, just detects and fixes those cases. Even
12537 * though the regression was for read-only snapshots, this function applies to
12538 * any snapshot/subvolume root.
12539 * This must be run before any other repair code - otherwise other repair code
12540 * may delete or modify backrefs in the extent tree, for example, which would
12541 * result in an inconsistent fs after repairing the root items.
12543 static int repair_root_items(struct btrfs_fs_info *info)
12545 struct btrfs_path path;
12546 struct btrfs_key key;
12547 struct extent_buffer *leaf;
12548 struct btrfs_trans_handle *trans = NULL;
12551 int need_trans = 0;
12553 btrfs_init_path(&path);
12555 ret = build_roots_info_cache(info);
12559 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12560 key.type = BTRFS_ROOT_ITEM_KEY;
12565 * Avoid opening and committing transactions if a leaf doesn't have
12566 * any root items that need to be fixed, so that we avoid rotating
12567 * backup roots unnecessarily.
12570 trans = btrfs_start_transaction(info->tree_root, 1);
12571 if (IS_ERR(trans)) {
12572 ret = PTR_ERR(trans);
12577 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12581 leaf = path.nodes[0];
12584 struct btrfs_key found_key;
12586 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12587 int no_more_keys = find_next_key(&path, &key);
12589 btrfs_release_path(&path);
12591 ret = btrfs_commit_transaction(trans,
12603 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12605 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12607 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12610 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
12614 if (!trans && repair) {
12617 btrfs_release_path(&path);
12627 free_roots_info_cache();
12628 btrfs_release_path(&path);
12630 btrfs_commit_transaction(trans, info->tree_root);
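/*
 * Drop the v1 free space cache: clear the cache inode and its extent
 * data for every block group, then set the superblock cache_generation
 * to -1.
 */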
12637 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12639 struct btrfs_trans_handle *trans;
12640 struct btrfs_block_group_cache *bg_cache;
12644 /* Clear all free space cache inodes and their extent data */
12646 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12649 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12652 current = bg_cache->key.objectid + bg_cache->key.offset;
12655 /* Don't forget to set cache_generation to -1 */
12656 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12657 if (IS_ERR(trans)) {
12658 error("failed to update super block cache generation");
12659 return PTR_ERR(trans);
12661 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12662 btrfs_commit_transaction(trans, fs_info->tree_root);
12667 const char * const cmd_check_usage[] = {
12668 "btrfs check [options] <device>",
12669 "Check structural integrity of a filesystem (unmounted).",
12670 "Check structural integrity of an unmounted filesystem. Verify internal",
12671 "trees' consistency and item connectivity. In repair mode, try to",
12672 "fix the problems found.",
12673 "WARNING: the repair mode is considered dangerous",
12675 "-s|--super <superblock> use this superblock copy",
12676 "-b|--backup use the first valid backup root copy",
12677 "--repair try to repair the filesystem",
12678 "--readonly run in read-only mode (default)",
12679 "--init-csum-tree create a new CRC tree",
12680 "--init-extent-tree create a new extent tree",
12681 "--mode <MODE> allows choice of memory/IO trade-offs",
12682 " where MODE is one of:",
12683 " original - read inodes and extents to memory (requires",
12684 " more memory, does less IO)",
12685 " lowmem - try to use less memory but read blocks again",
12687 "--check-data-csum verify checksums of data blocks",
12688 "-Q|--qgroup-report print a report on qgroup consistency",
12689 "-E|--subvol-extents <subvolid>",
12690 " print subvolume extents and sharing state",
12691 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12692 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12693 "-p|--progress indicate progress",
12694 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
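/*
 * Illustrative invocations (the device path is just a placeholder):
 *
 *	btrfs check /dev/sdX			read-only check (default)
 *	btrfs check --repair /dev/sdX		attempt to fix reported problems
 *	btrfs check --clear-space-cache v1 /dev/sdX
 */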
12698 int cmd_check(int argc, char **argv)
12700 struct cache_tree root_cache;
12701 struct btrfs_root *root;
12702 struct btrfs_fs_info *info;
12705 u64 tree_root_bytenr = 0;
12706 u64 chunk_root_bytenr = 0;
12707 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12711 int init_csum_tree = 0;
12713 int clear_space_cache = 0;
12714 int qgroup_report = 0;
12715 int qgroups_repaired = 0;
12716 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12720 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12721 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12722 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12723 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12724 static const struct option long_options[] = {
12725 { "super", required_argument, NULL, 's' },
12726 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12727 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12728 { "init-csum-tree", no_argument, NULL,
12729 GETOPT_VAL_INIT_CSUM },
12730 { "init-extent-tree", no_argument, NULL,
12731 GETOPT_VAL_INIT_EXTENT },
12732 { "check-data-csum", no_argument, NULL,
12733 GETOPT_VAL_CHECK_CSUM },
12734 { "backup", no_argument, NULL, 'b' },
12735 { "subvol-extents", required_argument, NULL, 'E' },
12736 { "qgroup-report", no_argument, NULL, 'Q' },
12737 { "tree-root", required_argument, NULL, 'r' },
12738 { "chunk-root", required_argument, NULL,
12739 GETOPT_VAL_CHUNK_TREE },
12740 { "progress", no_argument, NULL, 'p' },
12741 { "mode", required_argument, NULL,
12743 { "clear-space-cache", required_argument, NULL,
12744 GETOPT_VAL_CLEAR_SPACE_CACHE},
12745 { NULL, 0, NULL, 0}
12748 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12752 case 'a': /* ignored */ break;
12754 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12757 num = arg_strtou64(optarg);
12758 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12760 "super mirror should be less than %d",
12761 BTRFS_SUPER_MIRROR_MAX);
12764 bytenr = btrfs_sb_offset(((int)num));
12765 printf("using SB copy %llu, bytenr %llu\n", num,
12766 (unsigned long long)bytenr);
12772 subvolid = arg_strtou64(optarg);
12775 tree_root_bytenr = arg_strtou64(optarg);
12777 case GETOPT_VAL_CHUNK_TREE:
12778 chunk_root_bytenr = arg_strtou64(optarg);
12781 ctx.progress_enabled = true;
12785 usage(cmd_check_usage);
12786 case GETOPT_VAL_REPAIR:
12787 printf("enabling repair mode\n");
12789 ctree_flags |= OPEN_CTREE_WRITES;
12791 case GETOPT_VAL_READONLY:
12794 case GETOPT_VAL_INIT_CSUM:
12795 printf("Creating a new CRC tree\n");
12796 init_csum_tree = 1;
12798 ctree_flags |= OPEN_CTREE_WRITES;
12800 case GETOPT_VAL_INIT_EXTENT:
12801 init_extent_tree = 1;
12802 ctree_flags |= (OPEN_CTREE_WRITES |
12803 OPEN_CTREE_NO_BLOCK_GROUPS);
12806 case GETOPT_VAL_CHECK_CSUM:
12807 check_data_csum = 1;
12809 case GETOPT_VAL_MODE:
12810 check_mode = parse_check_mode(optarg);
12811 if (check_mode == CHECK_MODE_UNKNOWN) {
12812 error("unknown mode: %s", optarg);
12816 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12817 if (strcmp(optarg, "v1") == 0) {
12818 clear_space_cache = 1;
12819 } else if (strcmp(optarg, "v2") == 0) {
12820 clear_space_cache = 2;
12821 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12824 "invalid argument to --clear-space-cache, must be v1 or v2");
12827 ctree_flags |= OPEN_CTREE_WRITES;
12832 if (check_argc_exact(argc - optind, 1))
12833 usage(cmd_check_usage);
12835 if (ctx.progress_enabled) {
12836 ctx.tp = TASK_NOTHING;
12837 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12840 /* This check is the only reason for --readonly to exist */
12841 if (readonly && repair) {
12842 error("repair options are not compatible with --readonly");
12847 * Not supported yet
12849 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12850 error("low memory mode doesn't support repair yet");
12855 cache_tree_init(&root_cache);
12857 if((ret = check_mounted(argv[optind])) < 0) {
12858 error("could not check mount status: %s", strerror(-ret));
12862 error("%s is currently mounted, aborting", argv[optind]);
12868 /* only allow partial opening under repair mode */
12870 ctree_flags |= OPEN_CTREE_PARTIAL;
12872 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12873 chunk_root_bytenr, ctree_flags);
12875 error("cannot open file system");
12881 global_info = info;
12882 root = info->fs_root;
12883 if (clear_space_cache == 1) {
12884 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12886 "free space cache v2 detected, use --clear-space-cache v2");
12890 printf("Clearing free space cache\n");
12891 ret = clear_free_space_cache(info);
12893 error("failed to clear free space cache");
12896 printf("Free space cache cleared\n");
12899 } else if (clear_space_cache == 2) {
12900 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12901 printf("no free space cache v2 to clear\n");
12905 printf("Clear free space cache v2\n");
12906 ret = btrfs_clear_free_space_tree(info);
12908 error("failed to clear free space cache v2: %d", ret);
12911 printf("free space cache v2 cleared\n");
12917 * repair mode will force us to commit the transaction, which
12918 * will make us fail to load the log tree when mounting.
12920 if (repair && btrfs_super_log_root(info->super_copy)) {
12921 ret = ask_user("repair mode will force us to clear out the log tree, are you sure?");
12927 ret = zero_log_tree(root);
12930 error("failed to zero log tree: %d", ret);
12935 uuid_unparse(info->super_copy->fsid, uuidbuf);
12936 if (qgroup_report) {
12937 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12939 ret = qgroup_verify_all(info);
12946 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12947 subvolid, argv[optind], uuidbuf);
12948 ret = print_extent_state(info, subvolid);
12952 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12954 if (!extent_buffer_uptodate(info->tree_root->node) ||
12955 !extent_buffer_uptodate(info->dev_root->node) ||
12956 !extent_buffer_uptodate(info->chunk_root->node)) {
12957 error("critical roots corrupted, unable to check the filesystem");
12963 if (init_extent_tree || init_csum_tree) {
12964 struct btrfs_trans_handle *trans;
12966 trans = btrfs_start_transaction(info->extent_root, 0);
12967 if (IS_ERR(trans)) {
12968 error("error starting transaction");
12969 ret = PTR_ERR(trans);
12974 if (init_extent_tree) {
12975 printf("Creating a new extent tree\n");
12976 ret = reinit_extent_tree(trans, info);
12982 if (init_csum_tree) {
12983 printf("Reinitializing checksum tree\n");
12984 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12986 error("checksum tree initialization failed: %d",
12993 ret = fill_csum_tree(trans, info->csum_root,
12997 error("checksum tree refilling failed: %d", ret);
13002 * Ok now we commit and run the normal fsck, which will add
13003 * extent entries for all of the items it finds.
13005 ret = btrfs_commit_transaction(trans, info->extent_root);
13010 if (!extent_buffer_uptodate(info->extent_root->node)) {
13011 error("critical: extent_root, unable to check the filesystem");
13016 if (!extent_buffer_uptodate(info->csum_root->node)) {
13017 error("critical: csum_root, unable to check the filesystem");
13023 if (!ctx.progress_enabled)
13024 fprintf(stderr, "checking extents\n");
13025 if (check_mode == CHECK_MODE_LOWMEM)
13026 ret = check_chunks_and_extents_v2(root);
13028 ret = check_chunks_and_extents(root);
13032 "errors found in extent allocation tree or chunk allocation");
13034 ret = repair_root_items(info);
13037 error("failed to repair root items: %s", strerror(-ret));
13041 fprintf(stderr, "Fixed %d roots.\n", ret);
13043 } else if (ret > 0) {
13045 "Found %d roots with an outdated root item.\n",
13048 "Please run a filesystem check with the option --repair to fix them.\n");
13054 if (!ctx.progress_enabled) {
13055 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13056 fprintf(stderr, "checking free space tree\n");
13058 fprintf(stderr, "checking free space cache\n");
13060 ret = check_space_cache(root);
13063 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13064 error("errors found in free space tree");
13066 error("errors found in free space cache");
13071 * We used to have to have explicit hole extents in between our real
13072 * extents, so if the NO_HOLES flag is not set we need to make sure there
13073 * are no gaps in the file extents for inodes; otherwise gaps are
13074 * expected and we can just ignore them.
13076 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
13077 if (!ctx.progress_enabled)
13078 fprintf(stderr, "checking fs roots\n");
13079 if (check_mode == CHECK_MODE_LOWMEM)
13080 ret = check_fs_roots_v2(root->fs_info);
13082 ret = check_fs_roots(root, &root_cache);
13085 error("errors found in fs roots");
13089 fprintf(stderr, "checking csums\n");
13090 ret = check_csums(root);
13093 error("errors found in csum tree");
13097 fprintf(stderr, "checking root refs\n");
13098 /* For low memory mode, check_fs_roots_v2 handles root refs */
13099 if (check_mode != CHECK_MODE_LOWMEM) {
13100 ret = check_root_refs(root, &root_cache);
13103 error("errors found in root refs");
13108 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13109 struct extent_buffer *eb;
13111 eb = list_first_entry(&root->fs_info->recow_ebs,
13112 struct extent_buffer, recow);
13113 list_del_init(&eb->recow);
13114 ret = recow_extent_buffer(root, eb);
13117 error("failed to fix transid errors");
13122 while (!list_empty(&delete_items)) {
13123 struct bad_item *bad;
13125 bad = list_first_entry(&delete_items, struct bad_item, list);
13126 list_del_init(&bad->list);
13128 ret = delete_bad_item(root, bad);
13134 if (info->quota_enabled) {
13135 fprintf(stderr, "checking quota groups\n");
13136 ret = qgroup_verify_all(info);
13139 error("failed to check quota groups");
13143 ret = repair_qgroups(info, &qgroups_repaired);
13146 error("failed to repair quota groups");
13152 if (!list_empty(&root->fs_info->recow_ebs)) {
13153 error("transid errors in file system");
13158 if (found_old_backref) { /*
* there was a disk format change while the mixed
* backref code was in the testing tree. The old
* format existed for about one week.
13163 printf("\n * Found old mixed backref format. "
13164 "The old format is not supported! *"
13165 "\n * Please mount the FS in readonly mode, "
13166 "back up the data and re-format the FS. *\n\n");
13169 printf("found %llu bytes used, ",
13170 (unsigned long long)bytes_used);
13172 printf("error(s) found\n");
13174 printf("no error found\n");
13175 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13176 printf("total tree bytes: %llu\n",
13177 (unsigned long long)total_btree_bytes);
13178 printf("total fs tree bytes: %llu\n",
13179 (unsigned long long)total_fs_tree_bytes);
13180 printf("total extent tree bytes: %llu\n",
13181 (unsigned long long)total_extent_tree_bytes);
13182 printf("btree space waste bytes: %llu\n",
13183 (unsigned long long)btree_space_waste);
13184 printf("file data blocks allocated: %llu\n referenced %llu\n",
13185 (unsigned long long)data_bytes_allocated,
13186 (unsigned long long)data_bytes_referenced);
13188 free_qgroup_counts();
13189 free_root_recs_tree(&root_cache);
13193 if (ctx.progress_enabled)
13194 task_deinit(ctx.info);