2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
43 #include "kernel-shared/ulist.h"
51 TASK_NOTHING, /* have to be the last element */
56 enum task_position tp;
58 struct task_info *info;
61 static u64 bytes_used = 0;
62 static u64 total_csum_bytes = 0;
63 static u64 total_btree_bytes = 0;
64 static u64 total_fs_tree_bytes = 0;
65 static u64 total_extent_tree_bytes = 0;
66 static u64 btree_space_waste = 0;
67 static u64 data_bytes_allocated = 0;
68 static u64 data_bytes_referenced = 0;
69 static int found_old_backref = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
79 enum btrfs_check_mode {
83 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
86 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
88 struct extent_backref {
89 struct list_head list;
90 unsigned int is_data:1;
91 unsigned int found_extent_tree:1;
92 unsigned int full_backref:1;
93 unsigned int found_ref:1;
94 unsigned int broken:1;
/*
 * Convert a pointer to the 'list' member embedded in a struct extent_backref
 * back into a pointer to the enclosing extent_backref.
 */
97 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
99 return list_entry(entry, struct extent_backref, list);
102 struct data_backref {
103 struct extent_backref node;
117 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
118 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
119 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
120 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
121 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
122 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
123 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
124 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
125 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
126 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
127 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
128 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
129 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
130 #define NO_INODE_ITEM (1<<14) /* no inode_item */
131 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
132 #define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
133 #define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
135 static inline struct data_backref* to_data_backref(struct extent_backref *back)
137 return container_of(back, struct data_backref, node);
141 * Much like data_backref, but with the undetermined members removed
142 * and changed to use list_head.
143 * During the extent scan, it is stored in root->orphan_data_extent.
144 * During the fs tree scan, it is then moved to inode_rec->orphan_extents.
146 struct orphan_data_extent {
147 struct list_head list;
155 struct tree_backref {
156 struct extent_backref node;
163 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
165 return container_of(back, struct tree_backref, node);
168 /* Explicit initialization for extent_record::flag_block_full_backref */
169 enum { FLAG_UNSET = 2 };
171 struct extent_record {
172 struct list_head backrefs;
173 struct list_head dups;
174 struct list_head list;
175 struct cache_extent cache;
176 struct btrfs_disk_key parent_key;
181 u64 extent_item_refs;
183 u64 parent_generation;
187 unsigned int flag_block_full_backref:2;
188 unsigned int found_rec:1;
189 unsigned int content_checked:1;
190 unsigned int owner_ref_checked:1;
191 unsigned int is_root:1;
192 unsigned int metadata:1;
193 unsigned int bad_full_backref:1;
194 unsigned int crossing_stripes:1;
195 unsigned int wrong_chunk_type:1;
198 static inline struct extent_record* to_extent_record(struct list_head *entry)
200 return container_of(entry, struct extent_record, list);
203 struct inode_backref {
204 struct list_head list;
205 unsigned int found_dir_item:1;
206 unsigned int found_dir_index:1;
207 unsigned int found_inode_ref:1;
217 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
219 return list_entry(entry, struct inode_backref, list);
222 struct root_item_record {
223 struct list_head list;
230 struct btrfs_key drop_key;
/*
 * Error bits kept in a backref's 'errors' field (set e.g. by
 * add_inode_backref()) and turned into text by print_ref_error().
 */
233 #define REF_ERR_NO_DIR_ITEM (1 << 0)
234 #define REF_ERR_NO_DIR_INDEX (1 << 1)
235 #define REF_ERR_NO_INODE_REF (1 << 2)
236 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
237 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
238 #define REF_ERR_DUP_INODE_REF (1 << 5)
239 #define REF_ERR_INDEX_UNMATCH (1 << 6)
240 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
241 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
242 #define REF_ERR_NO_ROOT_REF (1 << 9)
243 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
244 #define REF_ERR_DUP_ROOT_REF (1 << 11)
245 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
247 struct file_extent_hole {
253 struct inode_record {
254 struct list_head backrefs;
255 unsigned int checked:1;
256 unsigned int merging:1;
257 unsigned int found_inode_item:1;
258 unsigned int found_dir_item:1;
259 unsigned int found_file_extent:1;
260 unsigned int found_csum_item:1;
261 unsigned int some_csum_missing:1;
262 unsigned int nodatasum:1;
275 struct rb_root holes;
276 struct list_head orphan_extents;
/*
 * Error bits accumulated in inode_record 'errors' (see e.g.
 * maybe_free_inode_rec()) and turned into text by print_inode_error().
 */
281 #define I_ERR_NO_INODE_ITEM (1 << 0)
282 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
283 #define I_ERR_DUP_INODE_ITEM (1 << 2)
284 #define I_ERR_DUP_DIR_INDEX (1 << 3)
285 #define I_ERR_ODD_DIR_ITEM (1 << 4)
286 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
287 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
288 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
289 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
290 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
291 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
292 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
293 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
294 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
295 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
297 struct root_backref {
298 struct list_head list;
299 unsigned int found_dir_item:1;
300 unsigned int found_dir_index:1;
301 unsigned int found_back_ref:1;
302 unsigned int found_forward_ref:1;
303 unsigned int reachable:1;
312 static inline struct root_backref* to_root_backref(struct list_head *entry)
314 return list_entry(entry, struct root_backref, list);
318 struct list_head backrefs;
319 struct cache_extent cache;
320 unsigned int found_root_item:1;
326 struct cache_extent cache;
331 struct cache_extent cache;
332 struct cache_tree root_cache;
333 struct cache_tree inode_cache;
334 struct inode_record *current;
343 struct walk_control {
344 struct cache_tree shared;
345 struct shared_node *nodes[BTRFS_MAX_LEVEL];
351 struct btrfs_key key;
353 struct list_head list;
356 struct extent_entry {
361 struct list_head list;
364 struct root_item_info {
365 /* level of the root */
367 /* number of nodes at this level, must be 1 for a root */
371 struct cache_extent cache_extent;
375 * Error bit for low memory mode check.
377 * Currently no caller cares about it yet. Just internal use for error
/*
 * Every flag below owns exactly one bit so that several errors can be OR-ed
 * into one value and still be told apart afterwards.
 *
 * CROSSING_STRIPE_BOUNDARY used to share bit 4 with REFERENCER_MISMATCH,
 * which made the two conditions indistinguishable once set; it now uses the
 * first free bit.  All other values are unchanged.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8) /* Chunk type mismatch */
391 static void *print_status_check(void *p)
393 struct task_ctx *priv = p;
394 const char work_indicator[] = { '.', 'o', 'O', 'o' };
396 static char *task_position_string[] = {
398 "checking free space cache",
402 task_period_start(priv->info, 1000 /* 1s */);
404 if (priv->tp == TASK_NOTHING)
408 printf("%s [%c]\r", task_position_string[priv->tp],
409 work_indicator[count % 4]);
412 task_period_wait(priv->info);
417 static int print_status_return(void *p)
425 static enum btrfs_check_mode parse_check_mode(const char *str)
427 if (strcmp(str, "lowmem") == 0)
428 return CHECK_MODE_LOWMEM;
429 if (strcmp(str, "orig") == 0)
430 return CHECK_MODE_ORIGINAL;
431 if (strcmp(str, "original") == 0)
432 return CHECK_MODE_ORIGINAL;
434 return CHECK_MODE_UNKNOWN;
437 /* Compatibility function to allow reuse of old code */
/*
 * Look at the lowest-ordered hole of @holes.  An empty tree is handled as a
 * special case before the first node is fetched with rb_first().
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the start offset of that first hole is returned -- confirm.
 */
438 static u64 first_extent_gap(struct rb_root *holes)
440 struct file_extent_hole *hole;
442 if (RB_EMPTY_ROOT(holes))
445 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * Ordering callback handed to rb_insert() by add_file_extent_hole().
 * Two holes are ordered by their start offset; when the starts are equal
 * the lengths decide.
 * NOTE(review): the returned values sit on lines not shown here; the caller's
 * comment states that this comparison never reports equality -- confirm.
 */
449 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
451 struct file_extent_hole *hole1;
452 struct file_extent_hole *hole2;
454 hole1 = rb_entry(node1, struct file_extent_hole, node);
455 hole2 = rb_entry(node2, struct file_extent_hole, node);
457 if (hole1->start > hole2->start)
459 if (hole1->start < hole2->start)
461 /* Now hole1->start == hole2->start */
462 if (hole1->len >= hole2->len)
464 * Hole 1 will be merge center
465 * Same hole will be merged later
468 /* Hole 2 will be merge center */
473 * Add a hole to the record
475 * This also merges holes on behalf of copy_file_extent_holes(),
476 * which ensures that no contiguous (adjacent or overlapping) holes remain.
/*
 * Insert a newly allocated hole into the @holes rb-tree, then merge it with
 * its predecessor and with following holes whenever they touch or overlap
 * (end offset >= the neighbour's start).
 */
478 static int add_file_extent_hole(struct rb_root *holes,
481 struct file_extent_hole *hole;
482 struct file_extent_hole *prev = NULL;
483 struct file_extent_hole *next = NULL;
485 hole = malloc(sizeof(*hole));
490 /* Since compare will not return 0, no -EEXIST will happen */
491 rb_insert(holes, &hole->node, compare_hole);
493 /* simple merge with previous hole */
494 if (rb_prev(&hole->node))
495 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
/*
 * NOTE(review): the merged length below is measured up to the end of the new
 * hole only; if 'prev' extends past that end the result would be shorter than
 * 'prev' was -- confirm callers never add a hole fully inside an existing one.
 */
497 if (prev && prev->start + prev->len >= hole->start) {
498 hole->len = hole->start + hole->len - prev->start;
499 hole->start = prev->start;
500 rb_erase(&prev->node, holes);
505 /* iterate merge with next holes */
507 if (!rb_next(&hole->node))
509 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
511 if (hole->start + hole->len >= next->start) {
/* only grow when 'next' reaches at least as far as the current hole */
512 if (hole->start + hole->len <= next->start + next->len)
513 hole->len = next->start + next->len -
515 rb_erase(&next->node, holes);
/*
 * Search callback given to rb_search() by del_file_extent_hole().
 * @data is a struct file_extent_hole used as the search key and @node is the
 * tree node being probed.  The key's offset is tested against the range
 * [hole->start, hole->start + hole->len) of the probed hole.
 * NOTE(review): the assignment of 'start' and the returned values are on
 * lines not shown in this excerpt -- confirm.
 */
524 static int compare_hole_range(struct rb_node *node, void *data)
526 struct file_extent_hole *hole;
529 hole = (struct file_extent_hole *)data;
532 hole = rb_entry(node, struct file_extent_hole, node);
533 if (start < hole->start)
535 if (start >= hole->start && start < hole->start + hole->len)
541 * Delete a hole from the record
543 * This splits the hole where needed and is much stricter than add.
545 static int del_file_extent_hole(struct rb_root *holes,
548 struct file_extent_hole *hole;
549 struct file_extent_hole tmp;
554 struct rb_node *node;
561 node = rb_search(holes, &tmp, compare_hole_range, NULL);
564 hole = rb_entry(node, struct file_extent_hole, node);
565 if (start + len > hole->start + hole->len)
569 * Now there will be no overlap, delete the hole and re-add the
570 * split(s) if they exists.
572 if (start > hole->start) {
573 prev_start = hole->start;
574 prev_len = start - hole->start;
577 if (hole->start + hole->len > start + len) {
578 next_start = start + len;
579 next_len = hole->start + hole->len - start - len;
582 rb_erase(node, holes);
585 ret = add_file_extent_hole(holes, prev_start, prev_len);
590 ret = add_file_extent_hole(holes, next_start, next_len);
/*
 * Walk the source hole tree in order (rb_first()/rb_next()) and add every
 * hole to @dst through add_file_extent_hole(), which merges as it inserts.
 * NOTE(review): how a failing add is handled is on lines not shown here.
 */
597 static int copy_file_extent_holes(struct rb_root *dst,
600 struct file_extent_hole *hole;
601 struct rb_node *node;
604 node = rb_first(src);
606 hole = rb_entry(node, struct file_extent_hole, node);
607 ret = add_file_extent_hole(dst, hole->start, hole->len);
610 node = rb_next(node);
/*
 * Tear down a hole tree by repeatedly taking the first node and erasing it
 * from @holes until nothing is left.
 * NOTE(review): releasing the erased hole's memory is presumably done on a
 * line not shown in this excerpt -- confirm.
 */
615 static void free_file_extent_holes(struct rb_root *holes)
617 struct rb_node *node;
618 struct file_extent_hole *hole;
620 node = rb_first(holes);
622 hole = rb_entry(node, struct file_extent_hole, node);
623 rb_erase(node, holes);
/* restart from the new first node; the erased one must not be followed */
625 node = rb_first(holes);
629 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
631 static void record_root_in_trans(struct btrfs_trans_handle *trans,
632 struct btrfs_root *root)
634 if (root->last_trans != trans->transid) {
635 root->track_dirty = 1;
636 root->last_trans = trans->transid;
637 root->commit_root = root->node;
638 extent_buffer_get(root->node);
642 static u8 imode_to_type(u32 imode)
645 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
646 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
647 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
648 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
649 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
650 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
651 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
652 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
655 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * rb-tree comparison callback for struct device_record nodes: the two
 * records are ordered by their devid.
 * NOTE(review): the values returned for greater/less/equal are on lines not
 * shown in this excerpt.
 */
659 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
661 struct device_record *rec1;
662 struct device_record *rec2;
664 rec1 = rb_entry(node1, struct device_record, node);
665 rec2 = rb_entry(node2, struct device_record, node);
666 if (rec1->devid > rec2->devid)
668 else if (rec1->devid < rec2->devid)
/*
 * Build a private copy of @orig_rec.
 *
 * The record is duplicated with memcpy(), after which its backref list,
 * orphan extent list and hole tree are re-initialised and filled with newly
 * allocated copies of the original's entries.
 *
 * Returns ERR_PTR(-ENOMEM) when the record itself cannot be allocated.  The
 * tail of the function unlinks whatever was already copied into the new
 * record's hole tree and lists.
 */
674 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
676 struct inode_record *rec;
677 struct inode_backref *backref;
678 struct inode_backref *orig;
679 struct inode_backref *tmp;
680 struct orphan_data_extent *src_orphan;
681 struct orphan_data_extent *dst_orphan;
686 rec = malloc(sizeof(*rec));
688 return ERR_PTR(-ENOMEM);
689 memcpy(rec, orig_rec, sizeof(*rec));
/* the memcpy above copied the original's list heads and tree root; reset them */
691 INIT_LIST_HEAD(&rec->backrefs);
692 INIT_LIST_HEAD(&rec->orphan_extents);
693 rec->holes = RB_ROOT;
695 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* a backref is followed by its name: namelen bytes plus one extra byte */
696 size = sizeof(*orig) + orig->namelen + 1;
697 backref = malloc(size);
702 memcpy(backref, orig, size);
703 list_add_tail(&backref->list, &rec->backrefs);
705 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
706 dst_orphan = malloc(sizeof(*dst_orphan));
711 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
712 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
714 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
721 rb = rb_first(&rec->holes);
723 struct file_extent_hole *hole;
725 hole = rb_entry(rb, struct file_extent_hole, node);
731 if (!list_empty(&rec->backrefs))
732 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
733 list_del(&orig->list);
/*
 * NOTE(review): this walk over rec->orphan_extents reuses 'orig'/'tmp', which
 * are struct inode_backref pointers, although the list was filled with
 * struct orphan_data_extent entries above.  It relies on 'list' sitting at
 * the same offset in both structs; cursors of the matching type would be
 * safer -- confirm.
 */
737 if (!list_empty(&rec->orphan_extents))
738 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
739 list_del(&orig->list);
/*
 * Print the entries of an orphan data extent list to stdout: one heading
 * line naming the tree, then one line per extent.  Nothing is printed for
 * an empty list.
 */
748 static void print_orphan_data_extents(struct list_head *orphan_extents,
751 struct orphan_data_extent *orphan;
753 if (list_empty(orphan_extents))
755 printf("The following data extent is lost in tree %llu:\n",
757 list_for_each_entry(orphan, orphan_extents, list) {
758 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
759 orphan->objectid, orphan->offset, orphan->disk_bytenr,
764 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
766 u64 root_objectid = root->root_key.objectid;
767 int errors = rec->errors;
771 /* For reloc root errors, print the objectid of the corresponding fs root */
772 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
773 root_objectid = root->root_key.offset;
774 fprintf(stderr, "reloc");
776 fprintf(stderr, "root %llu inode %llu errors %x",
777 (unsigned long long) root_objectid,
778 (unsigned long long) rec->ino, rec->errors);
780 if (errors & I_ERR_NO_INODE_ITEM)
781 fprintf(stderr, ", no inode item");
782 if (errors & I_ERR_NO_ORPHAN_ITEM)
783 fprintf(stderr, ", no orphan item");
784 if (errors & I_ERR_DUP_INODE_ITEM)
785 fprintf(stderr, ", dup inode item");
786 if (errors & I_ERR_DUP_DIR_INDEX)
787 fprintf(stderr, ", dup dir index");
788 if (errors & I_ERR_ODD_DIR_ITEM)
789 fprintf(stderr, ", odd dir item");
790 if (errors & I_ERR_ODD_FILE_EXTENT)
791 fprintf(stderr, ", odd file extent");
792 if (errors & I_ERR_BAD_FILE_EXTENT)
793 fprintf(stderr, ", bad file extent");
794 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
795 fprintf(stderr, ", file extent overlap");
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
797 fprintf(stderr, ", file extent discount");
798 if (errors & I_ERR_DIR_ISIZE_WRONG)
799 fprintf(stderr, ", dir isize wrong");
800 if (errors & I_ERR_FILE_NBYTES_WRONG)
801 fprintf(stderr, ", nbytes wrong");
802 if (errors & I_ERR_ODD_CSUM_ITEM)
803 fprintf(stderr, ", odd csum item");
804 if (errors & I_ERR_SOME_CSUM_MISSING)
805 fprintf(stderr, ", some csum missing");
806 if (errors & I_ERR_LINK_COUNT_WRONG)
807 fprintf(stderr, ", link count wrong");
808 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
809 fprintf(stderr, ", orphan file extent");
810 fprintf(stderr, "\n");
811 /* Print the orphan extents if needed */
812 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
813 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
815 /* Print the holes if needed */
816 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
817 struct file_extent_hole *hole;
818 struct rb_node *node;
821 node = rb_first(&rec->holes);
822 fprintf(stderr, "Found file extent holes:\n");
825 hole = rb_entry(node, struct file_extent_hole, node);
826 fprintf(stderr, "\tstart: %llu, len: %llu\n",
827 hole->start, hole->len);
828 node = rb_next(node);
831 fprintf(stderr, "\tstart: 0, len: %llu\n",
833 root->fs_info->sectorsize));
837 static void print_ref_error(int errors)
839 if (errors & REF_ERR_NO_DIR_ITEM)
840 fprintf(stderr, ", no dir item");
841 if (errors & REF_ERR_NO_DIR_INDEX)
842 fprintf(stderr, ", no dir index");
843 if (errors & REF_ERR_NO_INODE_REF)
844 fprintf(stderr, ", no inode ref");
845 if (errors & REF_ERR_DUP_DIR_ITEM)
846 fprintf(stderr, ", dup dir item");
847 if (errors & REF_ERR_DUP_DIR_INDEX)
848 fprintf(stderr, ", dup dir index");
849 if (errors & REF_ERR_DUP_INODE_REF)
850 fprintf(stderr, ", dup inode ref");
851 if (errors & REF_ERR_INDEX_UNMATCH)
852 fprintf(stderr, ", index mismatch");
853 if (errors & REF_ERR_FILETYPE_UNMATCH)
854 fprintf(stderr, ", filetype mismatch");
855 if (errors & REF_ERR_NAME_TOO_LONG)
856 fprintf(stderr, ", name too long");
857 if (errors & REF_ERR_NO_ROOT_REF)
858 fprintf(stderr, ", no root ref");
859 if (errors & REF_ERR_NO_ROOT_BACKREF)
860 fprintf(stderr, ", no root backref");
861 if (errors & REF_ERR_DUP_ROOT_REF)
862 fprintf(stderr, ", dup root ref");
863 if (errors & REF_ERR_DUP_ROOT_BACKREF)
864 fprintf(stderr, ", dup root backref");
865 fprintf(stderr, "\n");
/*
 * Look up the inode record for @ino in @inode_cache.
 *
 * When a cached record is shared (refs > 1) and @mod is set, the cache node
 * is pointed at a private clone made by clone_inode_rec().  The code further
 * down allocates a zeroed record with empty lists and an empty hole tree,
 * wraps it in a ptr_node keyed by @ino, and inserts that into the cache.
 *
 * Returns ERR_PTR(-ENOMEM) on allocation failure and ERR_PTR(-EEXIST) when
 * the cache insertion fails.
 */
868 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
871 struct ptr_node *node;
872 struct cache_extent *cache;
873 struct inode_record *rec = NULL;
876 cache = lookup_cache_extent(inode_cache, ino, 1);
878 node = container_of(cache, struct ptr_node, cache);
880 if (mod && rec->refs > 1) {
881 node->data = clone_inode_rec(rec);
882 if (IS_ERR(node->data))
888 rec = calloc(1, sizeof(*rec));
890 return ERR_PTR(-ENOMEM);
/* (u64)-1 marks "no extent seen yet"; merge_inode_recs() tests for it */
892 rec->extent_start = (u64)-1;
894 INIT_LIST_HEAD(&rec->backrefs);
895 INIT_LIST_HEAD(&rec->orphan_extents);
896 rec->holes = RB_ROOT;
898 node = malloc(sizeof(*node));
901 return ERR_PTR(-ENOMEM);
903 node->cache.start = ino;
904 node->cache.size = 1;
907 if (ino == BTRFS_FREE_INO_OBJECTID)
910 ret = insert_cache_extent(inode_cache, &node->cache);
/*
 * NOTE(review): 'rec' and 'node' are still allocated at this point and no
 * free is visible before the return below -- confirm they are not leaked.
 */
912 return ERR_PTR(-EEXIST);
/*
 * Empty a list of orphan_data_extent entries by unlinking the first entry
 * over and over until the list is empty.
 * NOTE(review): freeing each unlinked entry is presumably done on a line not
 * shown in this excerpt -- confirm.
 */
917 static void free_orphan_data_extents(struct list_head *orphan_extents)
919 struct orphan_data_extent *orphan;
921 while (!list_empty(orphan_extents)) {
922 orphan = list_entry(orphan_extents->next,
923 struct orphan_data_extent, list);
924 list_del(&orphan->list);
/*
 * Release what hangs off an inode record: every backref is unlinked from
 * the backref list, then the orphan extent list and the hole tree are torn
 * down with their dedicated helpers.
 * NOTE(review): the handling of rec->refs and the final free of the record
 * are on lines not shown in this excerpt.
 */
929 static void free_inode_rec(struct inode_record *rec)
931 struct inode_backref *backref;
936 while (!list_empty(&rec->backrefs)) {
937 backref = to_inode_backref(rec->backrefs.next);
938 list_del(&backref->list);
941 free_orphan_data_extents(&rec->orphan_extents);
942 free_file_extent_holes(&rec->holes);
/*
 * Test whether @rec holds nothing that still needs reporting: no error bits,
 * already checked, inode item found, nlink equal to the number of links
 * found, and no backrefs left on its list.  maybe_free_inode_rec() uses this
 * to decide whether the record can be dropped from the cache.
 */
946 static int can_free_inode_rec(struct inode_record *rec)
948 if (!rec->errors && rec->checked && rec->found_inode_item &&
949 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
954 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
955 struct inode_record *rec)
957 struct cache_extent *cache;
958 struct inode_backref *tmp, *backref;
959 struct ptr_node *node;
962 if (!rec->found_inode_item)
965 filetype = imode_to_type(rec->imode);
966 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
967 if (backref->found_dir_item && backref->found_dir_index) {
968 if (backref->filetype != filetype)
969 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
970 if (!backref->errors && backref->found_inode_ref &&
971 rec->nlink == rec->found_link) {
972 list_del(&backref->list);
978 if (!rec->checked || rec->merging)
981 if (S_ISDIR(rec->imode)) {
982 if (rec->found_size != rec->isize)
983 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
984 if (rec->found_file_extent)
985 rec->errors |= I_ERR_ODD_FILE_EXTENT;
986 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
987 if (rec->found_dir_item)
988 rec->errors |= I_ERR_ODD_DIR_ITEM;
989 if (rec->found_size != rec->nbytes)
990 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
991 if (rec->nlink > 0 && !no_holes &&
992 (rec->extent_end < rec->isize ||
993 first_extent_gap(&rec->holes) < rec->isize))
994 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
997 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
998 if (rec->found_csum_item && rec->nodatasum)
999 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1000 if (rec->some_csum_missing && !rec->nodatasum)
1001 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1004 BUG_ON(rec->refs != 1);
1005 if (can_free_inode_rec(rec)) {
1006 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1007 node = container_of(cache, struct ptr_node, cache);
1008 BUG_ON(node->data != rec);
1009 remove_cache_extent(inode_cache, &node->cache);
1011 free_inode_rec(rec);
/*
 * Search @root for an orphan item: the key uses BTRFS_ORPHAN_OBJECTID as
 * objectid and BTRFS_ORPHAN_ITEM_KEY as type.  This is a read-only lookup
 * (no transaction handle is passed) and the path is released right after
 * the search.
 * NOTE(review): key.offset is presumably set from @ino on a line not shown,
 * and the mapping of 'ret' to the return value is not visible -- confirm.
 */
1015 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1017 struct btrfs_path path;
1018 struct btrfs_key key;
1021 key.objectid = BTRFS_ORPHAN_OBJECTID;
1022 key.type = BTRFS_ORPHAN_ITEM_KEY;
1025 btrfs_init_path(&path);
1026 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1027 btrfs_release_path(&path);
/*
 * Record the INODE_ITEM found at @slot of leaf @eb in the inode record that
 * is current for @active_node.
 *
 * A second inode item for the same record is flagged with
 * I_ERR_DUP_INODE_ITEM.  Otherwise nlink, size, nbytes and mode are read
 * from the item and the record is marked as having its inode item.  An
 * nlink of 0 provisionally sets I_ERR_NO_ORPHAN_ITEM.  Finally the record is
 * handed to maybe_free_inode_rec().
 */
1033 static int process_inode_item(struct extent_buffer *eb,
1034 int slot, struct btrfs_key *key,
1035 struct shared_node *active_node)
1037 struct inode_record *rec;
1038 struct btrfs_inode_item *item;
1040 rec = active_node->current;
/* the current record must belong to this key and must not be shared */
1041 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1042 if (rec->found_inode_item) {
1043 rec->errors |= I_ERR_DUP_INODE_ITEM;
1046 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1047 rec->nlink = btrfs_inode_nlink(eb, item);
1048 rec->isize = btrfs_inode_size(eb, item);
1049 rec->nbytes = btrfs_inode_nbytes(eb, item);
1050 rec->imode = btrfs_inode_mode(eb, item);
1051 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1053 rec->found_inode_item = 1;
1054 if (rec->nlink == 0)
1055 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1056 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref of @rec that matches directory @dir and the given name
 * (same namelen and identical bytes); when none matches, a new zeroed
 * backref carrying a NUL-terminated copy of the name is appended to
 * rec->backrefs.
 * NOTE(review): what is returned on a match, and the handling of a failed
 * malloc(), are on lines not shown in this excerpt -- confirm.
 */
1060 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1062 int namelen, u64 dir)
1064 struct inode_backref *backref;
1066 list_for_each_entry(backref, &rec->backrefs, list) {
1067 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1069 if (backref->dir != dir || backref->namelen != namelen)
1071 if (memcmp(name, backref->name, namelen))
/* room for the struct, the name bytes and the terminating NUL */
1076 backref = malloc(sizeof(*backref) + namelen + 1);
/* only the fixed part is cleared; the name area is written just below */
1079 memset(backref, 0, sizeof(*backref));
1081 backref->namelen = namelen;
1082 memcpy(backref->name, name, namelen);
1083 backref->name[namelen] = '\0';
1084 list_add_tail(&backref->list, &rec->backrefs);
1088 static int add_inode_backref(struct cache_tree *inode_cache,
1089 u64 ino, u64 dir, u64 index,
1090 const char *name, int namelen,
1091 u8 filetype, u8 itemtype, int errors)
1093 struct inode_record *rec;
1094 struct inode_backref *backref;
1096 rec = get_inode_rec(inode_cache, ino, 1);
1097 BUG_ON(IS_ERR(rec));
1098 backref = get_inode_backref(rec, name, namelen, dir);
1101 backref->errors |= errors;
1102 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1103 if (backref->found_dir_index)
1104 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1105 if (backref->found_inode_ref && backref->index != index)
1106 backref->errors |= REF_ERR_INDEX_UNMATCH;
1107 if (backref->found_dir_item && backref->filetype != filetype)
1108 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1110 backref->index = index;
1111 backref->filetype = filetype;
1112 backref->found_dir_index = 1;
1113 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1115 if (backref->found_dir_item)
1116 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1117 if (backref->found_dir_index && backref->filetype != filetype)
1118 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1120 backref->filetype = filetype;
1121 backref->found_dir_item = 1;
1122 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1123 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1124 if (backref->found_inode_ref)
1125 backref->errors |= REF_ERR_DUP_INODE_REF;
1126 if (backref->found_dir_index && backref->index != index)
1127 backref->errors |= REF_ERR_INDEX_UNMATCH;
1129 backref->index = index;
1131 backref->ref_type = itemtype;
1132 backref->found_inode_ref = 1;
1137 maybe_free_inode_rec(inode_cache, rec);
1141 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1142 struct cache_tree *dst_cache)
1144 struct inode_backref *backref;
1149 list_for_each_entry(backref, &src->backrefs, list) {
1150 if (backref->found_dir_index) {
1151 add_inode_backref(dst_cache, dst->ino, backref->dir,
1152 backref->index, backref->name,
1153 backref->namelen, backref->filetype,
1154 BTRFS_DIR_INDEX_KEY, backref->errors);
1156 if (backref->found_dir_item) {
1158 add_inode_backref(dst_cache, dst->ino,
1159 backref->dir, 0, backref->name,
1160 backref->namelen, backref->filetype,
1161 BTRFS_DIR_ITEM_KEY, backref->errors);
1163 if (backref->found_inode_ref) {
1164 add_inode_backref(dst_cache, dst->ino,
1165 backref->dir, backref->index,
1166 backref->name, backref->namelen, 0,
1167 backref->ref_type, backref->errors);
1171 if (src->found_dir_item)
1172 dst->found_dir_item = 1;
1173 if (src->found_file_extent)
1174 dst->found_file_extent = 1;
1175 if (src->found_csum_item)
1176 dst->found_csum_item = 1;
1177 if (src->some_csum_missing)
1178 dst->some_csum_missing = 1;
1179 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1180 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1185 BUG_ON(src->found_link < dir_count);
1186 dst->found_link += src->found_link - dir_count;
1187 dst->found_size += src->found_size;
1188 if (src->extent_start != (u64)-1) {
1189 if (dst->extent_start == (u64)-1) {
1190 dst->extent_start = src->extent_start;
1191 dst->extent_end = src->extent_end;
1193 if (dst->extent_end > src->extent_start)
1194 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1195 else if (dst->extent_end < src->extent_start) {
1196 ret = add_file_extent_hole(&dst->holes,
1198 src->extent_start - dst->extent_end);
1200 if (dst->extent_end < src->extent_end)
1201 dst->extent_end = src->extent_end;
1205 dst->errors |= src->errors;
1206 if (src->found_inode_item) {
1207 if (!dst->found_inode_item) {
1208 dst->nlink = src->nlink;
1209 dst->isize = src->isize;
1210 dst->nbytes = src->nbytes;
1211 dst->imode = src->imode;
1212 dst->nodatasum = src->nodatasum;
1213 dst->found_inode_item = 1;
1215 dst->errors |= I_ERR_DUP_INODE_ITEM;
1223 static int splice_shared_node(struct shared_node *src_node,
1224 struct shared_node *dst_node)
1226 struct cache_extent *cache;
1227 struct ptr_node *node, *ins;
1228 struct cache_tree *src, *dst;
1229 struct inode_record *rec, *conflict;
1230 u64 current_ino = 0;
1234 if (--src_node->refs == 0)
1236 if (src_node->current)
1237 current_ino = src_node->current->ino;
1239 src = &src_node->root_cache;
1240 dst = &dst_node->root_cache;
1242 cache = search_cache_extent(src, 0);
1244 node = container_of(cache, struct ptr_node, cache);
1246 cache = next_cache_extent(cache);
1249 remove_cache_extent(src, &node->cache);
1252 ins = malloc(sizeof(*ins));
1254 ins->cache.start = node->cache.start;
1255 ins->cache.size = node->cache.size;
1259 ret = insert_cache_extent(dst, &ins->cache);
1260 if (ret == -EEXIST) {
1261 conflict = get_inode_rec(dst, rec->ino, 1);
1262 BUG_ON(IS_ERR(conflict));
1263 merge_inode_recs(rec, conflict, dst);
1265 conflict->checked = 1;
1266 if (dst_node->current == conflict)
1267 dst_node->current = NULL;
1269 maybe_free_inode_rec(dst, conflict);
1270 free_inode_rec(rec);
1277 if (src == &src_node->root_cache) {
1278 src = &src_node->inode_cache;
1279 dst = &dst_node->inode_cache;
1283 if (current_ino > 0 && (!dst_node->current ||
1284 current_ino > dst_node->current->ino)) {
1285 if (dst_node->current) {
1286 dst_node->current->checked = 1;
1287 maybe_free_inode_rec(dst, dst_node->current);
1289 dst_node->current = get_inode_rec(dst, current_ino, 1);
1290 BUG_ON(IS_ERR(dst_node->current));
/*
 * Per-entry destructor for caches of inode records: recovers the ptr_node
 * that embeds @cache and releases the inode record through free_inode_rec().
 * NOTE(review): loading 'rec' from the node and freeing the node itself are
 * on lines not shown in this excerpt.
 */
1295 static void free_inode_ptr(struct cache_extent *cache)
1297 struct ptr_node *node;
1298 struct inode_record *rec;
1300 node = container_of(cache, struct ptr_node, cache);
1302 free_inode_rec(rec);
1306 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
/*
 * Look up the shared_node stored in the @shared cache tree for a one-byte
 * range starting at the given bytenr and convert the cache entry back into
 * its enclosing shared_node.
 * NOTE(review): the not-found case is handled on lines not shown here.
 */
1308 static struct shared_node *find_shared_node(struct cache_tree *shared,
1311 struct cache_extent *cache;
1312 struct shared_node *node;
1314 cache = lookup_cache_extent(shared, bytenr, 1);
1316 node = container_of(cache, struct shared_node, cache);
/*
 * Allocate a zeroed shared_node for @bytenr, initialise its root and inode
 * cache trees, and insert it into the @shared cache tree as a one-byte
 * extent starting at @bytenr.
 * NOTE(review): storing @refs in the node and the allocation-failure path
 * are on lines not shown in this excerpt.
 */
1322 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1325 struct shared_node *node;
1327 node = calloc(1, sizeof(*node));
1330 node->cache.start = bytenr;
1331 node->cache.size = 1;
1332 cache_tree_init(&node->root_cache);
1333 cache_tree_init(&node->inode_cache);
1336 ret = insert_cache_extent(shared, &node->cache);
1341 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1342 struct walk_control *wc, int level)
1344 struct shared_node *node;
1345 struct shared_node *dest;
1348 if (level == wc->active_node)
1351 BUG_ON(wc->active_node <= level);
1352 node = find_shared_node(&wc->shared, bytenr);
1354 ret = add_shared_node(&wc->shared, bytenr, refs);
1356 node = find_shared_node(&wc->shared, bytenr);
1357 wc->nodes[level] = node;
1358 wc->active_node = level;
1362 if (wc->root_level == wc->active_node &&
1363 btrfs_root_refs(&root->root_item) == 0) {
1364 if (--node->refs == 0) {
1365 free_inode_recs_tree(&node->root_cache);
1366 free_inode_recs_tree(&node->inode_cache);
1367 remove_cache_extent(&wc->shared, &node->cache);
1373 dest = wc->nodes[wc->active_node];
1374 splice_shared_node(node, dest);
1375 if (node->refs == 0) {
1376 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): called when the walk leaves the active
 * shared node at @level. Picks a new active level above @level, clears the
 * old wc->nodes[] slot and, depending on the root's reference count, splices
 * the departing node into the new active node.
 * NOTE(review): the loop body that selects the new level and the return
 * statements are not visible here.
 */
1382 static int leave_shared_node(struct btrfs_root *root,
1383 struct walk_control *wc, int level)
1385 struct shared_node *node;
1386 struct shared_node *dest;
/* Special case for the root level (follow-up statement not shown). */
1389 if (level == wc->root_level)
/* Search the levels above @level for the next active node. */
1392 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
/* The search must have stopped inside the valid level range. */
1396 BUG_ON(i >= BTRFS_MAX_LEVEL);
1398 node = wc->nodes[wc->active_node];
1399 wc->nodes[wc->active_node] = NULL;
1400 wc->active_node = i;
1402 dest = wc->nodes[wc->active_node];
/*
 * Below the root level, or for a root whose item still has refs, the
 * departing node must hold more than one reference before it is
 * spliced into the new active node.
 */
1403 if (wc->active_node < wc->root_level ||
1404 btrfs_root_refs(&root->root_item) > 0) {
1405 BUG_ON(node->refs <= 1);
1406 splice_shared_node(node, dest);
1408 BUG_ON(node->refs < 2);
1417 * 1 - if the root with id child_root_id is a child of root parent_root_id
1418 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1419 * has other root(s) as parent(s)
1420 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Decide how root @child_root_id relates to @parent_root_id by consulting
 * the tree root: first a direct ROOT_REF lookup keyed
 * (parent, ROOT_REF, child), then a scan over the child's ROOT_BACKREF
 * items.
 * NOTE(review): the result checks after each search and the loop construct
 * are not visible here.
 */
1422 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1425 struct btrfs_path path;
1426 struct btrfs_key key;
1427 struct extent_buffer *leaf;
1431 btrfs_init_path(&path);
/* Step 1: look for the forward reference parent -> child. */
1433 key.objectid = parent_root_id;
1434 key.type = BTRFS_ROOT_REF_KEY;
1435 key.offset = child_root_id;
1436 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1440 btrfs_release_path(&path);
/* Step 2: walk the back references recorded for the child root. */
1444 key.objectid = child_root_id;
1445 key.type = BTRFS_ROOT_BACKREF_KEY;
1447 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1453 leaf = path.nodes[0];
/* Slot ran off the end of this leaf: move on to the next leaf. */
1454 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1455 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1458 leaf = path.nodes[0];
1461 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Items that are not back references of the child end the scan range. */
1462 if (key.objectid != child_root_id ||
1463 key.type != BTRFS_ROOT_BACKREF_KEY)
/* A back reference whose offset is the parent id names that parent. */
1468 if (key.offset == parent_root_id) {
1469 btrfs_release_path(&path);
1476 btrfs_release_path(&path);
/* No match: 0 if some other parent was seen, 2 if none at all. */
1479 return has_parent ? 0 : 2;
/*
 * Process one DIR_ITEM/DIR_INDEX item at @slot of leaf @eb. The item may
 * pack several btrfs_dir_item entries; each one is read out and recorded as
 * a backref via add_inode_backref(), in the active node's inode cache or
 * root cache depending on the type of the key the entry points to.
 * NOTE(review): local declarations, counter updates and the return statement
 * are not all visible here.
 */
1482 static int process_dir_item(struct extent_buffer *eb,
1483 int slot, struct btrfs_key *key,
1484 struct shared_node *active_node)
1494 struct btrfs_dir_item *di;
1495 struct inode_record *rec;
1496 struct cache_tree *root_cache;
1497 struct cache_tree *inode_cache;
1498 struct btrfs_key location;
1499 char namebuf[BTRFS_NAME_LEN];
1501 root_cache = &active_node->root_cache;
1502 inode_cache = &active_node->inode_cache;
/* The directory's own record is the one currently being processed. */
1503 rec = active_node->current;
1504 rec->found_dir_item = 1;
1506 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1507 total = btrfs_item_size_nr(eb, slot);
/* Walk the entries packed back to back inside the item. */
1508 while (cur < total) {
1510 btrfs_dir_item_key_to_cpu(eb, di, &location);
1511 name_len = btrfs_dir_name_len(eb, di);
1512 data_len = btrfs_dir_data_len(eb, di);
1513 filetype = btrfs_dir_type(eb, di);
/* The accumulated name lengths are tracked in found_size. */
1515 rec->found_size += name_len;
/*
 * A name that crosses the item boundary or exceeds BTRFS_NAME_LEN is
 * flagged, and the length to copy is clamped so that namebuf cannot
 * be overrun.
 */
1516 if (cur + sizeof(*di) + name_len > total ||
1517 name_len > BTRFS_NAME_LEN) {
1518 error = REF_ERR_NAME_TOO_LONG;
1520 if (cur + sizeof(*di) > total)
1522 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes follow the fixed-size dir item header. */
1529 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* Entries pointing at inodes and at subvolume roots are kept apart. */
1531 if (location.type == BTRFS_INODE_ITEM_KEY) {
1532 add_inode_backref(inode_cache, location.objectid,
1533 key->objectid, key->offset, namebuf,
1534 len, filetype, key->type, error);
1535 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1536 add_inode_backref(root_cache, location.objectid,
1537 key->objectid, key->offset,
1538 namebuf, len, filetype,
/* Any other location type is reported and filed under a placeholder id. */
1541 fprintf(stderr, "invalid location in dir item %u\n",
1543 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1544 key->objectid, key->offset, namebuf,
1545 len, filetype, key->type, error);
/* Advance past header, name and data to the next packed entry. */
1548 len = sizeof(*di) + name_len + data_len;
1549 di = (struct btrfs_dir_item *)((char *)di + len);
/* More than one entry under a single DIR_INDEX key is flagged as a dup. */
1552 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1553 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process one INODE_REF item at @slot of leaf @eb. The item may pack several
 * btrfs_inode_ref entries; each is recorded through add_inode_backref() in
 * the active node's inode cache, with the item key's objectid as the inode
 * and the key's offset as the directory.
 * NOTE(review): local declarations, the 'cur' update and the return
 * statement are not visible here.
 */
1558 static int process_inode_ref(struct extent_buffer *eb,
1559 int slot, struct btrfs_key *key,
1560 struct shared_node *active_node)
1568 struct cache_tree *inode_cache;
1569 struct btrfs_inode_ref *ref;
1570 char namebuf[BTRFS_NAME_LEN];
1572 inode_cache = &active_node->inode_cache;
1574 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1575 total = btrfs_item_size_nr(eb, slot);
1576 while (cur < total) {
1577 name_len = btrfs_inode_ref_name_len(eb, ref);
1578 index = btrfs_inode_ref_index(eb, ref);
1580 /* inode_ref + namelen should not cross item boundary */
1581 if (cur + sizeof(*ref) + name_len > total ||
1582 name_len > BTRFS_NAME_LEN) {
1583 if (total < cur + sizeof(*ref))
1586 /* Still try to read out the remaining part */
1587 len = min_t(u32, total - cur - sizeof(*ref),
1589 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size ref header. */
1595 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1596 add_inode_backref(inode_cache, key->objectid, key->offset,
1597 index, namebuf, len, 0, key->type, error);
/* Step over header plus name to the next packed ref. */
1599 len = sizeof(*ref) + name_len;
1600 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process one INODE_EXTREF item at @slot of leaf @eb. Works like
 * process_inode_ref(), except that the parent directory is read from each
 * extref entry rather than from the item key.
 * NOTE(review): unlike process_inode_ref(), no check of the name against the
 * item boundary is visible here -- confirm whether one is needed.
 */
1606 static int process_inode_extref(struct extent_buffer *eb,
1607 int slot, struct btrfs_key *key,
1608 struct shared_node *active_node)
1617 struct cache_tree *inode_cache;
1618 struct btrfs_inode_extref *extref;
1619 char namebuf[BTRFS_NAME_LEN];
1621 inode_cache = &active_node->inode_cache;
1623 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1624 total = btrfs_item_size_nr(eb, slot);
1625 while (cur < total) {
1626 name_len = btrfs_inode_extref_name_len(eb, extref);
1627 index = btrfs_inode_extref_index(eb, extref);
1628 parent = btrfs_inode_extref_parent(eb, extref);
/* Over-long names are clamped to BTRFS_NAME_LEN and flagged. */
1629 if (name_len <= BTRFS_NAME_LEN) {
1633 len = BTRFS_NAME_LEN;
1634 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the fixed-size extref header. */
1636 read_extent_buffer(eb, namebuf,
1637 (unsigned long)(extref + 1), len);
1638 add_inode_backref(inode_cache, key->objectid, parent,
1639 index, namebuf, len, 0, key->type, error);
/* Step over header plus name to the next packed extref. */
1641 len = sizeof(*extref) + name_len;
1642 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the checksum tree for EXTENT_CSUM items overlapping the byte range
 * [@start, @start + @len) and work out how much of that range they cover.
 * NOTE(review): the statements that update @start, @len and *@found, and the
 * return statement, are not visible here; presumably the covered byte count
 * is accumulated into *@found -- confirm.
 */
1649 static int count_csum_range(struct btrfs_root *root, u64 start,
1650 u64 len, u64 *found)
1652 struct btrfs_key key;
1653 struct btrfs_path path;
1654 struct extent_buffer *leaf;
1659 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1661 btrfs_init_path(&path);
1663 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1665 key.type = BTRFS_EXTENT_CSUM_KEY;
1667 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: inspect the preceding item, since a csum item that
 * starts earlier may still extend into the requested range.
 */
1671 if (ret > 0 && path.slots[0] > 0) {
1672 leaf = path.nodes[0];
1673 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1674 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1675 key.type == BTRFS_EXTENT_CSUM_KEY)
1680 leaf = path.nodes[0];
/* Slot ran off the end of this leaf: move on to the next leaf. */
1681 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1682 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1687 leaf = path.nodes[0];
1690 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1691 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1692 key.type != BTRFS_EXTENT_CSUM_KEY)
/* NOTE(review): this repeats the key conversion done just above. */
1695 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Items starting at or beyond the end of the range are of no interest. */
1696 if (key.offset >= start + len)
1699 if (key.offset > start)
/*
 * Each csum_size bytes of item payload stand for one sector of data, so
 * the item covers [key.offset, csum_end).
 */
1702 size = btrfs_item_size_nr(leaf, path.slots[0]);
1703 csum_end = key.offset + (size / csum_size) *
1704 root->fs_info->sectorsize;
1705 if (csum_end > start) {
1706 size = min(csum_end - start, len);
1715 btrfs_release_path(&path);
/*
 * Process one EXTENT_DATA item at @slot of leaf @eb for the inode record
 * currently active in @active_node: track the extent range seen so far
 * (overlaps and holes), validate the item's fields, add to the record's
 * found_size, and compare the extent against the checksum tree.
 * NOTE(review): several conditions, 'else' lines and the return statement
 * are not visible here.
 */
1721 static int process_file_extent(struct btrfs_root *root,
1722 struct extent_buffer *eb,
1723 int slot, struct btrfs_key *key,
1724 struct shared_node *active_node)
1726 struct inode_record *rec;
1727 struct btrfs_file_extent_item *fi;
1729 u64 disk_bytenr = 0;
1730 u64 extent_offset = 0;
/* Mask of the bits below sector alignment. */
1731 u64 mask = root->fs_info->sectorsize - 1;
1735 rec = active_node->current;
1736 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1737 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet": start tracking at this one. */
1739 if (rec->extent_start == (u64)-1) {
1740 rec->extent_start = key->offset;
1741 rec->extent_end = key->offset;
/* The new extent starts before the previous end (overlap) or after it (hole). */
1744 if (rec->extent_end > key->offset)
1745 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1746 else if (rec->extent_end < key->offset) {
1747 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1748 key->offset - rec->extent_end);
1753 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1754 extent_type = btrfs_file_extent_type(eb, fi);
/* Inline extent: count the inline length, then round up to a sector. */
1756 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1757 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1759 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1760 rec->found_size += num_bytes;
1761 num_bytes = (num_bytes + mask) & ~mask;
1762 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1763 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1764 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1765 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1766 extent_offset = btrfs_file_extent_offset(eb, fi);
/* The length must be non-zero and sector aligned. */
1767 if (num_bytes == 0 || (num_bytes & mask))
1768 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must fit inside the extent's ram_bytes. */
1769 if (num_bytes + extent_offset >
1770 btrfs_file_extent_ram_bytes(eb, fi))
1771 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* A preallocated extent must not carry any encoding. */
1772 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1773 (btrfs_file_extent_compression(eb, fi) ||
1774 btrfs_file_extent_encryption(eb, fi) ||
1775 btrfs_file_extent_other_encoding(eb, fi)))
1776 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a disk location add to found_size. */
1777 if (disk_bytenr > 0)
1778 rec->found_size += num_bytes;
1780 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1782 rec->extent_end = key->offset + num_bytes;
1785 * The data reloc tree will copy full extents into its inode and then
1786 * copy the corresponding csums. Because the extent it copied could be
1787 * a preallocated extent that hasn't been written to yet there may be no
1788 * csums to copy, ergo we won't have csums for our file extent. This is
1789 * ok so just don't bother checking csums if the inode belongs to the
1792 if (disk_bytenr > 0 &&
1793 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* For compressed extents the on-disk length is the one looked up. */
1795 if (btrfs_file_extent_compression(eb, fi))
1796 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1798 disk_bytenr += extent_offset;
1800 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/*
 * Regular extents: note that csums exist and whether coverage is
 * partial. Preallocated extents: a csum-related error is flagged.
 */
1803 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1805 rec->found_csum_item = 1;
1806 if (found < num_bytes)
1807 rec->some_csum_missing = 1;
1808 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1810 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of leaf @eb: keep active_node->current pointing at the
 * inode record for the item's objectid and dispatch on the key type to the
 * per-item handlers.
 * NOTE(review): the 'continue', 'switch' and 'return' lines are not visible
 * here.
 */
1816 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1817 struct walk_control *wc)
1819 struct btrfs_key key;
1823 struct cache_tree *inode_cache;
1824 struct shared_node *active_node;
/* Special case: the active node is the root level of a zero-ref root. */
1826 if (wc->root_level == wc->active_node &&
1827 btrfs_root_refs(&root->root_item) == 0)
1830 active_node = wc->nodes[wc->active_node];
1831 inode_cache = &active_node->inode_cache;
1832 nritems = btrfs_header_nritems(eb);
1833 for (i = 0; i < nritems; i++) {
1834 btrfs_item_key_to_cpu(eb, &key, i);
/* Free-space objects and orphan items get special treatment. */
1836 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1838 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * Moved on to a higher inode number: mark the previous record as
 * checked, let maybe_free_inode_rec() decide whether to drop it, and
 * fetch a record for the new inode.
 */
1841 if (active_node->current == NULL ||
1842 active_node->current->ino < key.objectid) {
1843 if (active_node->current) {
1844 active_node->current->checked = 1;
1845 maybe_free_inode_rec(inode_cache,
1846 active_node->current);
1848 active_node->current = get_inode_rec(inode_cache,
1850 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the key type. */
1853 case BTRFS_DIR_ITEM_KEY:
1854 case BTRFS_DIR_INDEX_KEY:
1855 ret = process_dir_item(eb, i, &key, active_node);
1857 case BTRFS_INODE_REF_KEY:
1858 ret = process_inode_ref(eb, i, &key, active_node);
1860 case BTRFS_INODE_EXTREF_KEY:
1861 ret = process_inode_extref(eb, i, &key, active_node);
1863 case BTRFS_INODE_ITEM_KEY:
1864 ret = process_inode_item(eb, i, &key, active_node);
1866 case BTRFS_EXTENT_DATA_KEY:
1867 ret = process_file_extent(root, eb, i, &key,
/*
 * Per-level cache, one slot for each tree level up to BTRFS_MAX_LEVEL:
 * the byte number of the block last looked up at that level, its
 * reference count, and whether it needs checking in the current tree.
 */
1878 u64 bytenr[BTRFS_MAX_LEVEL];
1879 u64 refs[BTRFS_MAX_LEVEL];
1880 int need_check[BTRFS_MAX_LEVEL];
/* Forward declarations for helpers used before their definitions. */
1883 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1884 struct node_refs *nrefs, u64 level);
1885 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1886 unsigned int ext_ref);
1889 * Returns >0 Found error, not fatal, should continue
1890 * Returns <0 Fatal error, must exit the whole check
1891 * Returns 0 No errors found
/*
 * Check the inodes stored in the leaf at path->nodes[0] by calling
 * check_inode_item() on them. If that call moves the path to a different
 * leaf, the cached per-level reference information in @nrefs is refreshed.
 * NOTE(review): the loop construct around check_inode_item(), the goto
 * targets and the return statement are not visible here.
 */
1893 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1894 struct node_refs *nrefs, int *level, int ext_ref)
1896 struct extent_buffer *cur = path->nodes[0];
1897 struct btrfs_key key;
1901 int root_level = btrfs_header_level(root->node);
1903 int ret = 0; /* Final return value */
1904 int err = 0; /* Positive error bitmap */
/* Remember which leaf we started on to detect a leaf switch later. */
1906 cur_bytenr = cur->start;
1908 /* skip to first inode item or the first inode number change */
1909 nritems = btrfs_header_nritems(cur);
1910 for (i = 0; i < nritems; i++) {
1911 btrfs_item_key_to_cpu(cur, &key, i);
1913 first_ino = key.objectid;
1914 if (key.type == BTRFS_INODE_ITEM_KEY ||
1915 (first_ino && first_ino != key.objectid))
1919 path->slots[0] = nritems;
1925 err |= check_inode_item(root, path, ext_ref);
/* LAST_ITEM in the result bitmap gets special handling. */
1927 if (err & LAST_ITEM)
1930 /* still have inode items in this leaf */
1931 if (cur->start == cur_bytenr)
1935 * we have switched to another leaf, above nodes may
1936 * have changed, here walk down the path, if a node
1937 * or leaf is shared, check whether we can skip this
1940 for (i = root_level; i >= 0; i--) {
/* Compare this level's block with the one cached in @nrefs. */
1941 if (path->nodes[i]->start == nrefs->bytenr[i])
1944 ret = update_nodes_refs(root,
1945 path->nodes[i]->start,
1950 if (!nrefs->need_check[i]) {
/* Release the path's buffers below *level. */
1956 for (i = 0; i < *level; i++) {
1957 free_extent_buffer(path->nodes[i]);
1958 path->nodes[i] = NULL;
/*
 * Issue readahead for the child blocks referenced by @node, from @slot up
 * to the last item.
 * NOTE(review): a check on 'level' appears to be missing from this view.
 */
1967 static void reada_walk_down(struct btrfs_root *root,
1968 struct extent_buffer *node, int slot)
1977 level = btrfs_header_level(node);
1981 nritems = btrfs_header_nritems(node);
1982 blocksize = root->fs_info->nodesize;
1983 for (i = slot; i < nritems; i++) {
1984 bytenr = btrfs_node_blockptr(node, i);
1985 ptr_gen = btrfs_node_ptr_generation(node, i);
1986 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1991 * Check the child node/leaf by the following condition:
1992 * 1. the first item key of the node/leaf should be the same as the one
1994 * 2. block in parent node should match the child node/leaf.
1995 * 3. generation of parent node and child's header should be consistent.
1997 * Otherwise the child node/leaf pointed to by the key in the parent is not valid.
1999 * We hope to check leaf owner too, but since subvol may share leaves,
2000 * which makes leaf owner check not so strong, key check should be
2001 * sufficient enough for that case.
/*
 * Verify that @child really is the block referenced by slot @slot of
 * @parent. Three properties are compared and each mismatch is reported on
 * stderr as "wanted" (what the parent records) versus "have" (what the
 * child contains): the first key, the block number and the generation.
 *
 * The generation report used to pass its two values in the opposite order
 * to its "wanted/have" labels; they now follow the same parent-then-child
 * order as the key and block reports.
 */
2003 static int check_child_node(struct extent_buffer *parent, int slot,
2004 struct extent_buffer *child)
2006 struct btrfs_key parent_key;
2007 struct btrfs_key child_key;
2010 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* A leaf (level 0) holds item keys, an internal node holds node keys. */
2011 if (btrfs_header_level(child) == 0)
2012 btrfs_item_key_to_cpu(child, &child_key, 0);
2014 btrfs_node_key_to_cpu(child, &child_key, 0);
/* 1. The key stored in the parent must equal the child's first key. */
2016 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
2019 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
2020 parent_key.objectid, parent_key.type, parent_key.offset,
2021 child_key.objectid, child_key.type, child_key.offset);
/* 2. The parent's block pointer must equal the child's own bytenr. */
2023 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
2025 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
2026 btrfs_node_blockptr(parent, slot),
2027 btrfs_header_bytenr(child));
/* 3. The parent's pointer generation must equal the child's header's. */
2029 if (btrfs_node_ptr_generation(parent, slot) !=
2030 btrfs_header_generation(child)) {
2032 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
2033 btrfs_node_ptr_generation(parent, slot),
2034 btrfs_header_generation(child));
2040 * For a tree node or leaf that is shared, we don't need to iterate it
2041 * in every fs or file tree check. Here we find all of its root ids, and only
2042 * check it in the fs or file tree which has the smallest root id.
/*
 * Decide whether a tree block referenced by the roots listed in @roots
 * should be checked while walking @root. The first entry of the list's
 * rb-tree is compared with @root's objectid.
 * NOTE(review): the return statements are not visible here.
 */
2044 static int need_check(struct btrfs_root *root, struct ulist *roots)
2046 struct rb_node *node;
2047 struct ulist_node *u;
/* Only one root references the block. */
2049 if (roots->nnodes == 1)
/* rb_first() yields the first entry in the rb-tree's order. */
2052 node = rb_first(&roots->root);
2053 u = rb_entry(node, struct ulist_node, rb_node);
2055 * current root id is not smallest, we skip it and let it be checked
2056 * in the fs or file tree who hash the smallest root id.
2058 if (root->objectid != u->val)
2065 * For a tree node or leaf, we record its reference count, so that if we
2066 * process this node or leaf again later, we don't need to compute it again.
/*
 * Refresh the @nrefs slot for @level when it does not already describe
 * @bytenr: look up the block's reference count, find every root that
 * references it and record whether it has to be checked in @root.
 * NOTE(review): the conditions selecting between the two need_check
 * assignments, and the error handling, are not visible here.
 */
2068 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2069 struct node_refs *nrefs, u64 level)
2073 struct ulist *roots;
/* Only do the lookups when the cached slot is for a different block. */
2075 if (nrefs->bytenr[level] != bytenr) {
2076 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2077 level, 1, &refs, NULL);
2081 nrefs->bytenr[level] = bytenr;
2082 nrefs->refs[level] = refs;
2084 ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2089 check = need_check(root, roots);
2091 nrefs->need_check[level] = check;
2093 nrefs->need_check[level] = 1;
/*
 * Descend from path->nodes[*level] towards the leaves. Leaves are handed to
 * process_one_leaf(); for internal nodes the child at the current slot is
 * read, validated and made the new current level. Reference counts are
 * cached per level in @nrefs, and enter_shared_node() is called for blocks
 * on the way down.
 * NOTE(review): many branch, goto and return lines are not visible here; in
 * particular the conditions guarding the enter_shared_node() calls should be
 * confirmed.
 */
2100 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2101 struct walk_control *wc, int *level,
2102 struct node_refs *nrefs)
2104 enum btrfs_tree_block_status status;
2107 struct btrfs_fs_info *fs_info = root->fs_info;
2108 struct extent_buffer *next;
2109 struct extent_buffer *cur;
2114 WARN_ON(*level < 0);
2115 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count for the starting block when it matches. */
2117 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2118 refs = nrefs->refs[*level];
2121 ret = btrfs_lookup_extent_info(NULL, root,
2122 path->nodes[*level]->start,
2123 *level, 1, &refs, NULL);
2128 nrefs->bytenr[*level] = path->nodes[*level]->start;
2129 nrefs->refs[*level] = refs;
2133 ret = enter_shared_node(root, path->nodes[*level]->start,
2141 while (*level >= 0) {
2142 WARN_ON(*level < 0);
2143 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2144 cur = path->nodes[*level];
/* The level stored in the block header must agree with the path level. */
2146 if (btrfs_header_level(cur) != *level)
/* All slots of this block are consumed. */
2149 if (path->slots[*level] >= btrfs_header_nritems(cur))
2152 ret = process_one_leaf(root, cur, wc);
/* Internal node: fetch the child pointer at the current slot. */
2157 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2158 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2159 blocksize = fs_info->nodesize;
/* Same ref-count caching as above, for the child's level. */
2161 if (bytenr == nrefs->bytenr[*level - 1]) {
2162 refs = nrefs->refs[*level - 1];
2164 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2165 *level - 1, 1, &refs, NULL);
2169 nrefs->bytenr[*level - 1] = bytenr;
2170 nrefs->refs[*level - 1] = refs;
2175 ret = enter_shared_node(root, bytenr, refs,
2178 path->slots[*level]++;
/* Use a cached, up-to-date buffer if there is one; otherwise read it. */
2183 next = btrfs_find_tree_block(fs_info, bytenr, blocksize);
2184 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2185 free_extent_buffer(next);
2186 reada_walk_down(root, cur, path->slots[*level]);
2187 next = read_tree_block(root->fs_info, bytenr, blocksize,
/* Unreadable child: record a corrupt extent keyed by the node key. */
2189 if (!extent_buffer_uptodate(next)) {
2190 struct btrfs_key node_key;
2192 btrfs_node_key_to_cpu(path->nodes[*level],
2194 path->slots[*level]);
2195 btrfs_add_corrupt_extent_record(root->fs_info,
2197 path->nodes[*level]->start,
2198 root->fs_info->nodesize,
/* Cross-check the child against the parent's pointer. */
2205 ret = check_child_node(cur, path->slots[*level], next);
2207 free_extent_buffer(next);
/* Structural validation of the child block itself. */
2212 if (btrfs_is_leaf(next))
2213 status = btrfs_check_leaf(root, NULL, next);
2215 status = btrfs_check_node(root, NULL, next);
2216 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2217 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held at that level. */
2222 *level = *level - 1;
2223 free_extent_buffer(path->nodes[*level]);
2224 path->nodes[*level] = next;
2225 path->slots[*level] = 0;
/* Mark the current block as fully consumed. */
2228 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/* Prototype only; it repeats a declaration made earlier in this file. */
2232 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2233 unsigned int ext_ref);
2236 * Returns >0 Found error, should continue
2237 * Returns <0 Fatal error, must exit the whole check
2238 * Returns 0 No errors found
/*
 * Variant of walk_down_tree() that uses @nrefs instead of a walk_control.
 * Each block is validated with btrfs_check_leaf()/btrfs_check_node(); leaves
 * go to process_one_leaf_v2(); children whose need_check flag is clear are
 * skipped by advancing the slot.
 * NOTE(review): the branch, goto and return lines are not visible here. The
 * sibling walk_down_tree() visibly frees 'next' right after
 * check_child_node(); no such call is visible at that point here -- confirm
 * that 'next' is not leaked when check_child_node() fails.
 */
2240 static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2241 int *level, struct node_refs *nrefs, int ext_ref)
2243 enum btrfs_tree_block_status status;
2246 struct btrfs_fs_info *fs_info = root->fs_info;
2247 struct extent_buffer *next;
2248 struct extent_buffer *cur;
2252 WARN_ON(*level < 0);
2253 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Refresh the cached reference info for the starting block. */
2255 ret = update_nodes_refs(root, path->nodes[*level]->start,
2260 while (*level >= 0) {
2261 WARN_ON(*level < 0);
2262 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2263 cur = path->nodes[*level];
/* The level stored in the block header must agree with the path level. */
2265 if (btrfs_header_level(cur) != *level)
/* All slots of this block are consumed. */
2268 if (path->slots[*level] >= btrfs_header_nritems(cur))
2270 /* Don't forget to check leaf/node validity */
2272 ret = btrfs_check_leaf(root, NULL, cur);
2273 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2277 ret = process_one_leaf_v2(root, path, nrefs,
2281 ret = btrfs_check_node(root, NULL, cur);
2282 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
/* Internal node: fetch the child pointer at the current slot. */
2287 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2288 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2289 blocksize = fs_info->nodesize;
2291 ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
/* The child does not need checking in this tree: skip the slot. */
2294 if (!nrefs->need_check[*level - 1]) {
2295 path->slots[*level]++;
/* Use a cached, up-to-date buffer if there is one; otherwise read it. */
2299 next = btrfs_find_tree_block(fs_info, bytenr, blocksize);
2300 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2301 free_extent_buffer(next);
2302 reada_walk_down(root, cur, path->slots[*level]);
2303 next = read_tree_block(fs_info, bytenr, blocksize,
/* Unreadable child: record a corrupt extent keyed by the node key. */
2305 if (!extent_buffer_uptodate(next)) {
2306 struct btrfs_key node_key;
2308 btrfs_node_key_to_cpu(path->nodes[*level],
2310 path->slots[*level]);
2311 btrfs_add_corrupt_extent_record(fs_info,
2313 path->nodes[*level]->start,
/* Cross-check the child against the parent's pointer. */
2321 ret = check_child_node(cur, path->slots[*level], next);
/* Structural validation of the child block itself. */
2325 if (btrfs_is_leaf(next))
2326 status = btrfs_check_leaf(root, NULL, next);
2328 status = btrfs_check_node(root, NULL, next);
2329 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2330 free_extent_buffer(next);
/* Step down: the child replaces whatever the path held at that level. */
2335 *level = *level - 1;
2336 free_extent_buffer(path->nodes[*level]);
2337 path->nodes[*level] = next;
2338 path->slots[*level] = 0;
/*
 * Climb from *level towards the root looking for a node that still has an
 * unvisited slot. Buffers of exhausted levels are released, and
 * leave_shared_node() is called when the level being left is the active
 * shared node.
 * NOTE(review): the slot/level updates and the return statements are not
 * visible here.
 */
2343 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2344 struct walk_control *wc, int *level)
2347 struct extent_buffer *leaf;
2349 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name, 'leaf' is the block at level i, whatever that level is. */
2350 leaf = path->nodes[i];
/* This block has at least one more slot after the current one. */
2351 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Level exhausted: drop its buffer from the path. */
2356 free_extent_buffer(path->nodes[*level]);
2357 path->nodes[*level] = NULL;
2358 BUG_ON(*level > wc->active_node);
2359 if (*level == wc->active_node)
2360 leave_shared_node(root, wc, *level);
/*
 * Variant of walk_up_tree() with no shared-node bookkeeping visible: climb
 * from *level looking for a node with an unvisited slot, releasing the
 * buffers of exhausted levels.
 * NOTE(review): the rest of the signature, the slot/level updates and the
 * return statements are not visible here.
 */
2367 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2371 struct extent_buffer *leaf;
2373 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2374 leaf = path->nodes[i];
/* This block has at least one more slot after the current one. */
2375 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* Level exhausted: drop its buffer from the path. */
2380 free_extent_buffer(path->nodes[*level]);
2381 path->nodes[*level] = NULL;
/*
 * Sanity-check the inode record of a root directory. The visible conditions
 * test for: a missing inode item or recorded errors; nlink other than 1 or
 * a non-zero found_link; an empty backref list; and a first backref that is
 * not an inode ref named ".." at index 0 or that has a dir index/dir item.
 * NOTE(review): the goto/return lines are not visible; presumably each
 * condition that fires makes the check fail -- confirm.
 */
2388 static int check_root_dir(struct inode_record *rec)
2390 struct inode_backref *backref;
2393 if (!rec->found_inode_item || rec->errors)
2395 if (rec->nlink != 1 || rec->found_link != 0)
2397 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2399 backref = to_inode_backref(rec->backrefs.next);
2400 if (!backref->found_inode_ref)
2402 if (backref->index != 0 || backref->namelen != 2 ||
2403 memcmp(backref->name, "..", 2))
2405 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair a wrong directory size: locate the inode item of @rec->ino in
 * @root, set its size to rec->found_size, mark the leaf dirty and clear
 * I_ERR_DIR_ISIZE_WRONG on the record.
 * NOTE(review): the error branches and the slot adjustment after the search
 * are not visible here.
 */
2412 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2413 struct btrfs_root *root, struct btrfs_path *path,
2414 struct inode_record *rec)
2416 struct btrfs_inode_item *ei;
2417 struct btrfs_key key;
/*
 * Search with the maximal offset rather than an exact key.
 * NOTE(review): presumably so that the search lands just past the inode
 * item and the code can step back onto it -- confirm.
 */
2420 key.objectid = rec->ino;
2421 key.type = BTRFS_INODE_ITEM_KEY;
2422 key.offset = (u64)-1;
2424 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 means there is no preceding item in this leaf. */
2428 if (!path->slots[0]) {
2435 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* The item found must belong to the inode being repaired. */
2436 if (key.objectid != rec->ino) {
2441 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2442 struct btrfs_inode_item);
2443 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2444 btrfs_mark_buffer_dirty(path->nodes[0]);
2445 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2446 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2447 root->root_key.objectid);
2449 btrfs_release_path(path);
/*
 * Add the orphan item for @rec->ino to @root and clear
 * I_ERR_NO_ORPHAN_ITEM on the record.
 * NOTE(review): the condition guarding the flag update and the return
 * statement are not visible here.
 */
2453 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2454 struct btrfs_root *root,
2455 struct btrfs_path *path,
2456 struct inode_record *rec)
2460 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2461 btrfs_release_path(path);
2463 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair a wrong nbytes value: look up the inode item of @rec->ino in
 * @root, set its nbytes to rec->found_size, mark the leaf dirty and clear
 * I_ERR_FILE_NBYTES_WRONG on the record.
 * NOTE(review): the key offset assignment and the search-result checks are
 * not visible here.
 */
2467 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2468 struct btrfs_root *root,
2469 struct btrfs_path *path,
2470 struct inode_record *rec)
2472 struct btrfs_inode_item *ei;
2473 struct btrfs_key key;
2476 key.objectid = rec->ino;
2477 key.type = BTRFS_INODE_ITEM_KEY;
2480 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2487 /* Since ret == 0, no need to check anything */
2488 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2489 struct btrfs_inode_item);
2490 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2491 btrfs_mark_buffer_dirty(path->nodes[0]);
2492 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2493 printf("reset nbytes for ino %llu root %llu\n",
2494 rec->ino, root->root_key.objectid);
2496 btrfs_release_path(path);
/*
 * Re-create a missing DIR_INDEX item for @backref in its own transaction:
 * insert an empty item keyed (backref->dir, DIR_INDEX, backref->index),
 * fill in a dir item pointing at @rec's inode with the backref's name,
 * commit, then update the in-memory bookkeeping of the backref and of the
 * directory's inode record.
 * NOTE(review): the checks on the transaction handle and on the insertion
 * result, and the return statement, are not visible here.
 */
2500 static int add_missing_dir_index(struct btrfs_root *root,
2501 struct cache_tree *inode_cache,
2502 struct inode_record *rec,
2503 struct inode_backref *backref)
2505 struct btrfs_path path;
2506 struct btrfs_trans_handle *trans;
2507 struct btrfs_dir_item *dir_item;
2508 struct extent_buffer *leaf;
2509 struct btrfs_key key;
2510 struct btrfs_disk_key disk_key;
2511 struct inode_record *dir_rec;
2512 unsigned long name_ptr;
/* Item payload: the fixed dir item header followed by the name bytes. */
2513 u32 data_size = sizeof(*dir_item) + backref->namelen;
2516 trans = btrfs_start_transaction(root, 1);
2518 return PTR_ERR(trans);
2520 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2521 (unsigned long long)rec->ino);
2523 btrfs_init_path(&path);
2524 key.objectid = backref->dir;
2525 key.type = BTRFS_DIR_INDEX_KEY;
2526 key.offset = backref->index;
2527 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2530 leaf = path.nodes[0];
2531 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* The entry's location key names the inode item of @rec. */
2533 disk_key.objectid = cpu_to_le64(rec->ino);
2534 disk_key.type = BTRFS_INODE_ITEM_KEY;
2535 disk_key.offset = 0;
2537 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2538 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2539 btrfs_set_dir_data_len(leaf, dir_item, 0);
2540 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is stored directly after the dir item header. */
2541 name_ptr = (unsigned long)(dir_item + 1);
2542 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2543 btrfs_mark_buffer_dirty(leaf);
2544 btrfs_release_path(&path);
2545 btrfs_commit_transaction(trans, root);
/* In-memory state: the backref now has its index ... */
2547 backref->found_dir_index = 1;
2548 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2549 BUG_ON(IS_ERR(dir_rec));
/* ... and the directory's accounted size grows by the name length. */
2552 dir_rec->found_size += backref->namelen;
/* Re-evaluate the size error flag against the updated found_size. */
2553 if (dir_rec->found_size == dir_rec->isize &&
2554 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2555 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2556 if (dir_rec->found_size != dir_rec->isize)
2557 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref from @root inside its
 * own transaction: look it up by directory, name and index, then remove
 * either the whole item or just the one name.
 * NOTE(review): the conditions choosing between btrfs_del_item() and
 * btrfs_delete_one_dir_name(), the handling of a failed lookup, and the
 * return statements are not visible here.
 */
2562 static int delete_dir_index(struct btrfs_root *root,
2563 struct inode_backref *backref)
2565 struct btrfs_trans_handle *trans;
2566 struct btrfs_dir_item *di;
2567 struct btrfs_path path;
2570 trans = btrfs_start_transaction(root, 1);
2572 return PTR_ERR(trans);
2574 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2575 (unsigned long long)backref->dir,
2576 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2577 (unsigned long long)root->objectid);
2579 btrfs_init_path(&path);
2580 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2581 backref->name, backref->namelen,
2582 backref->index, -1);
/* One exit path: release the path and commit the transaction. */
2585 btrfs_release_path(&path);
2586 btrfs_commit_transaction(trans, root);
2593 ret = btrfs_del_item(trans, root, &path);
2595 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2597 btrfs_release_path(&path);
2598 btrfs_commit_transaction(trans, root);
/*
 * Re-create a missing inode item for @rec in @root from what the check has
 * learned about the inode. The new item uses the current transaction id as
 * generation and the current time for atime/ctime/mtime (otime is zeroed).
 * It is written as a directory (S_IFDIR | 0755) when a dir item was found
 * for the inode, and as a regular file (S_IFREG | 0755) otherwise.
 * NOTE(review): the third parameter and the condition choosing between the
 * two nlink assignments are not visible here.
 */
2602 static int create_inode_item(struct btrfs_root *root,
2603 struct inode_record *rec,
2606 struct btrfs_trans_handle *trans;
2607 struct btrfs_inode_item inode_item;
2608 time_t now = time(NULL);
2611 trans = btrfs_start_transaction(root, 1);
2612 if (IS_ERR(trans)) {
2613 ret = PTR_ERR(trans);
2617 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2618 "be incomplete, please check permissions and content after "
2619 "the fsck completes.\n", (unsigned long long)root->objectid,
2620 (unsigned long long)rec->ino);
2622 memset(&inode_item, 0, sizeof(inode_item));
2623 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2625 btrfs_set_stack_inode_nlink(&inode_item, 1);
2627 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2628 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
/* A dir item wins over file extents when both were found. */
2629 if (rec->found_dir_item) {
2630 if (rec->found_file_extent)
2631 fprintf(stderr, "root %llu inode %llu has both a dir "
2632 "item and extents, unsure if it is a dir or a "
2633 "regular file so setting it as a directory\n",
2634 (unsigned long long)root->objectid,
2635 (unsigned long long)rec->ino);
2636 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2637 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* This condition is the negation of the 'if' above, so it always holds here. */
2638 } else if (!rec->found_dir_item) {
2639 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2640 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2642 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2643 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2644 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2645 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2646 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2647 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2648 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2649 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2651 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2653 btrfs_commit_transaction(trans, root);
/*
 * Walk the backrefs of @rec and repair inconsistent combinations of dir
 * index, dir item and inode ref. A 'delete' flag distinguishes the repairs
 * that remove entries from those that add missing ones. The visible cases
 * are annotated below.
 * Returns the last error if there was one, otherwise the 'repaired' value.
 * NOTE(review): the last parameter, the increments of 'repaired', and the
 * break/continue/goto lines are not visible here.
 */
2657 static int repair_inode_backrefs(struct btrfs_root *root,
2658 struct inode_record *rec,
2659 struct cache_tree *inode_cache,
2662 struct inode_backref *tmp, *backref;
2663 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* The _safe iterator is needed because entries are removed in the loop. */
2667 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* Root directory lacking an inode item: re-create the item. */
2668 if (!delete && rec->ino == root_dirid) {
2669 if (!rec->found_inode_item) {
2670 ret = create_inode_item(root, rec, 1);
2677 /* Index 0 for root dir's are special, don't mess with it */
2678 if (rec->ino == root_dirid && backref->index == 0)
/*
 * A dir index with no matching inode ref, or one whose index disagrees
 * with the inode ref, is deleted.
 */
2682 ((backref->found_dir_index && !backref->found_inode_ref) ||
2683 (backref->found_dir_index && backref->found_inode_ref &&
2684 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2685 ret = delete_dir_index(root, backref);
2689 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it. */
2694 if (!delete && !backref->found_dir_index &&
2695 backref->found_dir_item && backref->found_inode_ref) {
2696 ret = add_missing_dir_index(root, inode_cache, rec,
/* All three pieces present and error free: drop the backref. */
2701 if (backref->found_dir_item &&
2702 backref->found_dir_index) {
2703 if (!backref->errors &&
2704 backref->found_inode_ref) {
2705 list_del(&backref->list);
/* Only the inode ref exists: insert the dir index/item pair. */
2712 if (!delete && (!backref->found_dir_index &&
2713 !backref->found_dir_item &&
2714 backref->found_inode_ref)) {
2715 struct btrfs_trans_handle *trans;
2716 struct btrfs_key location;
2718 ret = check_dir_conflict(root, backref->name,
2724 * let nlink fixing routine to handle it,
2725 * which can do it better.
2730 location.objectid = rec->ino;
2731 location.type = BTRFS_INODE_ITEM_KEY;
2732 location.offset = 0;
2734 trans = btrfs_start_transaction(root, 1);
2735 if (IS_ERR(trans)) {
2736 ret = PTR_ERR(trans);
2739 fprintf(stderr, "adding missing dir index/item pair "
2741 (unsigned long long)rec->ino);
2742 ret = btrfs_insert_dir_item(trans, root, backref->name,
2744 backref->dir, &location,
2745 imode_to_type(rec->imode),
2748 btrfs_commit_transaction(trans, root);
/* Refs are consistent but the inode item is missing: re-create it. */
2752 if (!delete && (backref->found_inode_ref &&
2753 backref->found_dir_index &&
2754 backref->found_dir_item &&
2755 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2756 !rec->found_inode_item)) {
2757 ret = create_inode_item(root, rec, 0);
2764 return ret ? ret : repaired;
2768 * To determine the file type for nlink/inode_item repair
2770 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2771 * Return -ENOENT if file type is not found.
2773 static int find_file_type(struct inode_record *rec, u8 *type)
2775 struct inode_backref *backref;
2777 /* For inode item recovered case */
2778 if (rec->found_inode_item) {
2779 *type = imode_to_type(rec->imode);
2783 list_for_each_entry(backref, &rec->backrefs, list) {
2784 if (backref->found_dir_index || backref->found_dir_item) {
2785 *type = backref->filetype;
2793 * To determine the file name for nlink repair
2795 * Return 0 if file name is found, set name and namelen.
2796 * Return -ENOENT if file name is not found.
2798 static int find_file_name(struct inode_record *rec,
2799 char *name, int *namelen)
2801 struct inode_backref *backref;
2803 list_for_each_entry(backref, &rec->backrefs, list) {
2804 if (backref->found_dir_index || backref->found_dir_item ||
2805 backref->found_inode_ref) {
2806 memcpy(name, backref->name, backref->namelen);
2807 *namelen = backref->namelen;
/*
 * reset_nlink() - drop every recorded link of an inode and re-add the
 * complete ones so that the stored nlink is rebuilt from scratch.
 *
 * @trans: transaction passed to btrfs_unlink(), btrfs_search_slot() and
 *         btrfs_add_link()
 * @root:  tree the inode lives in
 * @path:  caller-provided path, used for the inode item lookup and released
 *         after the nlink update and again near the end
 * @rec:   inode record; found_link is zeroed and backrefs that lack a dir
 *         index, a dir item or an inode ref are taken off rec->backrefs
 *
 * Steps: unlink every backref, set nlink to 0 in the inode item located
 * with btrfs_search_slot(), then btrfs_add_link() each backref that is
 * still on the list.
 *
 * NOTE(review): presumably returns 0 on success and the negative error of
 * the failing btrfs_*() helper otherwise - confirm against the error paths.
 */
2814 /* Reset the nlink of the inode to the correct one */
2815 static int reset_nlink(struct btrfs_trans_handle *trans,
2816 struct btrfs_root *root,
2817 struct btrfs_path *path,
2818 struct inode_record *rec)
2820 struct inode_backref *backref;
2821 struct inode_backref *tmp;
2822 struct btrfs_key key;
2823 struct btrfs_inode_item *inode_item;
2826 /* We don't believe this either, reset it and iterate backref */
2827 rec->found_link = 0;
2829 /* Remove all backref including the valid ones */
2830 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2831 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2832 backref->index, backref->name,
2833 backref->namelen, 0);
2837 /* remove invalid backref, so it won't be added back */
2838 if (!(backref->found_dir_index &&
2839 backref->found_dir_item &&
2840 backref->found_inode_ref)) {
2841 list_del(&backref->list);
2848 /* Set nlink to 0 */
2849 key.objectid = rec->ino;
2850 key.type = BTRFS_INODE_ITEM_KEY;
2852 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2859 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2860 struct btrfs_inode_item);
2861 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2862 btrfs_mark_buffer_dirty(path->nodes[0]);
2863 btrfs_release_path(path);
2866 * Add back valid inode_ref/dir_item/dir_index,
2867 * add_link() will handle the nlink inc, so new nlink must be correct
2869 list_for_each_entry(backref, &rec->backrefs, list) {
2870 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2871 backref->name, backref->namelen,
2872 backref->filetype, &backref->index, 1);
2877 btrfs_release_path(path);
/*
 * get_highest_inode() - report the objectid of the item that sits just
 * before the (BTRFS_LAST_FREE_OBJECTID, BTRFS_INODE_ITEM_KEY) search
 * position in @root; presumably the highest inode number in use.
 *
 * @path is initialised here, used for the search and released again at the
 * end.  The key of slot (path->slots[0] - 1) in the leaf is read and its
 * objectid is stored in *highest_ino.
 *
 * NOTE(review): the result is then compared against
 * BTRFS_LAST_FREE_OBJECTID; presumably that case is turned into an error
 * return - confirm.
 */
2881 static int get_highest_inode(struct btrfs_trans_handle *trans,
2882 struct btrfs_root *root,
2883 struct btrfs_path *path,
2886 struct btrfs_key key, found_key;
2889 btrfs_init_path(path);
2890 key.objectid = BTRFS_LAST_FREE_OBJECTID;
2892 key.type = BTRFS_INODE_ITEM_KEY;
2893 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2895 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2896 path->slots[0] - 1);
2897 *highest_ino = found_key.objectid;
2900 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2902 btrfs_release_path(path);
/*
 * repair_inode_nlinks() - fix an inode whose link count is wrong.
 *
 * A name and a file type are collected up front with find_file_name() and
 * find_file_type(); when a lookup fails, the inode number printed as a
 * decimal string and BTRFS_FT_REG_FILE are used as fallbacks.
 * reset_nlink() then rebuilds the links from the recorded backrefs.
 *
 * If no link is left afterwards (rec->found_link == 0), a "lost+found"
 * directory is created under BTRFS_FIRST_FREE_OBJECTID with btrfs_mkdir()
 * and the inode is linked into it with btrfs_add_link().  While that link
 * fails with -EEXIST, an inode-number suffix is appended to the name and
 * the link is retried, bounded by a name length check.
 *
 * I_ERR_LINK_COUNT_WRONG is cleared from rec->errors and @path is released
 * at the end regardless of the outcome, so the same inode is not retried
 * forever.
 */
2906 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2907 struct btrfs_root *root,
2908 struct btrfs_path *path,
2909 struct inode_record *rec)
2911 char *dir_name = "lost+found";
2912 char namebuf[BTRFS_NAME_LEN] = {0};
2917 int name_recovered = 0;
2918 int type_recovered = 0;
2922 * Get file name and type first before these invalid inode ref
2923 * are deleted by remove_all_invalid_backref()
2925 name_recovered = !find_file_name(rec, namebuf, &namelen);
2926 type_recovered = !find_file_type(rec, &type);
2928 if (!name_recovered) {
2929 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2930 rec->ino, rec->ino);
2931 namelen = count_digits(rec->ino);
2932 sprintf(namebuf, "%llu", rec->ino);
2935 if (!type_recovered) {
2936 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2938 type = BTRFS_FT_REG_FILE;
2942 ret = reset_nlink(trans, root, path, rec);
2945 "Failed to reset nlink for inode %llu: %s\n",
2946 rec->ino, strerror(-ret));
2950 if (rec->found_link == 0) {
2951 ret = get_highest_inode(trans, root, path, &lost_found_ino);
2955 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2956 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2959 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2960 dir_name, strerror(-ret));
2963 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2964 namebuf, namelen, type, NULL, 1);
2966 * Add ".INO" suffix several times to handle case where
2967 * "FILENAME.INO" is already taken by another file.
2969 while (ret == -EEXIST) {
2971 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2973 if (namelen + count_digits(rec->ino) + 1 >
2978 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2980 namelen += count_digits(rec->ino) + 1;
2981 ret = btrfs_add_link(trans, root, rec->ino,
2982 lost_found_ino, namebuf,
2983 namelen, type, NULL, 1);
2987 "Failed to link the inode %llu to %s dir: %s\n",
2988 rec->ino, dir_name, strerror(-ret));
2992 * Just increase the found_link, don't actually add the
2993 * backref. This will make things easier and this inode
2994 * record will be freed after the repair is done.
2995 * So fsck will not report problem about this inode.
2998 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2999 namelen, namebuf, dir_name);
3001 printf("Fixed the nlink of inode %llu\n", rec->ino);
3004 * Clear the flag anyway, or we will loop forever for the same inode
3005 * as it will not be removed from the bad inode list and the dead loop
3008 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3009 btrfs_release_path(path);
/*
 * find_normal_file_extent() - look for a non-inline file extent of @ino.
 *
 * Uses a local path: btrfs_search_slot() positions on the EXTENT_DATA items
 * of @ino (moving to the next leaf when the slot is past the last item),
 * the key at the current slot must still have objectid @ino and type
 * BTRFS_EXTENT_DATA_KEY, and an item whose extent type is not
 * BTRFS_FILE_EXTENT_INLINE is the match being looked for.  The path is
 * released before returning.
 *
 * NOTE(review): the caller uses the result as a boolean; presumably
 * non-zero means such an extent exists and 0 means it does not or an error
 * happened - confirm.
 */
3014 * Check if there is any normal(reg or prealloc) file extent for given
3016 * This is used to determine the file type when neither its dir_index/item or
3017 * inode_item exists.
3019 * This will *NOT* report error, if any error happens, just consider it does
3020 * not have any normal file extent.
3022 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3024 struct btrfs_path path;
3025 struct btrfs_key key;
3026 struct btrfs_key found_key;
3027 struct btrfs_file_extent_item *fi;
3031 btrfs_init_path(&path);
3033 key.type = BTRFS_EXTENT_DATA_KEY;
3036 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3041 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3042 ret = btrfs_next_leaf(root, &path);
3049 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3051 if (found_key.objectid != ino ||
3052 found_key.type != BTRFS_EXTENT_DATA_KEY)
3054 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3055 struct btrfs_file_extent_item);
3056 type = btrfs_file_extent_type(path.nodes[0], fi);
3057 if (type != BTRFS_FILE_EXTENT_INLINE) {
3063 btrfs_release_path(&path);
3067 static u32 btrfs_type_to_imode(u8 type)
3069 static u32 imode_by_btrfs_type[] = {
3070 [BTRFS_FT_REG_FILE] = S_IFREG,
3071 [BTRFS_FT_DIR] = S_IFDIR,
3072 [BTRFS_FT_CHRDEV] = S_IFCHR,
3073 [BTRFS_FT_BLKDEV] = S_IFBLK,
3074 [BTRFS_FT_FIFO] = S_IFIFO,
3075 [BTRFS_FT_SOCK] = S_IFSOCK,
3076 [BTRFS_FT_SYMLINK] = S_IFLNK,
3079 return imode_by_btrfs_type[(type)];
/*
 * repair_inode_no_item() - recreate a missing inode item for @rec.
 *
 * The file type comes from find_file_type() when possible.  Otherwise it is
 * guessed in this order: a normal file extent means regular file, a found
 * dir item means directory, queued orphan extents mean regular file, and
 * as a last resort a regular file is assumed and a notice is printed.
 * btrfs_new_inode() then creates the item with
 * mode | btrfs_type_to_imode(filetype).
 *
 * Afterwards the record gets the new imode, I_ERR_NO_INODE_ITEM is cleared
 * and I_ERR_LINK_COUNT_WRONG is set so that the nlink repair runs as well.
 *
 * NOTE(review): the record update sets rec->found_dir_item, not
 * rec->found_inode_item, once the inode item exists - confirm that this is
 * intended.
 */
3082 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3083 struct btrfs_root *root,
3084 struct btrfs_path *path,
3085 struct inode_record *rec)
3089 int type_recovered = 0;
3092 printf("Trying to rebuild inode:%llu\n", rec->ino);
3094 type_recovered = !find_file_type(rec, &filetype);
3097 * Try to determine inode type if type not found.
3099 * For found regular file extent, it must be FILE.
3100 * For found dir_item/index, it must be DIR.
3102 * For undetermined one, use FILE as fallback.
3105 * 1. If found backref(inode_index/item is already handled) to it,
3107 * Need new inode-inode ref structure to allow search for that.
3109 if (!type_recovered) {
3110 if (rec->found_file_extent &&
3111 find_normal_file_extent(root, rec->ino)) {
3113 filetype = BTRFS_FT_REG_FILE;
3114 } else if (rec->found_dir_item) {
3116 filetype = BTRFS_FT_DIR;
3117 } else if (!list_empty(&rec->orphan_extents)) {
3119 filetype = BTRFS_FT_REG_FILE;
3121 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3124 filetype = BTRFS_FT_REG_FILE;
3128 ret = btrfs_new_inode(trans, root, rec->ino,
3129 mode | btrfs_type_to_imode(filetype));
3134 * Here inode rebuild is done, we only rebuild the inode item,
3135 * don't repair the nlink(like move to lost+found).
3136 * That is the job of nlink repair.
3138 * We just fill the record and return
3140 rec->found_dir_item = 1;
3141 rec->imode = mode | btrfs_type_to_imode(filetype);
3143 rec->errors &= ~I_ERR_NO_INODE_ITEM;
3144 /* Ensure the inode_nlinks repair function will be called */
3145 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * repair_inode_orphan_extent() - re-attach orphan data extents to an inode.
 *
 * For each entry on rec->orphan_extents, btrfs_get_extent() is used to
 * check for a conflicting file extent at (offset, disk_len).  In the
 * conflicting case the orphan's disk extent is released with
 * btrfs_free_extent().  Otherwise btrfs_insert_file_extent() adds a file
 * extent that uses disk_len for both length arguments, rec->found_size
 * grows by disk_len and the covered range is dropped from rec->holes.
 * Entries are taken off the list as they are handled.
 *
 * Error bits are cleared as they become satisfied:
 * I_ERR_FILE_NBYTES_WRONG once found_size equals nbytes,
 * I_ERR_FILE_EXTENT_DISCOUNT once rec->holes is empty, and
 * I_ERR_FILE_EXTENT_ORPHAN near the end of the function.
 */
3150 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3151 struct btrfs_root *root,
3152 struct btrfs_path *path,
3153 struct inode_record *rec)
3155 struct orphan_data_extent *orphan;
3156 struct orphan_data_extent *tmp;
3159 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3161 * Check for conflicting file extents
3163 * Here we don't know whether the extents is compressed or not,
3164 * so we can only assume it not compressed nor data offset,
3165 * and use its disk_len as extent length.
3167 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3168 orphan->offset, orphan->disk_len, 0);
3169 btrfs_release_path(path);
3174 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3175 orphan->disk_bytenr, orphan->disk_len);
3176 ret = btrfs_free_extent(trans,
3177 root->fs_info->extent_root,
3178 orphan->disk_bytenr, orphan->disk_len,
3179 0, root->objectid, orphan->objectid,
3184 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3185 orphan->offset, orphan->disk_bytenr,
3186 orphan->disk_len, orphan->disk_len);
3190 /* Update file size info */
3191 rec->found_size += orphan->disk_len;
3192 if (rec->found_size == rec->nbytes)
3193 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3195 /* Update the file extent hole info too */
3196 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3200 if (RB_EMPTY_ROOT(&rec->holes))
3201 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3203 list_del(&orphan->list);
3206 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * repair_inode_discount_extent() - fill the gaps recorded in rec->holes.
 *
 * Takes the first node of rec->holes, punches a hole of hole->len bytes at
 * hole->start with btrfs_punch_hole(), drops the entry through
 * del_file_extent_hole() and fetches the first node again.
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree is empty.
 *
 * A separate btrfs_punch_hole() call covering offset 0 up to isize rounded
 * up to the sector size handles a file that lost all of its file extents.
 * A message naming the inode and root is printed on the way out.
 */
3211 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3212 struct btrfs_root *root,
3213 struct btrfs_path *path,
3214 struct inode_record *rec)
3216 struct rb_node *node;
3217 struct file_extent_hole *hole;
3221 node = rb_first(&rec->holes);
3225 hole = rb_entry(node, struct file_extent_hole, node);
3226 ret = btrfs_punch_hole(trans, root, rec->ino,
3227 hole->start, hole->len);
3230 ret = del_file_extent_hole(&rec->holes, hole->start,
3234 if (RB_EMPTY_ROOT(&rec->holes))
3235 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3236 node = rb_first(&rec->holes);
3238 /* special case for a file losing all its file extent */
3240 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3241 round_up(rec->isize,
3242 root->fs_info->sectorsize));
3246 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3247 rec->ino, root->objectid);
/*
 * try_repair_inode() - run the per-inode repair helpers that apply to @rec.
 *
 * Only the error bits listed in the first check are handled here.  A single
 * transaction reserving 7 items (see the accounting below) wraps all
 * repairs, which run in a fixed order: missing inode item, orphan extents,
 * discount extents, dir isize, orphan item, nlinks, nbytes.  Each step
 * after the first runs only while ret is still 0.  The transaction is
 * committed and the local path released afterwards.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 */
3252 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3254 struct btrfs_trans_handle *trans;
3255 struct btrfs_path path;
3258 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3259 I_ERR_NO_ORPHAN_ITEM |
3260 I_ERR_LINK_COUNT_WRONG |
3261 I_ERR_NO_INODE_ITEM |
3262 I_ERR_FILE_EXTENT_ORPHAN |
3263 I_ERR_FILE_EXTENT_DISCOUNT|
3264 I_ERR_FILE_NBYTES_WRONG)))
3268 * For nlink repair, it may create a dir and add link, so
3269 * 2 for parent(256)'s dir_index and dir_item
3270 * 2 for lost+found dir's inode_item and inode_ref
3271 * 1 for the new inode_ref of the file
3272 * 2 for lost+found dir's dir_index and dir_item for the file
3274 trans = btrfs_start_transaction(root, 7);
3276 return PTR_ERR(trans);
3278 btrfs_init_path(&path);
3279 if (rec->errors & I_ERR_NO_INODE_ITEM)
3280 ret = repair_inode_no_item(trans, root, &path, rec);
3281 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3282 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3283 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3284 ret = repair_inode_discount_extent(trans, root, &path, rec);
3285 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3286 ret = repair_inode_isize(trans, root, &path, rec);
3287 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3288 ret = repair_inode_orphan_item(trans, root, &path, rec);
3289 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3290 ret = repair_inode_nlinks(trans, root, &path, rec);
3291 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3292 ret = repair_inode_nbytes(trans, root, &path, rec);
3293 btrfs_commit_transaction(trans, root);
3294 btrfs_release_path(&path);
/*
 * check_inode_recs() - validate, and in repair mode fix, the inode records
 * collected for @root in @inode_cache.
 *
 * Visible flow:
 *  - a root whose root item has zero refs only gets a warning when its
 *    cache is not empty;
 *  - while repair is enabled the cache is walked and
 *    repair_inode_backrefs() is run on records that have backrefs, in
 *    stages (see the notes below);
 *  - the root directory record is fetched and checked with
 *    check_root_dir(); a missing root dir can be recreated inside a
 *    transaction with btrfs_make_root_dir();
 *  - every remaining record is removed from the cache; the root dir and
 *    BTRFS_ORPHAN_OBJECTID records are simply freed, the others get
 *    I_ERR_NO_INODE_ITEM / I_ERR_LINK_COUNT_WRONG set where applicable, go
 *    through try_repair_inode(), and are either freed or reported together
 *    with their unresolved backrefs.
 *
 * Returns -1 if any error was counted, 0 otherwise.
 */
3298 static int check_inode_recs(struct btrfs_root *root,
3299 struct cache_tree *inode_cache)
3301 struct cache_extent *cache;
3302 struct ptr_node *node;
3303 struct inode_record *rec;
3304 struct inode_backref *backref;
3309 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3311 if (btrfs_root_refs(&root->root_item) == 0) {
3312 if (!cache_tree_empty(inode_cache))
3313 fprintf(stderr, "warning line %d\n", __LINE__);
3318 * We need to repair backrefs first because we could change some of the
3319 * errors in the inode recs.
3321 * We also need to go through and delete invalid backrefs first and then
3322 * add the correct ones second. We do this because we may get EEXIST
3323 * when adding back the correct index because we hadn't yet deleted the
3326 * For example, if we were missing a dir index then the directories
3327 * isize would be wrong, so if we fixed the isize to what we thought it
3328 * would be and then fixed the backref we'd still have a invalid fs, so
3329 * we need to add back the dir index and then check to see if the isize
3334 if (stage == 3 && !err)
3337 cache = search_cache_extent(inode_cache, 0);
3338 while (repair && cache) {
3339 node = container_of(cache, struct ptr_node, cache);
3341 cache = next_cache_extent(cache);
3343 /* Need to free everything up and rescan */
3345 remove_cache_extent(inode_cache, &node->cache);
3347 free_inode_rec(rec);
3351 if (list_empty(&rec->backrefs))
3354 ret = repair_inode_backrefs(root, rec, inode_cache,
3368 rec = get_inode_rec(inode_cache, root_dirid, 0);
3369 BUG_ON(IS_ERR(rec));
3371 ret = check_root_dir(rec);
3373 fprintf(stderr, "root %llu root dir %llu error\n",
3374 (unsigned long long)root->root_key.objectid,
3375 (unsigned long long)root_dirid);
3376 print_inode_error(root, rec);
3381 struct btrfs_trans_handle *trans;
3383 trans = btrfs_start_transaction(root, 1);
3384 if (IS_ERR(trans)) {
3385 err = PTR_ERR(trans);
3390 "root %llu missing its root dir, recreating\n",
3391 (unsigned long long)root->objectid);
3393 ret = btrfs_make_root_dir(trans, root, root_dirid);
3396 btrfs_commit_transaction(trans, root);
3400 fprintf(stderr, "root %llu root dir %llu not found\n",
3401 (unsigned long long)root->root_key.objectid,
3402 (unsigned long long)root_dirid);
3406 cache = search_cache_extent(inode_cache, 0);
3409 node = container_of(cache, struct ptr_node, cache);
3411 remove_cache_extent(inode_cache, &node->cache);
3413 if (rec->ino == root_dirid ||
3414 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3415 free_inode_rec(rec);
3419 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3420 ret = check_orphan_item(root, rec->ino);
3422 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3423 if (can_free_inode_rec(rec)) {
3424 free_inode_rec(rec);
3429 if (!rec->found_inode_item)
3430 rec->errors |= I_ERR_NO_INODE_ITEM;
3431 if (rec->found_link != rec->nlink)
3432 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3434 ret = try_repair_inode(root, rec);
3435 if (ret == 0 && can_free_inode_rec(rec)) {
3436 free_inode_rec(rec);
3442 if (!(repair && ret == 0))
3444 print_inode_error(root, rec);
3445 list_for_each_entry(backref, &rec->backrefs, list) {
3446 if (!backref->found_dir_item)
3447 backref->errors |= REF_ERR_NO_DIR_ITEM;
3448 if (!backref->found_dir_index)
3449 backref->errors |= REF_ERR_NO_DIR_INDEX;
3450 if (!backref->found_inode_ref)
3451 backref->errors |= REF_ERR_NO_INODE_REF;
3452 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3453 " namelen %u name %s filetype %d errors %x",
3454 (unsigned long long)backref->dir,
3455 (unsigned long long)backref->index,
3456 backref->namelen, backref->name,
3457 backref->filetype, backref->errors);
3458 print_ref_error(backref->errors);
3460 free_inode_rec(rec);
3462 return (error > 0) ? -1 : 0;
3465 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3468 struct cache_extent *cache;
3469 struct root_record *rec = NULL;
3472 cache = lookup_cache_extent(root_cache, objectid, 1);
3474 rec = container_of(cache, struct root_record, cache);
3476 rec = calloc(1, sizeof(*rec));
3478 return ERR_PTR(-ENOMEM);
3479 rec->objectid = objectid;
3480 INIT_LIST_HEAD(&rec->backrefs);
3481 rec->cache.start = objectid;
3482 rec->cache.size = 1;
3484 ret = insert_cache_extent(root_cache, &rec->cache);
3486 return ERR_PTR(-EEXIST);
/*
 * get_root_backref() - find or create the backref of @rec that matches
 * (@ref_root, @dir, @name).
 *
 * Existing entries are compared on ref_root, dir, namelen and the name
 * bytes; @index takes no part in the comparison and is only stored in a
 * newly created entry.  A new entry is zero-allocated with room for the
 * name plus a terminating NUL and appended to rec->backrefs.
 *
 * NOTE(review): presumably returns the matching or new entry, and NULL when
 * the allocation fails - confirm.
 */
3491 static struct root_backref *get_root_backref(struct root_record *rec,
3492 u64 ref_root, u64 dir, u64 index,
3493 const char *name, int namelen)
3495 struct root_backref *backref;
3497 list_for_each_entry(backref, &rec->backrefs, list) {
3498 if (backref->ref_root != ref_root || backref->dir != dir ||
3499 backref->namelen != namelen)
3501 if (memcmp(name, backref->name, namelen))
3506 backref = calloc(1, sizeof(*backref) + namelen + 1);
3509 backref->ref_root = ref_root;
3511 backref->index = index;
3512 backref->namelen = namelen;
3513 memcpy(backref->name, name, namelen);
3514 backref->name[namelen] = '\0';
3515 list_add_tail(&backref->list, &rec->backrefs);
/*
 * free_root_record() - tear down the root_record that embeds @cache.
 *
 * Detaches every root_backref from rec->backrefs, one at a time from the
 * head of the list.
 *
 * NOTE(review): presumably each backref and the record itself are freed
 * here as well - confirm.
 *
 * The FREE_EXTENT_CACHE_BASED_TREE() line below builds the tree-wide
 * teardown for root records on top of this function; presumably it defines
 * free_root_recs_tree(), which other code in this file calls - confirm.
 */
3519 static void free_root_record(struct cache_extent *cache)
3521 struct root_record *rec;
3522 struct root_backref *backref;
3524 rec = container_of(cache, struct root_record, cache);
3525 while (!list_empty(&rec->backrefs)) {
3526 backref = to_root_backref(rec->backrefs.next);
3527 list_del(&backref->list);
3534 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * add_root_backref() - record one sighting of a reference to root @root_id.
 *
 * @root_cache: cache of root records
 * @root_id:    root being referenced
 * @ref_root:   root that holds the reference
 * @dir, @index, @name, @namelen: location and name of the reference
 * @item_type:  BTRFS_DIR_ITEM_KEY, BTRFS_DIR_INDEX_KEY, BTRFS_ROOT_REF_KEY
 *              or BTRFS_ROOT_BACKREF_KEY; selects which found_* flag is set
 * @errors:     REF_ERR_* bits OR-ed into backref->errors
 *
 * For every type except a dir item the index is cross-checked: when a dir
 * index, back ref or forward ref was already seen, a differing index sets
 * REF_ERR_INDEX_UNMATCH; the index is (presumably otherwise) stored in the
 * backref.  A second ROOT_REF sets REF_ERR_DUP_ROOT_REF and a second
 * ROOT_BACKREF sets REF_ERR_DUP_ROOT_BACKREF.  A backref that has both a
 * forward ref and a dir item is marked reachable.
 */
3536 static int add_root_backref(struct cache_tree *root_cache,
3537 u64 root_id, u64 ref_root, u64 dir, u64 index,
3538 const char *name, int namelen,
3539 int item_type, int errors)
3541 struct root_record *rec;
3542 struct root_backref *backref;
3544 rec = get_root_rec(root_cache, root_id);
3545 BUG_ON(IS_ERR(rec));
3546 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3549 backref->errors |= errors;
3551 if (item_type != BTRFS_DIR_ITEM_KEY) {
3552 if (backref->found_dir_index || backref->found_back_ref ||
3553 backref->found_forward_ref) {
3554 if (backref->index != index)
3555 backref->errors |= REF_ERR_INDEX_UNMATCH;
3557 backref->index = index;
3561 if (item_type == BTRFS_DIR_ITEM_KEY) {
3562 if (backref->found_forward_ref)
3564 backref->found_dir_item = 1;
3565 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3566 backref->found_dir_index = 1;
3567 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3568 if (backref->found_forward_ref)
3569 backref->errors |= REF_ERR_DUP_ROOT_REF;
3570 else if (backref->found_dir_item)
3572 backref->found_forward_ref = 1;
3573 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3574 if (backref->found_back_ref)
3575 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3576 backref->found_back_ref = 1;
3581 if (backref->found_forward_ref && backref->found_dir_item)
3582 backref->reachable = 1;
/*
 * merge_root_recs() - move the records of @src_cache into the root records
 * of @dst_cache.
 *
 * For the tree reloc root the source cache is simply freed.  Otherwise each
 * record is removed from @src_cache and tested with is_child_root(); its
 * backrefs (which must not carry an inode ref) are forwarded to
 * add_root_backref() with the record's ino as the referenced root and
 * root->root_key.objectid as the referencing root, once for a found dir
 * item and once for a found dir index.  The record is then freed.
 */
3586 static int merge_root_recs(struct btrfs_root *root,
3587 struct cache_tree *src_cache,
3588 struct cache_tree *dst_cache)
3590 struct cache_extent *cache;
3591 struct ptr_node *node;
3592 struct inode_record *rec;
3593 struct inode_backref *backref;
3596 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3597 free_inode_recs_tree(src_cache);
3602 cache = search_cache_extent(src_cache, 0);
3605 node = container_of(cache, struct ptr_node, cache);
3607 remove_cache_extent(src_cache, &node->cache);
3610 ret = is_child_root(root, root->objectid, rec->ino);
3616 list_for_each_entry(backref, &rec->backrefs, list) {
3617 BUG_ON(backref->found_inode_ref);
3618 if (backref->found_dir_item)
3619 add_root_backref(dst_cache, rec->ino,
3620 root->root_key.objectid, backref->dir,
3621 backref->index, backref->name,
3622 backref->namelen, BTRFS_DIR_ITEM_KEY,
3624 if (backref->found_dir_index)
3625 add_root_backref(dst_cache, rec->ino,
3626 root->root_key.objectid, backref->dir,
3627 backref->index, backref->name,
3628 backref->namelen, BTRFS_DIR_INDEX_KEY,
3632 free_inode_rec(rec);
/*
 * check_root_refs() - verify that the fs trees recorded in @root_cache are
 * referenced and that their references are complete.
 *
 * Visible flow:
 *  - the record for BTRFS_FS_TREE_OBJECTID is looked up first;
 *  - a scan over all records marks a reachable backref as unreachable when
 *    the root holding it (backref->ref_root) has no found_ref;
 *  - a second scan handles each record: an unreferenced tree in the
 *    [BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID] range is checked
 *    for an orphan item in the tree root and reported as
 *    "not referenced" unless it has no root item; missing dir item, dir
 *    index, root backref and root ref are flagged on every backref; records
 *    with problems are printed together with their unresolved refs.
 *
 * Returns 1 if any error was counted, 0 otherwise.
 */
3639 static int check_root_refs(struct btrfs_root *root,
3640 struct cache_tree *root_cache)
3642 struct root_record *rec;
3643 struct root_record *ref_root;
3644 struct root_backref *backref;
3645 struct cache_extent *cache;
3651 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3652 BUG_ON(IS_ERR(rec));
3655 /* fixme: this can not detect circular references */
3658 cache = search_cache_extent(root_cache, 0);
3662 rec = container_of(cache, struct root_record, cache);
3663 cache = next_cache_extent(cache);
3665 if (rec->found_ref == 0)
3668 list_for_each_entry(backref, &rec->backrefs, list) {
3669 if (!backref->reachable)
3672 ref_root = get_root_rec(root_cache,
3674 BUG_ON(IS_ERR(ref_root));
3675 if (ref_root->found_ref > 0)
3678 backref->reachable = 0;
3680 if (rec->found_ref == 0)
3686 cache = search_cache_extent(root_cache, 0);
3690 rec = container_of(cache, struct root_record, cache);
3691 cache = next_cache_extent(cache);
3693 if (rec->found_ref == 0 &&
3694 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3695 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3696 ret = check_orphan_item(root->fs_info->tree_root,
3702 * If we don't have a root item then we likely just have
3703 * a dir item in a snapshot for this root but no actual
3704 * ref key or anything so it's meaningless.
3706 if (!rec->found_root_item)
3709 fprintf(stderr, "fs tree %llu not referenced\n",
3710 (unsigned long long)rec->objectid);
3714 if (rec->found_ref > 0 && !rec->found_root_item)
3716 list_for_each_entry(backref, &rec->backrefs, list) {
3717 if (!backref->found_dir_item)
3718 backref->errors |= REF_ERR_NO_DIR_ITEM;
3719 if (!backref->found_dir_index)
3720 backref->errors |= REF_ERR_NO_DIR_INDEX;
3721 if (!backref->found_back_ref)
3722 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3723 if (!backref->found_forward_ref)
3724 backref->errors |= REF_ERR_NO_ROOT_REF;
3725 if (backref->reachable && backref->errors)
3732 fprintf(stderr, "fs tree %llu refs %u %s\n",
3733 (unsigned long long)rec->objectid, rec->found_ref,
3734 rec->found_root_item ? "" : "not found");
3736 list_for_each_entry(backref, &rec->backrefs, list) {
3737 if (!backref->reachable)
3739 if (!backref->errors && rec->found_root_item)
3741 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3742 " index %llu namelen %u name %s errors %x\n",
3743 (unsigned long long)backref->ref_root,
3744 (unsigned long long)backref->dir,
3745 (unsigned long long)backref->index,
3746 backref->namelen, backref->name,
3748 print_ref_error(backref->errors);
3751 return errors > 0 ? 1 : 0;
/*
 * process_root_ref() - feed one ROOT_REF or ROOT_BACKREF item into
 * @root_cache.
 *
 * Reads dirid, sequence (used as the index) and the name from the
 * btrfs_root_ref item at @slot of @eb; the name bytes follow the item
 * structure.  A name longer than BTRFS_NAME_LEN is cut to BTRFS_NAME_LEN
 * and reported with REF_ERR_NAME_TOO_LONG.
 *
 * For BTRFS_ROOT_REF_KEY the referenced root is key->offset and the
 * referencing root is key->objectid; for the other key type the two are
 * swapped.  Either way the data is handed to add_root_backref() together
 * with key->type.
 */
3754 static int process_root_ref(struct extent_buffer *eb, int slot,
3755 struct btrfs_key *key,
3756 struct cache_tree *root_cache)
3762 struct btrfs_root_ref *ref;
3763 char namebuf[BTRFS_NAME_LEN];
3766 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3768 dirid = btrfs_root_ref_dirid(eb, ref);
3769 index = btrfs_root_ref_sequence(eb, ref);
3770 name_len = btrfs_root_ref_name_len(eb, ref);
3772 if (name_len <= BTRFS_NAME_LEN) {
3776 len = BTRFS_NAME_LEN;
3777 error = REF_ERR_NAME_TOO_LONG;
3779 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3781 if (key->type == BTRFS_ROOT_REF_KEY) {
3782 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3783 index, namebuf, len, key->type, error);
3785 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3786 index, namebuf, len, key->type, error);
/*
 * free_corrupt_block() - per-entry teardown for a corrupt-block cache tree.
 *
 * Recovers the btrfs_corrupt_block that embeds @cache.
 * NOTE(review): presumably the structure is then freed - confirm.
 *
 * The FREE_EXTENT_CACHE_BASED_TREE() line below builds the tree-wide
 * teardown on top of this function; presumably it defines
 * free_corrupt_blocks_tree(), which other code in this file calls -
 * confirm.
 */
3791 static void free_corrupt_block(struct cache_extent *cache)
3793 struct btrfs_corrupt_block *corrupt;
3795 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3799 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
 * repair_btree() - cut the tree blocks listed in @corrupt_blocks out of
 * @root, then rebalance.
 *
 * Checks first whether @corrupt_blocks is empty.  Otherwise one transaction
 * covers two passes over the cache:
 *  1. for each entry, search down to the entry's key with
 *     path.lowest_level set to its level and ins_len 0 (no balancing), read
 *     the block pointer at that slot, remove the pointer with
 *     btrfs_del_ptr() and release the extent it pointed to with
 *     btrfs_free_extent();
 *  2. search for each key again with ins_len -1 so that
 *     btrfs_search_slot() rebalances the tree.
 * The transaction is committed and the path released at the end.
 */
3802 * Repair the btree of the given root.
3804 * The fix is to remove the node key in corrupt_blocks cache_tree.
3805 * and rebalance the tree.
3806 * After the fix, the btree should be writeable.
3808 static int repair_btree(struct btrfs_root *root,
3809 struct cache_tree *corrupt_blocks)
3811 struct btrfs_trans_handle *trans;
3812 struct btrfs_path path;
3813 struct btrfs_corrupt_block *corrupt;
3814 struct cache_extent *cache;
3815 struct btrfs_key key;
3820 if (cache_tree_empty(corrupt_blocks))
3823 trans = btrfs_start_transaction(root, 1);
3824 if (IS_ERR(trans)) {
3825 ret = PTR_ERR(trans);
3826 fprintf(stderr, "Error starting transaction: %s\n",
3830 btrfs_init_path(&path);
3831 cache = first_cache_extent(corrupt_blocks);
3833 corrupt = container_of(cache, struct btrfs_corrupt_block,
3835 level = corrupt->level;
3836 path.lowest_level = level;
3837 key.objectid = corrupt->key.objectid;
3838 key.type = corrupt->key.type;
3839 key.offset = corrupt->key.offset;
3842 * Here we don't want to do any tree balance, since it may
3843 * cause a balance with corrupted brother leaf/node,
3844 * so ins_len set to 0 here.
3845 * Balance will be done after all corrupt node/leaf is deleted.
3847 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3850 offset = btrfs_node_blockptr(path.nodes[level],
3853 /* Remove the ptr */
3854 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3858 * Remove the corresponding extent
3859 * return value is not concerned.
3861 btrfs_release_path(&path);
3862 ret = btrfs_free_extent(trans, root, offset,
3863 root->fs_info->nodesize, 0,
3864 root->root_key.objectid, level - 1, 0);
3865 cache = next_cache_extent(cache);
3868 /* Balance the btree using btrfs_search_slot() */
3869 cache = first_cache_extent(corrupt_blocks);
3871 corrupt = container_of(cache, struct btrfs_corrupt_block,
3873 memcpy(&key, &corrupt->key, sizeof(key));
3874 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3877 /* return will always >0 since it won't find the item */
3879 btrfs_release_path(&path);
3880 cache = next_cache_extent(cache);
3883 btrfs_commit_transaction(trans, root);
3884 btrfs_release_path(&path);
/*
 * check_fs_root() - check one fs/subvolume tree and its inode records.
 *
 * Visible flow:
 *  - a local corrupt_blocks cache is installed in
 *    root->fs_info->corrupt_blocks for the duration of the call and torn
 *    down again at the end;
 *  - for any root other than the tree reloc root, its root record is looked
 *    up in @root_cache and marked found_root_item when the root item has
 *    refs;
 *  - orphan data extents queued on the root are moved onto the matching
 *    inode records, which get I_ERR_FILE_EXTENT_ORPHAN;
 *  - the root block is validated with btrfs_check_leaf() or
 *    btrfs_check_node();
 *  - the walk starts at the root node, or, when the root has no refs and a
 *    non-zero drop_progress objectid, at the drop_progress key and
 *    drop_level (an out-of-range drop level is reported as invalid);
 *  - walk_down_tree() and walk_up_tree() traverse the tree using @wc;
 *  - corrupted tree blocks recorded during the walk are listed and passed
 *    to repair_btree() (presumably only in repair mode - confirm);
 *  - merge_root_recs() and check_inode_recs() consume the root and inode
 *    records collected by the walk;
 *  - the root's orphan data extent list is freed.
 */
3888 static int check_fs_root(struct btrfs_root *root,
3889 struct cache_tree *root_cache,
3890 struct walk_control *wc)
3896 struct btrfs_path path;
3897 struct shared_node root_node;
3898 struct root_record *rec;
3899 struct btrfs_root_item *root_item = &root->root_item;
3900 struct cache_tree corrupt_blocks;
3901 struct orphan_data_extent *orphan;
3902 struct orphan_data_extent *tmp;
3903 enum btrfs_tree_block_status status;
3904 struct node_refs nrefs;
3907 * Reuse the corrupt_block cache tree to record corrupted tree block
3909 * Unlike the usage in extent tree check, here we do it in a per
3910 * fs/subvol tree base.
3912 cache_tree_init(&corrupt_blocks);
3913 root->fs_info->corrupt_blocks = &corrupt_blocks;
3915 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3916 rec = get_root_rec(root_cache, root->root_key.objectid);
3917 BUG_ON(IS_ERR(rec));
3918 if (btrfs_root_refs(root_item) > 0)
3919 rec->found_root_item = 1;
3922 btrfs_init_path(&path);
3923 memset(&root_node, 0, sizeof(root_node));
3924 cache_tree_init(&root_node.root_cache);
3925 cache_tree_init(&root_node.inode_cache);
3926 memset(&nrefs, 0, sizeof(nrefs));
3928 /* Move the orphan extent record to corresponding inode_record */
3929 list_for_each_entry_safe(orphan, tmp,
3930 &root->orphan_data_extents, list) {
3931 struct inode_record *inode;
3933 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3935 BUG_ON(IS_ERR(inode));
3936 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3937 list_move(&orphan->list, &inode->orphan_extents);
3940 level = btrfs_header_level(root->node);
3941 memset(wc->nodes, 0, sizeof(wc->nodes));
3942 wc->nodes[level] = &root_node;
3943 wc->active_node = level;
3944 wc->root_level = level;
3946 /* We may not have checked the root block, lets do that now */
3947 if (btrfs_is_leaf(root->node))
3948 status = btrfs_check_leaf(root, NULL, root->node);
3950 status = btrfs_check_node(root, NULL, root->node);
3951 if (status != BTRFS_TREE_BLOCK_CLEAN)
3954 if (btrfs_root_refs(root_item) > 0 ||
3955 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3956 path.nodes[level] = root->node;
3957 extent_buffer_get(root->node);
3958 path.slots[level] = 0;
3960 struct btrfs_key key;
3961 struct btrfs_disk_key found_key;
3963 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3964 level = root_item->drop_level;
3965 path.lowest_level = level;
3966 if (level > btrfs_header_level(root->node) ||
3967 level >= BTRFS_MAX_LEVEL) {
3968 error("ignoring invalid drop level: %u", level);
3971 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3974 btrfs_node_key(path.nodes[level], &found_key,
3976 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3977 sizeof(found_key)));
3981 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3987 wret = walk_up_tree(root, &path, wc, &level);
3994 btrfs_release_path(&path);
3996 if (!cache_tree_empty(&corrupt_blocks)) {
3997 struct cache_extent *cache;
3998 struct btrfs_corrupt_block *corrupt;
4000 printf("The following tree block(s) is corrupted in tree %llu:\n",
4001 root->root_key.objectid);
4002 cache = first_cache_extent(&corrupt_blocks);
4004 corrupt = container_of(cache,
4005 struct btrfs_corrupt_block,
4007 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4008 cache->start, corrupt->level,
4009 corrupt->key.objectid, corrupt->key.type,
4010 corrupt->key.offset);
4011 cache = next_cache_extent(cache);
4014 printf("Try to repair the btree for root %llu\n",
4015 root->root_key.objectid);
4016 ret = repair_btree(root, &corrupt_blocks);
4018 fprintf(stderr, "Failed to repair btree: %s\n",
4021 printf("Btree for root %llu is fixed\n",
4022 root->root_key.objectid);
4026 err = merge_root_recs(root, &root_node.root_cache, root_cache);
4030 if (root_node.current) {
4031 root_node.current->checked = 1;
4032 maybe_free_inode_rec(&root_node.inode_cache,
4036 err = check_inode_recs(root, &root_node.inode_cache);
4040 free_corrupt_blocks_tree(&corrupt_blocks);
4041 root->fs_info->corrupt_blocks = NULL;
4042 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * fs_root_objectid() - decide whether @objectid names a tree that the fs
 * root checks should visit.
 *
 * BTRFS_TREE_RELOC_OBJECTID and BTRFS_DATA_RELOC_TREE_OBJECTID are handled
 * as a special case before everything else is delegated to is_fstree().
 *
 * NOTE(review): the special case presumably returns non-zero, since the
 * caller expects to see the tree reloc objectid among accepted keys -
 * confirm.
 */
4046 static int fs_root_objectid(u64 objectid)
4048 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4049 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4051 return is_fstree(objectid);
/*
 * check_fs_roots() - walk the tree root and check every fs tree found.
 *
 * When ctx.progress_enabled is set, the progress position is switched to
 * TASK_FS_ROOTS and the progress task is started.  Cached block groups are
 * reset before the scan.
 *
 * Items of the tree root are visited through a local path:
 *  - a ROOT_ITEM key whose objectid passes fs_root_objectid() is read with
 *    btrfs_read_fs_root_no_cache() for the tree reloc root, or with
 *    btrfs_read_fs_root() after setting key.offset to (u64)-1 otherwise,
 *    and passed to check_fs_root() with the shared walk control; a tree
 *    reloc root is freed again afterwards;
 *  - ROOT_REF and ROOT_BACKREF keys go to process_root_ref().
 * When the tree root node changed since it was sampled, or check_fs_root()
 * returns -EAGAIN, the collected root records are freed and the path is
 * released; presumably the scan then starts over - confirm.
 *
 * The shared-node cache of the walk control is freed at the end, with a
 * warning if it is not empty afterwards.
 */
4054 static int check_fs_roots(struct btrfs_root *root,
4055 struct cache_tree *root_cache)
4057 struct btrfs_path path;
4058 struct btrfs_key key;
4059 struct walk_control wc;
4060 struct extent_buffer *leaf, *tree_node;
4061 struct btrfs_root *tmp_root;
4062 struct btrfs_root *tree_root = root->fs_info->tree_root;
4066 if (ctx.progress_enabled) {
4067 ctx.tp = TASK_FS_ROOTS;
4068 task_start(ctx.info);
4072 * Just in case we made any changes to the extent tree that weren't
4073 * reflected into the free space cache yet.
4076 reset_cached_block_groups(root->fs_info);
4077 memset(&wc, 0, sizeof(wc));
4078 cache_tree_init(&wc.shared);
4079 btrfs_init_path(&path);
4084 key.type = BTRFS_ROOT_ITEM_KEY;
4085 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4090 tree_node = tree_root->node;
4092 if (tree_node != tree_root->node) {
4093 free_root_recs_tree(root_cache);
4094 btrfs_release_path(&path);
4097 leaf = path.nodes[0];
4098 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4099 ret = btrfs_next_leaf(tree_root, &path);
4105 leaf = path.nodes[0];
4107 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4108 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4109 fs_root_objectid(key.objectid)) {
4110 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4111 tmp_root = btrfs_read_fs_root_no_cache(
4112 root->fs_info, &key);
4114 key.offset = (u64)-1;
4115 tmp_root = btrfs_read_fs_root(
4116 root->fs_info, &key);
4118 if (IS_ERR(tmp_root)) {
4122 ret = check_fs_root(tmp_root, root_cache, &wc);
4123 if (ret == -EAGAIN) {
4124 free_root_recs_tree(root_cache);
4125 btrfs_release_path(&path);
4130 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4131 btrfs_free_fs_root(tmp_root);
4132 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4133 key.type == BTRFS_ROOT_BACKREF_KEY) {
4134 process_root_ref(leaf, path.slots[0], &key,
4141 btrfs_release_path(&path);
4143 free_extent_cache_tree(&wc.shared);
4144 if (!cache_tree_empty(&wc.shared))
4145 fprintf(stderr, "warning line %d\n", __LINE__);
4147 task_stop(ctx.info);
4153 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4154 * INODE_REF/INODE_EXTREF match.
4156 * @root: the root of the fs/file tree
4157 * @ref_key: the key of the INODE_REF/INODE_EXTREF
4158 * @key: the key of the DIR_ITEM/DIR_INDEX
4159 * @index: the index in the INODE_REF/INODE_EXTREF, used to
4160 * distinguish the root dir from a normal dir/file
4161 * @name: the name in the INODE_REF/INODE_EXTREF
4162 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4163 * @mode: the st_mode of INODE_ITEM
4165 * Return 0 if no error occurred.
4166 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
4167 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
4169 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
4170 * not match for normal dir/file.
/*
 * Search @root for @key (a DIR_ITEM or DIR_INDEX key) and compare each entry
 * packed in the item found against the reference described by @ref_key,
 * @name/@namelen and @mode.  Problems are reported through error()/warning()
 * and encoded in the return value (ROOT_DIR_ERROR, DIR_ITEM_MISSING,
 * DIR_ITEM_MISMATCH).
 * NOTE(review): the branch conditions guarding the ROOT_DIR_ERROR and
 * DIR_ITEM_MISSING paths are not visible in this copy.
 */
4172 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
4173 struct btrfs_key *key, u64 index, char *name,
4174 u32 namelen, u32 mode)
4176 struct btrfs_path path;
4177 struct extent_buffer *node;
4178 struct btrfs_dir_item *di;
4179 struct btrfs_key location;
4180 char namebuf[BTRFS_NAME_LEN] = {0};
/* Read-only lookup: no transaction, no insert/delete reservation, no COW */
4190 btrfs_init_path(&path);
4191 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4193 ret = DIR_ITEM_MISSING;
4197 /* Process root dir and goto out */
4200 ret = ROOT_DIR_ERROR;
4202 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
4204 ref_key->type == BTRFS_INODE_REF_KEY ?
4206 ref_key->objectid, ref_key->offset,
4207 key->type == BTRFS_DIR_ITEM_KEY ?
4208 "DIR_ITEM" : "DIR_INDEX");
4216 /* Process normal file/dir */
4218 ret = DIR_ITEM_MISSING;
4220 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
4222 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4223 ref_key->objectid, ref_key->offset,
4224 key->type == BTRFS_DIR_ITEM_KEY ?
4225 "DIR_ITEM" : "DIR_INDEX",
4226 key->objectid, key->offset, namelen, name,
4227 imode_to_type(mode));
4231 /* Check whether inode_id/filetype/name match */
4232 node = path.nodes[0];
4233 slot = path.slots[0];
4234 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4235 total = btrfs_item_size_nr(node, slot);
/* One item may pack several entries; visit them one after another */
4236 while (cur < total) {
/* Each entry starts out as a mismatch */
4237 ret = DIR_ITEM_MISMATCH;
4238 name_len = btrfs_dir_name_len(node, di);
4239 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM (offset 0) of the referencing inode */
4241 btrfs_dir_item_key_to_cpu(node, di, &location);
4242 if (location.objectid != ref_key->objectid ||
4243 location.type != BTRFS_INODE_ITEM_KEY ||
4244 location.offset != 0)
/* The file type stored in the entry must agree with the inode mode */
4247 filetype = btrfs_dir_type(node, di);
4248 if (imode_to_type(mode) != filetype)
/* Name overruns the item or BTRFS_NAME_LEN: warn and clamp the length read */
4251 if (cur + sizeof(*di) + name_len > total ||
4252 name_len > BTRFS_NAME_LEN) {
4253 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4255 key->type == BTRFS_DIR_ITEM_KEY ?
4256 "DIR_ITEM" : "DIR_INDEX",
4257 key->objectid, key->offset, name_len);
4259 if (cur + sizeof(*di) > total)
4261 len = min_t(u32, total - cur - sizeof(*di),
/* The name bytes are stored directly behind the btrfs_dir_item header */
4267 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4268 if (len != namelen || strncmp(namebuf, name, len))
/* Step over header, name and data to reach the next packed entry */
4274 len = sizeof(*di) + name_len + data_len;
4275 di = (struct btrfs_dir_item *)((char *)di + len);
4278 if (ret == DIR_ITEM_MISMATCH)
4280 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
4282 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
4283 ref_key->objectid, ref_key->offset,
4284 key->type == BTRFS_DIR_ITEM_KEY ?
4285 "DIR_ITEM" : "DIR_INDEX",
4286 key->objectid, key->offset, namelen, name,
4287 imode_to_type(mode));
4289 btrfs_release_path(&path);
4294 * Traverse the given INODE_REF and call find_dir_item() to find related
4295 * DIR_ITEM/DIR_INDEX.
4297 * @root: the root of the fs/file tree
4298 * @ref_key: the key of the INODE_REF
4299 * @refs: the count of INODE_REF
4300 * @mode: the st_mode of INODE_ITEM
4302 * Return 0 if no error occurred.
/*
 * Process the INODE_REF item at @slot of @node: for each name stored in it,
 * look up the matching DIR_INDEX and DIR_ITEM in the parent directory
 * (ref_key->offset) through find_dir_item().
 * NOTE(review): the update of *refs and the loop/return statements are not
 * visible in this copy.
 */
4304 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4305 struct extent_buffer *node, int slot, u64 *refs,
4308 struct btrfs_key key;
4309 struct btrfs_inode_ref *ref;
4310 char namebuf[BTRFS_NAME_LEN] = {0};
4318 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4319 total = btrfs_item_size_nr(node, slot);
4322 /* Update inode ref count */
4325 index = btrfs_inode_ref_index(node, ref);
4326 name_len = btrfs_inode_ref_name_len(node, ref);
/* Name overruns the item or BTRFS_NAME_LEN: warn and clamp the length read */
4327 if (cur + sizeof(*ref) + name_len > total ||
4328 name_len > BTRFS_NAME_LEN) {
4329 warning("root %llu INODE_REF[%llu %llu] name too long",
4330 root->objectid, ref_key->objectid, ref_key->offset);
4332 if (total < cur + sizeof(*ref))
4334 len = min_t(u32, total - cur - sizeof(*ref), BTRFS_NAME_LEN);
/* The name bytes are stored directly behind the btrfs_inode_ref header */
4339 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4341 /* Check root dir ref name */
/*
 * NOTE(review): the comparison is bounded by the raw on-disk name_len, not
 * the clamped len that was actually read into namebuf; a name shorter than
 * ".." (e.g. "." or an empty name) compares equal.  Also namebuf has no
 * terminating NUL when len == BTRFS_NAME_LEN, yet is printed with %s.
 */
4342 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4343 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4344 root->objectid, ref_key->objectid, ref_key->offset,
4346 err |= ROOT_DIR_ERROR;
4349 /* Find related DIR_INDEX */
4350 key.objectid = ref_key->offset;
4351 key.type = BTRFS_DIR_INDEX_KEY;
4353 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4356 /* Find related dir_item */
/* DIR_ITEM keys are addressed by the hash of the name */
4357 key.objectid = ref_key->offset;
4358 key.type = BTRFS_DIR_ITEM_KEY;
4359 key.offset = btrfs_name_hash(namebuf, len);
4360 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over header and name to the next reference packed in this item */
4363 len = sizeof(*ref) + name_len;
4364 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4374 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4375 * DIR_ITEM/DIR_INDEX.
4377 * @root: the root of the fs/file tree
4378 * @ref_key: the key of the INODE_EXTREF
4379 * @refs: the count of INODE_EXTREF
4380 * @mode: the st_mode of INODE_ITEM
4382 * Return 0 if no error occurred.
/*
 * Process the INODE_EXTREF item at @slot of @node: for each entry stored in
 * it, look up the matching DIR_INDEX and DIR_ITEM in the parent directory
 * recorded in the entry itself, through find_dir_item().
 * NOTE(review): the update of *refs and the loop/return statements are not
 * visible in this copy.
 */
4384 static int check_inode_extref(struct btrfs_root *root,
4385 struct btrfs_key *ref_key,
4386 struct extent_buffer *node, int slot, u64 *refs,
4389 struct btrfs_key key;
4390 struct btrfs_inode_extref *extref;
4391 char namebuf[BTRFS_NAME_LEN] = {0};
4401 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4402 total = btrfs_item_size_nr(node, slot);
4405 /* update inode ref count */
4407 name_len = btrfs_inode_extref_name_len(node, extref);
4408 index = btrfs_inode_extref_index(node, extref);
4409 parent = btrfs_inode_extref_parent(node, extref);
/*
 * Only the BTRFS_NAME_LEN limit is tested here.
 * NOTE(review): unlike check_inode_ref() there is no visible check that the
 * name stays inside the item (cur + header + name_len <= total) -- confirm
 * whether that is intentional.
 */
4410 if (name_len <= BTRFS_NAME_LEN) {
4413 len = BTRFS_NAME_LEN;
4414 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4415 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes are stored directly behind the btrfs_inode_extref header */
4417 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4419 /* Check root dir ref name */
/* NOTE(review): bounded by raw name_len rather than the clamped len */
4420 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4421 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4422 root->objectid, ref_key->objectid, ref_key->offset,
4424 err |= ROOT_DIR_ERROR;
4427 /* find related dir_index */
4428 key.objectid = parent;
4429 key.type = BTRFS_DIR_INDEX_KEY;
4431 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4434 /* find related dir_item */
/* DIR_ITEM keys are addressed by the hash of the name */
4435 key.objectid = parent;
4436 key.type = BTRFS_DIR_ITEM_KEY;
4437 key.offset = btrfs_name_hash(namebuf, len);
4438 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step over header and name to the next entry packed in this item */
4441 len = sizeof(*extref) + name_len;
4442 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4452 * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4453 * DIR_ITEM/DIR_INDEX match.
4455 * @root: the root of the fs/file tree
4456 * @key: the key of the INODE_REF/INODE_EXTREF
4457 * @name: the name in the INODE_REF/INODE_EXTREF
4458 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4459 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4461 * @ext_ref: the EXTENDED_IREF feature
4463 * Return 0 if no error occurred.
4464 * Return >0 for error bitmap
/*
 * Look for a back reference matching @name/@namelen (and @index, unless it is
 * (u64)-1).  The key is first searched as given and its entries are compared
 * as INODE_REF entries; afterwards the key is rewritten into an INODE_EXTREF
 * key and the entries of that item are compared as well, additionally
 * requiring the recorded parent to equal the directory id.
 *
 * Side effect: key->type and key->offset of the caller's @key are
 * overwritten before the INODE_EXTREF search.
 *
 * NOTE(review): the statements taken on a match and the test of @ext_ref are
 * not visible in this copy.
 */
4466 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4467 char *name, int namelen, u64 index,
4468 unsigned int ext_ref)
4470 struct btrfs_path path;
4471 struct btrfs_inode_ref *ref;
4472 struct btrfs_inode_extref *extref;
4473 struct extent_buffer *node;
4474 char ref_namebuf[BTRFS_NAME_LEN] = {0};
/* First pass: search the key exactly as the caller supplied it */
4485 btrfs_init_path(&path);
4486 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4488 ret = INODE_REF_MISSING;
4492 node = path.nodes[0];
4493 slot = path.slots[0];
4495 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4496 total = btrfs_item_size_nr(node, slot);
4498 /* Iterate all entry of INODE_REF */
4499 while (cur < total) {
4500 ret = INODE_REF_MISSING;
4502 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4503 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 disables the index comparison */
4504 if (index != (u64)-1 && index != ref_index)
/* Name overruns the item or BTRFS_NAME_LEN: warn and clamp the length read */
4507 if (cur + sizeof(*ref) + ref_namelen > total ||
4508 ref_namelen > BTRFS_NAME_LEN) {
4509 warning("root %llu INODE %s[%llu %llu] name too long",
4511 key->type == BTRFS_INODE_REF_KEY ?
4513 key->objectid, key->offset);
4515 if (cur + sizeof(*ref) > total)
4517 len = min_t(u32, total - cur - sizeof(*ref),
4523 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4526 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step over header and name to the next reference packed in this item */
4532 len = sizeof(*ref) + ref_namelen;
4533 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4538 /* Skip if not support EXTENDED_IREF feature */
4542 btrfs_release_path(&path);
4543 btrfs_init_path(&path);
/*
 * Second pass: turn *key into the INODE_EXTREF key.  The incoming offset is
 * kept as the directory id and replaced by the extref hash of
 * (directory id, name).
 */
4545 dir_id = key->offset;
4546 key->type = BTRFS_INODE_EXTREF_KEY;
4547 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4549 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4551 ret = INODE_REF_MISSING;
4555 node = path.nodes[0];
4556 slot = path.slots[0];
4558 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4560 total = btrfs_item_size_nr(node, slot);
4562 /* Iterate all entry of INODE_EXTREF */
4563 while (cur < total) {
4564 ret = INODE_REF_MISSING;
4566 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4567 ref_index = btrfs_inode_extref_index(node, extref);
4568 parent = btrfs_inode_extref_parent(node, extref);
4569 if (index != (u64)-1 && index != ref_index)
/* Entries sharing the hash may belong to a different parent directory */
4572 if (parent != dir_id)
4575 if (ref_namelen <= BTRFS_NAME_LEN) {
4578 len = BTRFS_NAME_LEN;
4579 warning("root %llu INODE %s[%llu %llu] name too long",
4581 key->type == BTRFS_INODE_REF_KEY ?
4583 key->objectid, key->offset);
4585 read_extent_buffer(node, ref_namebuf,
4586 (unsigned long)(extref + 1), len);
4588 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step over header and name to the next entry packed in this item */
4595 len = sizeof(*extref) + ref_namelen;
4596 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4601 btrfs_release_path(&path);
4606 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4607 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4609 * @root: the root of the fs/file tree
4610 * @key: the key of the DIR_ITEM/DIR_INDEX
4611 * @size: the st_size of the INODE_ITEM
4612 * @ext_ref: the EXTENDED_IREF feature
4614 * Return 0 if no error occurred.
/*
 * Walk every entry packed in the DIR_ITEM/DIR_INDEX item at @slot of @node.
 * For each entry: add its name length to *size, verify that the INODE_ITEM
 * it points to exists and has a matching file type, and verify through
 * find_inode_ref() that the inode refers back to this directory.
 * Entries pointing at a ROOT_ITEM are exempt from the inode checks.
 * NOTE(review): the jump targets, the update of 'cur' and the return
 * statement are not visible in this copy.
 */
4616 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4617 struct extent_buffer *node, int slot, u64 *size,
4618 unsigned int ext_ref)
4620 struct btrfs_dir_item *di;
4621 struct btrfs_inode_item *ii;
4622 struct btrfs_path path;
4623 struct btrfs_key location;
4624 char namebuf[BTRFS_NAME_LEN] = {0};
4637 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4638 * ignore index check.
4640 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4642 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4643 total = btrfs_item_size_nr(node, slot);
4645 while (cur < total) {
4646 data_len = btrfs_dir_data_len(node, di);
4648 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4649 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4650 "DIR_ITEM" : "DIR_INDEX",
4651 key->objectid, key->offset, data_len);
4653 name_len = btrfs_dir_name_len(node, di);
/* Name overruns the item or BTRFS_NAME_LEN: warn and clamp the length read */
4654 if (cur + sizeof(*di) + name_len > total ||
4655 name_len > BTRFS_NAME_LEN) {
4656 warning("root %llu %s[%llu %llu] name too long",
4658 key->type == BTRFS_DIR_ITEM_KEY ?
4659 "DIR_ITEM" : "DIR_INDEX",
4660 key->objectid, key->offset);
4662 if (cur + sizeof(*di) > total)
4664 len = min_t(u32, total - cur - sizeof(*di),
/* The caller's running size grows by the on-disk name length of each entry */
4669 (*size) += name_len;
4671 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4672 filetype = btrfs_dir_type(node, di);
4674 btrfs_init_path(&path);
4675 btrfs_dir_item_key_to_cpu(node, di, &location);
4677 /* Ignore related ROOT_ITEM check */
4678 if (location.type == BTRFS_ROOT_ITEM_KEY)
4681 /* Check relative INODE_ITEM(existence/filetype) */
4682 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4684 err |= INODE_ITEM_MISSING;
4685 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4686 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4687 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4688 key->offset, location.objectid, name_len,
4693 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4694 struct btrfs_inode_item);
4695 mode = btrfs_inode_mode(path.nodes[0], ii);
4697 if (imode_to_type(mode) != filetype) {
4698 err |= INODE_ITEM_MISMATCH;
4699 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4700 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4701 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4702 key->offset, name_len, namebuf, filetype);
4705 /* Check relative INODE_REF/INODE_EXTREF */
/* Reuse 'location' as the INODE_REF key: (inode, INODE_REF, this directory) */
4706 location.type = BTRFS_INODE_REF_KEY;
4707 location.offset = key->objectid;
4708 ret = find_inode_ref(root, &location, namebuf, len,
4711 if (ret & INODE_REF_MISSING)
4712 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4713 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4714 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4715 key->offset, name_len, namebuf, filetype);
4718 btrfs_release_path(&path);
/* Step over header, name and data to reach the next packed entry */
4719 len = sizeof(*di) + name_len + data_len;
4720 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item with bytes left after its first entry is malformed */
4723 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4724 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4725 root->objectid, key->objectid, key->offset);
4734 * Check file extent datasum/hole, update the size of the file extents,
4735 * check and update the last offset of the file extent.
4737 * @root: the root of fs/file tree.
4738 * @fkey: the key of the file extent.
4739 * @nodatasum: INODE_NODATASUM feature.
4740 * @size: the sum of all EXTENT_DATA items size for this inode.
4741 * @end: the offset of the last extent.
4743 * Return 0 if no error occurred.
/*
 * Validate the EXTENT_DATA item at @slot of @node.
 *  - Inline extents: reject empty ones and, when uncompressed, ones whose
 *    recorded length differs from the inline item length.
 *  - Regular/prealloc extents: compare the amount of checksummed bytes found
 *    by count_csum_range() with what the extent type and @nodatasum allow,
 *    and check that the extent starts where the previous one ended (*end)
 *    unless the global no_holes flag is set.
 * Both *end and *size are advanced by the extent's byte count.
 * NOTE(review): some conditions and the return statements are not visible
 * in this copy.
 */
4745 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4746 struct extent_buffer *node, int slot,
4747 unsigned int nodatasum, u64 *size, u64 *end)
4749 struct btrfs_file_extent_item *fi;
4752 u64 extent_num_bytes;
4754 u64 csum_found; /* In byte size, sectorsize aligned */
4755 u64 search_start; /* Logical range start we search for csum */
4756 u64 search_len; /* Logical range len we search for csum */
4757 unsigned int extent_type;
4758 unsigned int is_hole;
4763 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4765 /* Check inline extent */
4766 extent_type = btrfs_file_extent_type(node, fi);
4767 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4768 struct btrfs_item *e = btrfs_item_nr(slot);
4769 u32 item_inline_len;
4771 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4772 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4773 compressed = btrfs_file_extent_compression(node, fi);
4774 if (extent_num_bytes == 0) {
4776 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
4777 root->objectid, fkey->objectid, fkey->offset);
4778 err |= FILE_EXTENT_ERROR;
/* The two lengths are only required to agree for uncompressed inline data */
4780 if (!compressed && extent_num_bytes != item_inline_len) {
4782 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
4783 root->objectid, fkey->objectid, fkey->offset,
4784 extent_num_bytes, item_inline_len);
4785 err |= FILE_EXTENT_ERROR;
4787 *end += extent_num_bytes;
4788 *size += extent_num_bytes;
4792 /* Check extent type */
4793 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4794 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4795 err |= FILE_EXTENT_ERROR;
4796 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4797 root->objectid, fkey->objectid, fkey->offset);
4801 /* Check REG_EXTENT/PREALLOC_EXTENT */
4802 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4803 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4804 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
4805 extent_offset = btrfs_file_extent_offset(node, fi);
4806 compressed = btrfs_file_extent_compression(node, fi);
/* A hole is encoded as an extent with neither a disk address nor a disk size */
4807 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4810 * Check EXTENT_DATA csum
4812 * For plain (uncompressed) extent, we should only check the range
4813 * we're referring to, as it's possible that part of prealloc extent
4814 * has been written, and has csum:
4816 * |<--- Original large preallocated extent A ---->|
4817 * |<- Prealloc File Extent ->|<- Regular Extent ->|
4820 * For compressed extent, we should check the whole range.
4823 search_start = disk_bytenr + extent_offset;
4824 search_len = extent_num_bytes;
4826 search_start = disk_bytenr;
4827 search_len = disk_num_bytes;
4829 ret = count_csum_range(root, search_start, search_len, &csum_found);
/*
 * Three mutually exclusive complaints: checksums on a NODATASUM inode,
 * missing checksums on a regular non-hole extent, checksums on a prealloc
 * extent.
 */
4830 if (csum_found > 0 && nodatasum) {
4831 err |= ODD_CSUM_ITEM;
4832 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4833 root->objectid, fkey->objectid, fkey->offset);
4834 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4835 !is_hole && (ret < 0 || csum_found < search_len)) {
4836 err |= CSUM_ITEM_MISSING;
4837 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
4838 root->objectid, fkey->objectid, fkey->offset,
4839 csum_found, search_len);
4840 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
4841 err |= ODD_CSUM_ITEM;
4842 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
4843 root->objectid, fkey->objectid, fkey->offset, csum_found);
4846 /* Check EXTENT_DATA hole */
/* Without no_holes every extent must begin exactly where the last one ended */
4847 if (!no_holes && *end != fkey->offset) {
4848 err |= FILE_EXTENT_ERROR;
4849 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4850 root->objectid, fkey->objectid, fkey->offset);
4853 *end += extent_num_bytes;
4855 *size += extent_num_bytes;
4861 * Check INODE_ITEM and related ITEMs (the same inode number)
4862 * 1. check link count
4863 * 2. check inode ref/extref
4864 * 3. check dir item/index
4866 * @ext_ref: the EXTENDED_IREF feature
4868 * Return 0 if no error occurred.
4869 * Return >0 (error bitmap) if an error was found or the traversal is done
/*
 * Check the INODE_ITEM @path points at, then advance @path over the
 * following items that carry the same objectid and dispatch each one by key
 * type (inode ref/extref, dir item/index, file extent).  Finally compare the
 * accumulated counters with the nlink, size and nbytes recorded in the
 * inode item.
 * NOTE(review): several conditions, jump targets and the return statement
 * are not visible in this copy.
 */
4871 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4872 unsigned int ext_ref)
4874 struct extent_buffer *node;
4875 struct btrfs_inode_item *ii;
4876 struct btrfs_key key;
4885 u64 extent_size = 0;
4887 unsigned int nodatasum;
4892 node = path->nodes[0];
4893 slot = path->slots[0];
4895 btrfs_item_key_to_cpu(node, &key, slot);
4896 inode_id = key.objectid;
/* The orphan objectid is not inspected; the path just moves to the next item */
4898 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4899 ret = btrfs_next_item(root, path);
/* Snapshot the inode fields that the per-item results are compared against */
4905 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4906 isize = btrfs_inode_size(node, ii);
4907 nbytes = btrfs_inode_nbytes(node, ii);
4908 mode = btrfs_inode_mode(node, ii);
4909 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4910 nlink = btrfs_inode_nlink(node, ii);
4911 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4914 ret = btrfs_next_item(root, path);
4916 /* out will fill 'err' using current statistics */
4918 } else if (ret > 0) {
4923 node = path->nodes[0];
4924 slot = path->slots[0];
4925 btrfs_item_key_to_cpu(node, &key, slot);
/* A different objectid means the items of this inode are exhausted */
4926 if (key.objectid != inode_id)
4930 case BTRFS_INODE_REF_KEY:
4931 ret = check_inode_ref(root, &key, node, slot, &refs,
4935 case BTRFS_INODE_EXTREF_KEY:
/* An EXTREF without the feature only draws a warning; it is still checked */
4936 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4937 warning("root %llu EXTREF[%llu %llu] isn't supported",
4938 root->objectid, key.objectid,
4940 ret = check_inode_extref(root, &key, node, slot, &refs,
4944 case BTRFS_DIR_ITEM_KEY:
4945 case BTRFS_DIR_INDEX_KEY:
4947 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4948 root->objectid, inode_id,
4949 imode_to_type(mode), key.objectid,
4952 ret = check_dir_item(root, &key, node, slot, &size,
4956 case BTRFS_EXTENT_DATA_KEY:
4958 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4959 root->objectid, inode_id, key.objectid,
4962 ret = check_file_extent(root, &key, node, slot,
4963 nodatasum, &extent_size,
4967 case BTRFS_XATTR_ITEM_KEY:
4970 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4971 key.objectid, key.type, key.offset);
4976 /* verify INODE_ITEM nlink/isize/nbytes */
4979 err |= LINK_COUNT_ERROR;
4980 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4981 root->objectid, inode_id, nlink);
4985 * Just a warning, as dir inode nbytes is just an
4986 * instructive value.
4988 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
4989 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4990 root->objectid, inode_id,
4991 root->fs_info->nodesize);
/* For a directory, 'size' is the sum gathered by check_dir_item() */
4994 if (isize != size) {
4996 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4997 root->objectid, inode_id, isize, size);
/* 'refs' was counted by check_inode_ref()/check_inode_extref() */
5000 if (nlink != refs) {
5001 err |= LINK_COUNT_ERROR;
5002 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5003 root->objectid, inode_id, nlink, refs);
5004 } else if (!nlink) {
5008 if (!nbytes && !no_holes && extent_end < isize) {
5009 err |= NBYTES_ERROR;
5010 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
5011 root->objectid, inode_id, isize);
/* 'extent_size' was accumulated by check_file_extent() */
5014 if (nbytes != extent_size) {
5015 err |= NBYTES_ERROR;
5016 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
5017 root->objectid, inode_id, nbytes, extent_size);
/*
 * Look up the INODE_ITEM with objectid BTRFS_FIRST_FREE_OBJECTID in @root
 * and run check_inode_item() on it; a missing item is reported as
 * INODE_ITEM_MISSING.
 * NOTE(review): the branch conditions around the search result and the
 * return statements are not visible in this copy.
 */
5024 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5026 struct btrfs_path path;
5027 struct btrfs_key key;
5031 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5032 key.type = BTRFS_INODE_ITEM_KEY;
5035 /* For root being dropped, we don't need to check first inode */
5036 if (btrfs_root_refs(&root->root_item) == 0 &&
5037 btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
5041 btrfs_init_path(&path);
5043 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5048 err |= INODE_ITEM_MISSING;
5049 error("first inode item of root %llu is missing",
5053 err |= check_inode_item(root, &path, ext_ref);
5058 btrfs_release_path(&path);
5063 * Iterate all item on the tree and call check_inode_item() to check.
5065 * @root: the root of the tree to be checked.
5066 * @ext_ref: the EXTENDED_IREF feature
5068 * Return 0 if no error found.
5069 * Return <0 for error.
/*
 * Low-memory check of one fs/subvolume tree: check the first inode
 * separately, then walk the tree with walk_down_tree_v2() and
 * walk_up_tree_v2().
 * NOTE(review): the loop construct and the handling of the walk results are
 * not visible in this copy.
 */
5071 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
5073 struct btrfs_path path;
5074 struct node_refs nrefs;
5075 struct btrfs_root_item *root_item = &root->root_item;
5081 * We need to manually check the first inode item(256)
5082 * As the following traversal function will only start from
5083 * the first inode item in the leaf, if inode item(256) is missing
5084 * we will just skip it forever.
5086 ret = check_fs_first_inode(root, ext_ref);
5090 memset(&nrefs, 0, sizeof(nrefs));
5091 level = btrfs_header_level(root->node);
5092 btrfs_init_path(&path);
/*
 * Root still referenced, or no drop progress recorded: start the walk at
 * the root node, taking an extra reference on it for the path.
 */
5094 if (btrfs_root_refs(root_item) > 0 ||
5095 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5096 path.nodes[level] = root->node;
5097 path.slots[level] = 0;
5098 extent_buffer_get(root->node);
5100 struct btrfs_key key;
/* Otherwise start from the recorded drop progress key at the drop level */
5102 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5103 level = root_item->drop_level;
5104 path.lowest_level = level;
5105 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5112 ret = walk_down_tree_v2(root, &path, &level, &nrefs, ext_ref);
5115 /* if ret is negative, walk shall stop */
5121 ret = walk_up_tree_v2(root, &path, &level);
5123 /* Normal exit, reset ret to err */
5130 btrfs_release_path(&path);
5135 * Find the relative ref for root_ref and root_backref.
5137 * @root: the root of the root tree.
5138 * @ref_key: the key of the root ref.
5140 * Return 0 if no error occurred.
/*
 * Given a ROOT_REF or ROOT_BACKREF item at @slot of @node, look up its
 * counterpart of the opposite type in @root and require that both carry the
 * same dirid, sequence, name length and name.  A missing counterpart sets
 * ROOT_REF_MISSING, a differing one ROOT_REF_MISMATCH.
 * NOTE(review): the condition on the search result and the return statement
 * are not visible in this copy.
 */
5142 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
5143 struct extent_buffer *node, int slot)
5145 struct btrfs_path path;
5146 struct btrfs_key key;
5147 struct btrfs_root_ref *ref;
5148 struct btrfs_root_ref *backref;
5149 char ref_name[BTRFS_NAME_LEN] = {0};
5150 char backref_name[BTRFS_NAME_LEN] = {0};
5156 u32 backref_namelen;
5161 ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
5162 ref_dirid = btrfs_root_ref_dirid(node, ref);
5163 ref_seq = btrfs_root_ref_sequence(node, ref);
5164 ref_namelen = btrfs_root_ref_name_len(node, ref);
/* Over-long names are read only up to BTRFS_NAME_LEN bytes, with a warning */
5166 if (ref_namelen <= BTRFS_NAME_LEN) {
5169 len = BTRFS_NAME_LEN;
5170 warning("%s[%llu %llu] ref_name too long",
5171 ref_key->type == BTRFS_ROOT_REF_KEY ?
5172 "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
5175 read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
5177 /* Find relative root_ref */
/*
 * The counterpart key swaps objectid and offset; the type arithmetic maps
 * ROOT_REF to ROOT_BACKREF and ROOT_BACKREF to ROOT_REF.
 */
5178 key.objectid = ref_key->offset;
5179 key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
5180 key.offset = ref_key->objectid;
5182 btrfs_init_path(&path);
5183 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5185 err |= ROOT_REF_MISSING;
5186 error("%s[%llu %llu] couldn't find relative ref",
5187 ref_key->type == BTRFS_ROOT_REF_KEY ?
5188 "ROOT_REF" : "ROOT_BACKREF",
5189 ref_key->objectid, ref_key->offset);
5193 backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
5194 struct btrfs_root_ref);
5195 backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
5196 backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
5197 backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
5199 if (backref_namelen <= BTRFS_NAME_LEN) {
5200 len = backref_namelen;
5202 len = BTRFS_NAME_LEN;
5203 warning("%s[%llu %llu] ref_name too long",
5204 key.type == BTRFS_ROOT_REF_KEY ?
5205 "ROOT_REF" : "ROOT_BACKREF",
5206 key.objectid, key.offset);
5208 read_extent_buffer(path.nodes[0], backref_name,
5209 (unsigned long)(backref + 1), len);
/* Names are compared over 'len', the clamped length of the counterpart */
5211 if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
5212 ref_namelen != backref_namelen ||
5213 strncmp(ref_name, backref_name, len)) {
5214 err |= ROOT_REF_MISMATCH;
5215 error("%s[%llu %llu] mismatch relative ref",
5216 ref_key->type == BTRFS_ROOT_REF_KEY ?
5217 "ROOT_REF" : "ROOT_BACKREF",
5218 ref_key->objectid, ref_key->offset);
5221 btrfs_release_path(&path);
5226 * Check all fs/file tree in low_memory mode.
5228 * 1. for fs tree root item, call check_fs_root_v2()
5229 * 2. for fs tree root ref/backref, call check_root_ref()
5231 * Return 0 if no error occurred.
/*
 * Low-memory counterpart of the fs-roots pass: iterate the tree root
 * starting at BTRFS_FS_TREE_OBJECTID, run check_fs_root_v2() on every fs
 * tree ROOT_ITEM and check_root_ref() on every ROOT_REF/ROOT_BACKREF.
 * Iteration stops once an objectid above BTRFS_LAST_FREE_OBJECTID is seen.
 * NOTE(review): the loop construct, error accumulation and return statement
 * are not visible in this copy.
 */
5233 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
5235 struct btrfs_root *tree_root = fs_info->tree_root;
5236 struct btrfs_root *cur_root = NULL;
5237 struct btrfs_path path;
5238 struct btrfs_key key;
5239 struct extent_buffer *node;
5240 unsigned int ext_ref;
/* Whether the filesystem has the EXTENDED_IREF incompat feature enabled */
5245 ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
5247 btrfs_init_path(&path);
5248 key.objectid = BTRFS_FS_TREE_OBJECTID;
5250 key.type = BTRFS_ROOT_ITEM_KEY;
5252 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
5256 } else if (ret > 0) {
5262 node = path.nodes[0];
5263 slot = path.slots[0];
5264 btrfs_item_key_to_cpu(node, &key, slot);
5265 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
5267 if (key.type == BTRFS_ROOT_ITEM_KEY &&
5268 fs_root_objectid(key.objectid)) {
/* Reloc trees are read without the cache and freed again after the check */
5269 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
5270 cur_root = btrfs_read_fs_root_no_cache(fs_info,
5273 key.offset = (u64)-1;
5274 cur_root = btrfs_read_fs_root(fs_info, &key);
5277 if (IS_ERR(cur_root)) {
5278 error("Fail to read fs/subvol tree: %lld",
5284 ret = check_fs_root_v2(cur_root, ext_ref);
5287 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
5288 btrfs_free_fs_root(cur_root);
5289 } else if (key.type == BTRFS_ROOT_REF_KEY ||
5290 key.type == BTRFS_ROOT_BACKREF_KEY) {
5291 ret = check_root_ref(tree_root, &key, node, slot);
5295 ret = btrfs_next_item(tree_root, &path);
5305 btrfs_release_path(&path);
/*
 * Examine every backref queued on rec->backrefs and compare the total of
 * found references with rec->refs.  Each kind of inconsistency has its own
 * diagnostic on stderr:
 *  - backref not present in the extent tree
 *  - tree backref never referenced back
 *  - data backref whose found_ref differs from num_refs
 *  - data backref whose disk_bytenr or bytes differ from the record
 *  - global count mismatch
 * NOTE(review): the error bookkeeping, the use of @print_errs and the return
 * statement are not visible in this copy; maybe_free_extent_rec() treats a
 * zero return as "nothing wrong".
 */
5309 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
5311 struct list_head *cur = rec->backrefs.next;
5312 struct extent_backref *back;
5313 struct tree_backref *tback;
5314 struct data_backref *dback;
5318 while(cur != &rec->backrefs) {
5319 back = to_extent_backref(cur);
5321 if (!back->found_extent_tree) {
5325 if (back->is_data) {
5326 dback = to_data_backref(back);
5327 fprintf(stderr, "Backref %llu %s %llu"
5328 " owner %llu offset %llu num_refs %lu"
5329 " not found in extent tree\n",
5330 (unsigned long long)rec->start,
5331 back->full_backref ?
5333 back->full_backref ?
5334 (unsigned long long)dback->parent:
5335 (unsigned long long)dback->root,
5336 (unsigned long long)dback->owner,
5337 (unsigned long long)dback->offset,
5338 (unsigned long)dback->num_refs);
5340 tback = to_tree_backref(back);
5341 fprintf(stderr, "Backref %llu parent %llu"
5342 " root %llu not found in extent tree\n",
5343 (unsigned long long)rec->start,
5344 (unsigned long long)tback->parent,
5345 (unsigned long long)tback->root);
/* Tree backrefs carry a single found_ref flag instead of a counter */
5348 if (!back->is_data && !back->found_ref) {
5352 tback = to_tree_backref(back);
5353 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
5354 (unsigned long long)rec->start,
5355 back->full_backref ? "parent" : "root",
5356 back->full_backref ?
5357 (unsigned long long)tback->parent :
5358 (unsigned long long)tback->root, back);
5360 if (back->is_data) {
5361 dback = to_data_backref(back);
5362 if (dback->found_ref != dback->num_refs) {
5366 fprintf(stderr, "Incorrect local backref count"
5367 " on %llu %s %llu owner %llu"
5368 " offset %llu found %u wanted %u back %p\n",
5369 (unsigned long long)rec->start,
5370 back->full_backref ?
5372 back->full_backref ?
5373 (unsigned long long)dback->parent:
5374 (unsigned long long)dback->root,
5375 (unsigned long long)dback->owner,
5376 (unsigned long long)dback->offset,
5377 dback->found_ref, dback->num_refs, back);
5379 if (dback->disk_bytenr != rec->start) {
5383 fprintf(stderr, "Backref disk bytenr does not"
5384 " match extent record, bytenr=%llu, "
5385 "ref bytenr=%llu\n",
5386 (unsigned long long)rec->start,
5387 (unsigned long long)dback->disk_bytenr);
5390 if (dback->bytes != rec->nr) {
5394 fprintf(stderr, "Backref bytes do not match "
5395 "extent backref, bytenr=%llu, ref "
5396 "bytes=%llu, backref bytes=%llu\n",
5397 (unsigned long long)rec->start,
5398 (unsigned long long)rec->nr,
5399 (unsigned long long)dback->bytes);
/* Accumulate the global count; data backrefs contribute their found_ref */
5402 if (!back->is_data) {
5405 dback = to_data_backref(back);
5406 found += dback->found_ref;
5409 if (found != rec->refs) {
5413 fprintf(stderr, "Incorrect global backref count "
5414 "on %llu found %llu wanted %llu\n",
5415 (unsigned long long)rec->start,
5416 (unsigned long long)found,
5417 (unsigned long long)rec->refs);
/*
 * Drain rec->backrefs: repeatedly take the first list entry and convert it
 * to its extent_backref until the list is empty.
 * NOTE(review): the unlink/free of each entry and the return value are not
 * visible in this copy.
 */
5423 static int free_all_extent_backrefs(struct extent_record *rec)
5425 struct extent_backref *back;
5426 struct list_head *cur;
5427 while (!list_empty(&rec->backrefs)) {
5428 cur = rec->backrefs.next;
5429 back = to_extent_backref(cur);
/*
 * Tear down the extent records stored in @extent_cache.
 *
 * Takes the first cache_extent of the tree, maps it to the embedding
 * extent_record, removes it from the tree and releases the backrefs of the
 * record through free_all_extent_backrefs().
 * NOTE(review): presumably this repeats until the tree is empty and the
 * record itself is freed - confirm.
 */
5436 static void free_extent_record_cache(struct cache_tree *extent_cache)
5438 struct cache_extent *cache;
5439 struct extent_record *rec;
5442 cache = first_cache_extent(extent_cache);
5445 rec = container_of(cache, struct extent_record, cache);
5446 remove_cache_extent(extent_cache, cache);
5447 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing about it is left to verify.
 *
 * The record is released only when all of the following hold: content and
 * owner ref have been checked, extent_item_refs equals refs and refs is
 * non-zero, there are no duplicates, all_backpointers_checked(rec, 0)
 * returns zero, and none of bad_full_backref, crossing_stripes or
 * wrong_chunk_type is set.  In that case the record is removed from the
 * cache, its backrefs are freed and it is unlinked from rec->list.
 */
5452 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
5453 struct extent_record *rec)
5455 if (rec->content_checked && rec->owner_ref_checked &&
5456 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
5457 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
5458 !rec->bad_full_backref && !rec->crossing_stripes &&
5459 !rec->wrong_chunk_type) {
5460 remove_cache_extent(extent_cache, &rec->cache);
5461 free_all_extent_backrefs(rec);
5462 list_del_init(&rec->list);
/*
 * Check whether the owner stored in the header of @buf is backed by a
 * reference for @rec.
 *
 * First pass: walk rec->backrefs; found_ref and full_backref are tested on
 * each entry before back->root is compared with btrfs_header_owner(buf).
 * Second pass: read the owner root with btrfs_read_fs_root(), search it for
 * the first key of @buf with the path stopping at level + 1, and compare the
 * block pointer in that parent slot with buf->start.
 *
 * The final statement returns 0 when the parent pointer matched and 1
 * otherwise.
 */
5468 static int check_owner_ref(struct btrfs_root *root,
5469 struct extent_record *rec,
5470 struct extent_buffer *buf)
5472 struct extent_backref *node;
5473 struct tree_backref *back;
5474 struct btrfs_root *ref_root;
5475 struct btrfs_key key;
5476 struct btrfs_path path;
5477 struct extent_buffer *parent;
5482 list_for_each_entry(node, &rec->backrefs, list) {
5485 if (!node->found_ref)
5487 if (node->full_backref)
5489 back = to_tree_backref(node);
5490 if (btrfs_header_owner(buf) == back->root)
5493 BUG_ON(rec->is_root);
5495 /* try to find the block by search corresponding fs tree */
5496 key.objectid = btrfs_header_owner(buf);
5497 key.type = BTRFS_ROOT_ITEM_KEY;
5498 key.offset = (u64)-1;
5500 ref_root = btrfs_read_fs_root(root->fs_info, &key);
5501 if (IS_ERR(ref_root))
5504 level = btrfs_header_level(buf);
/* key is reused here: it now holds the first key stored in buf */
5506 btrfs_item_key_to_cpu(buf, &key, 0);
5508 btrfs_node_key_to_cpu(buf, &key, 0);
5510 btrfs_init_path(&path);
/* stop one level above buf so that the parent node stays in the path */
5511 path.lowest_level = level + 1;
5512 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
5516 parent = path.nodes[level + 1];
5517 if (parent && buf->start == btrfs_node_blockptr(parent,
5518 path.slots[level + 1]))
5521 btrfs_release_path(&path);
5522 return found ? 0 : 1;
/*
 * Inspect the backrefs of @rec for one rooted in the extent tree.
 *
 * Walks rec->backrefs by hand; each entry is viewed as a tree backref,
 * full_backref is tested, and back->root is compared with
 * BTRFS_EXTENT_TREE_OBJECTID.
 * NOTE(review): presumably data backrefs are skipped and a match yields a
 * non-zero return - confirm.
 */
5525 static int is_extent_tree_record(struct extent_record *rec)
5527 struct list_head *cur = rec->backrefs.next;
5528 struct extent_backref *node;
5529 struct tree_backref *back;
5532 while(cur != &rec->backrefs) {
5533 node = to_extent_backref(cur);
5537 back = to_tree_backref(node);
5538 if (node->full_backref)
5540 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register a block that failed I/O as a corrupt extent record.
 *
 * Looks up the range given by start and len in @extent_cache, maps the hit
 * to its extent_record and tests is_extent_tree_record() on it (negated).
 * When execution reaches the end, rec->parent_key is converted to CPU order
 * and passed to btrfs_add_corrupt_extent_record(), whose result is returned.
 */
5547 static int record_bad_block_io(struct btrfs_fs_info *info,
5548 struct cache_tree *extent_cache,
5551 struct extent_record *rec;
5552 struct cache_extent *cache;
5553 struct btrfs_key key;
5555 cache = lookup_cache_extent(extent_cache, start, len);
5559 rec = container_of(cache, struct extent_record, cache);
5560 if (!is_extent_tree_record(rec))
5563 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
5564 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the entries at @slot and @slot + 1 in @buf.
 *
 * Node case (non-zero header level): both btrfs_key_ptr entries are read
 * out and written back in swapped positions; btrfs_fixup_low_keys() is
 * called with the key of entry 0 (any guarding condition should be
 * confirmed).
 * Leaf case: both item payloads are copied into temporary buffers and
 * written back crosswise, the offset and size fields of the two item
 * headers are exchanged, and the two keys are rewritten through
 * btrfs_set_item_key_unsafe() with path->slots[0] pointed at each slot in
 * turn.
 *
 * NOTE(review): the crosswise writes copy item1_data using item2_size and
 * item2_data using item1_size, while each buffer was allocated with its own
 * size.  When the sizes differ this looks like a read past the smaller
 * allocation - confirm and fix separately.
 */
5567 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
5568 struct extent_buffer *buf, int slot)
5570 if (btrfs_header_level(buf)) {
5571 struct btrfs_key_ptr ptr1, ptr2;
5573 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
5574 sizeof(struct btrfs_key_ptr));
5575 read_extent_buffer(buf, &ptr2,
5576 btrfs_node_key_ptr_offset(slot + 1),
5577 sizeof(struct btrfs_key_ptr));
5578 write_extent_buffer(buf, &ptr1,
5579 btrfs_node_key_ptr_offset(slot + 1),
5580 sizeof(struct btrfs_key_ptr));
5581 write_extent_buffer(buf, &ptr2,
5582 btrfs_node_key_ptr_offset(slot),
5583 sizeof(struct btrfs_key_ptr));
5585 struct btrfs_disk_key key;
5586 btrfs_node_key(buf, &key, 0);
5587 btrfs_fixup_low_keys(root, path, &key,
5588 btrfs_header_level(buf) + 1);
5591 struct btrfs_item *item1, *item2;
5592 struct btrfs_key k1, k2;
5593 char *item1_data, *item2_data;
5594 u32 item1_offset, item2_offset, item1_size, item2_size;
5596 item1 = btrfs_item_nr(slot);
5597 item2 = btrfs_item_nr(slot + 1);
5598 btrfs_item_key_to_cpu(buf, &k1, slot);
5599 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
5600 item1_offset = btrfs_item_offset(buf, item1);
5601 item2_offset = btrfs_item_offset(buf, item2);
5602 item1_size = btrfs_item_size(buf, item1);
5603 item2_size = btrfs_item_size(buf, item2);
5605 item1_data = malloc(item1_size);
5608 item2_data = malloc(item2_size);
5614 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
5615 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/* crosswise write-back; see the size caveat in the header comment */
5617 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
5618 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
5622 btrfs_set_item_offset(buf, item1, item2_offset);
5623 btrfs_set_item_offset(buf, item2, item1_offset);
5624 btrfs_set_item_size(buf, item1, item2_size);
5625 btrfs_set_item_size(buf, item2, item1_size);
/* the key setter works on path->slots[0], so retarget it per slot */
5627 path->slots[0] = slot;
5628 btrfs_set_item_key_unsafe(root, path, &k2);
5629 path->slots[0] = slot + 1;
5630 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair out-of-order keys in the block at path->lowest_level.
 *
 * Iterates over adjacent slot pairs (i, i + 1), loading both keys with the
 * node or the item accessor, and tests btrfs_comp_cpu_keys(&k1, &k2) < 0
 * before swap_values() is called for slot i.  The buffer is marked dirty
 * with btrfs_mark_buffer_dirty().
 * NOTE(review): presumably pairs that are already ordered are skipped and a
 * failing swap ends the loop - confirm.
 */
5635 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
5637 struct extent_buffer *buf;
5638 struct btrfs_key k1, k2;
5640 int level = path->lowest_level;
5643 buf = path->nodes[level];
5644 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
5646 btrfs_node_key_to_cpu(buf, &k1, i);
5647 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5649 btrfs_item_key_to_cpu(buf, &k1, i);
5650 btrfs_item_key_to_cpu(buf, &k2, i + 1);
5652 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5654 ret = swap_values(root, path, buf, i);
5657 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item at @slot from leaf @buf when its key type is one that can
 * be regenerated.
 *
 * Eligible key types are DIR_INDEX, EXTENT_ITEM, METADATA_ITEM,
 * TREE_BLOCK_REF and EXTENT_DATA_REF; the check tests for a key outside
 * that set.  For an eligible item a message is printed, the item headers
 * after @slot are moved down by one position with memmove_extent_buffer(),
 * nritems is decremented, btrfs_fixup_low_keys() is fed the key of item 0
 * and the buffer is marked dirty.
 * Only the item header array is moved in the visible statements; the item
 * data area is not touched here.
 */
5663 static int delete_bogus_item(struct btrfs_root *root,
5664 struct btrfs_path *path,
5665 struct extent_buffer *buf, int slot)
5667 struct btrfs_key key;
5668 int nritems = btrfs_header_nritems(buf);
5670 btrfs_item_key_to_cpu(buf, &key, slot);
5672 /* These are all the keys we can deal with missing. */
5673 if (key.type != BTRFS_DIR_INDEX_KEY &&
5674 key.type != BTRFS_EXTENT_ITEM_KEY &&
5675 key.type != BTRFS_METADATA_ITEM_KEY &&
5676 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5677 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5680 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5681 (unsigned long long)key.objectid, key.type,
5682 (unsigned long long)key.offset, slot, buf->start);
/* close the gap in the header array: nritems - slot - 1 headers follow */
5683 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5684 btrfs_item_nr_offset(slot + 1),
5685 sizeof(struct btrfs_item) *
5686 (nritems - slot - 1));
5687 btrfs_set_header_nritems(buf, nritems - 1);
5689 struct btrfs_disk_key disk_key;
5691 btrfs_item_key(buf, &disk_key, 0);
5692 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5694 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Item 0 is expected to end at BTRFS_LEAF_DATA_SIZE(root); every later item
 * is expected to end at the offset of the item before it.  When the end
 * lies beyond that bound, delete_bogus_item() is tried and a cannot-fix
 * message exists for that branch.  The distance from the end to the bound
 * is stored in shift, the item data is moved up by shift bytes with
 * memmove_extent_buffer(), the item offset is rewritten and the buffer is
 * marked dirty.
 * NOTE(review): presumably items with a zero shift are left alone and a
 * successful delete restarts the scan - confirm.
 */
5698 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
5700 struct extent_buffer *buf;
5704 /* We should only get this for leaves */
5705 BUG_ON(path->lowest_level);
5706 buf = path->nodes[0];
5708 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5709 unsigned int shift = 0, offset;
/* first item: its data must end exactly at the end of the leaf data area */
5711 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5712 BTRFS_LEAF_DATA_SIZE(root)) {
5713 if (btrfs_item_end_nr(buf, i) >
5714 BTRFS_LEAF_DATA_SIZE(root)) {
5715 ret = delete_bogus_item(root, path, buf, i);
5718 fprintf(stderr, "item is off the end of the "
5719 "leaf, can't fix\n");
5723 shift = BTRFS_LEAF_DATA_SIZE(root) -
5724 btrfs_item_end_nr(buf, i);
/* later items: data must end where the previous item data begins */
5725 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5726 btrfs_item_offset_nr(buf, i - 1)) {
5727 if (btrfs_item_end_nr(buf, i) >
5728 btrfs_item_offset_nr(buf, i - 1)) {
5729 ret = delete_bogus_item(root, path, buf, i);
5732 fprintf(stderr, "items overlap, can't fix\n");
5736 shift = btrfs_item_offset_nr(buf, i - 1) -
5737 btrfs_item_end_nr(buf, i);
5742 printf("Shifting item nr %d by %u bytes in block %llu\n",
5743 i, shift, (unsigned long long)buf->start);
5744 offset = btrfs_item_offset_nr(buf, i);
5745 memmove_extent_buffer(buf,
5746 btrfs_leaf_data(buf) + offset + shift,
5747 btrfs_leaf_data(buf) + offset,
5748 btrfs_item_size_nr(buf, i));
5749 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5751 btrfs_mark_buffer_dirty(buf);
5755 * We may have moved things, in which case we want to exit so we don't
5756 * write those changes out. Once we have proper abort functionality in
5757 * progs this can be changed to something nicer.
5764 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5765 * then just return -EIO.
/*
 * Repair driver for a tree block that failed the basic checks.
 *
 * Only BTRFS_TREE_BLOCK_BAD_KEY_ORDER and BTRFS_TREE_BLOCK_INVALID_OFFSETS
 * are tested for.  The roots referencing buf->start are collected with
 * btrfs_find_all_roots(); for each entry of that list the root is read with
 * btrfs_read_fs_root(), a transaction is started, and the tree is searched
 * for the first key of @buf with lowest_level set to the level of @buf and
 * skip_check_block set.  fix_key_order() or fix_item_offset() is then run on
 * the path, chosen by @status, and the transaction is committed.
 * NOTE(review): the last argument of btrfs_search_slot() is 1 here,
 * presumably requesting COW of the path - confirm.
 */
5767 static int try_to_fix_bad_block(struct btrfs_root *root,
5768 struct extent_buffer *buf,
5769 enum btrfs_tree_block_status status)
5771 struct btrfs_trans_handle *trans;
5772 struct ulist *roots;
5773 struct ulist_node *node;
5774 struct btrfs_root *search_root;
5775 struct btrfs_path path;
5776 struct ulist_iterator iter;
5777 struct btrfs_key root_key, key;
5780 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5781 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5784 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5788 btrfs_init_path(&path);
5789 ULIST_ITER_INIT(&iter);
5790 while ((node = ulist_next(roots, &iter))) {
/* node->val is used as the objectid of the root to load */
5791 root_key.objectid = node->val;
5792 root_key.type = BTRFS_ROOT_ITEM_KEY;
5793 root_key.offset = (u64)-1;
5795 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5802 trans = btrfs_start_transaction(search_root, 0);
5803 if (IS_ERR(trans)) {
5804 ret = PTR_ERR(trans);
5808 path.lowest_level = btrfs_header_level(buf);
5809 path.skip_check_block = 1;
5810 if (path.lowest_level)
5811 btrfs_node_key_to_cpu(buf, &key, 0);
5813 btrfs_item_key_to_cpu(buf, &key, 0);
5814 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5817 btrfs_commit_transaction(trans, search_root);
5820 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5821 ret = fix_key_order(search_root, &path);
5822 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5823 ret = fix_item_offset(search_root, &path);
5825 btrfs_commit_transaction(trans, search_root);
5828 btrfs_release_path(&path);
5829 btrfs_commit_transaction(trans, search_root);
5832 btrfs_release_path(&path);
/*
 * Validate one tree block and update its extent record.
 *
 * The record for [buf->start, buf->len] is looked up in @extent_cache and
 * its generation, info_objectid (from the first key, when the block has
 * items) and info_level are filled in from the header.  The block is then
 * checked with btrfs_check_leaf() or btrfs_check_node() against
 * rec->parent_key.  A status other than BTRFS_TREE_BLOCK_CLEAN leads to
 * try_to_fix_bad_block(); if that does not yield CLEAN either, a bad block
 * message is printed.
 * Afterwards content_checked is set; owner_ref_checked is set directly when
 * @flags carries BTRFS_BLOCK_FLAG_FULL_BACKREF, and check_owner_ref() is
 * consulted as well.  Finally maybe_free_extent_rec() is given the chance
 * to release the record.
 * NOTE(review): presumably the repair attempt is gated on a repair mode
 * flag and owner_ref_checked depends on the check_owner_ref() result -
 * confirm.
 */
5836 static int check_block(struct btrfs_root *root,
5837 struct cache_tree *extent_cache,
5838 struct extent_buffer *buf, u64 flags)
5840 struct extent_record *rec;
5841 struct cache_extent *cache;
5842 struct btrfs_key key;
5843 enum btrfs_tree_block_status status;
5847 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5850 rec = container_of(cache, struct extent_record, cache);
5851 rec->generation = btrfs_header_generation(buf);
5853 level = btrfs_header_level(buf);
5854 if (btrfs_header_nritems(buf) > 0) {
5857 btrfs_item_key_to_cpu(buf, &key, 0);
5859 btrfs_node_key_to_cpu(buf, &key, 0);
5861 rec->info_objectid = key.objectid;
5863 rec->info_level = level;
5865 if (btrfs_is_leaf(buf))
5866 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5868 status = btrfs_check_node(root, &rec->parent_key, buf);
5870 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5872 status = try_to_fix_bad_block(root, buf, status);
5873 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5875 fprintf(stderr, "bad block %llu\n",
5876 (unsigned long long)buf->start);
5879 * Signal to callers we need to start the scan over
5880 * again since we'll have cowed blocks.
5885 rec->content_checked = 1;
5886 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5887 rec->owner_ref_checked = 1;
5889 ret = check_owner_ref(root, rec, buf);
5891 rec->owner_ref_checked = 1;
5895 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search rec->backrefs for a tree backref matching @parent or @root.
 *
 * Each list entry is viewed as a tree_backref.  Two comparisons are made:
 * back->parent against @parent, preceded by a test of !full_backref, and
 * back->root against @root, preceded by a test of full_backref.
 * NOTE(review): presumably a non-zero @parent selects the parent comparison,
 * data backrefs are skipped, and NULL is returned on no match - confirm.
 */
5899 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5900 u64 parent, u64 root)
5902 struct list_head *cur = rec->backrefs.next;
5903 struct extent_backref *node;
5904 struct tree_backref *back;
5906 while(cur != &rec->backrefs) {
5907 node = to_extent_backref(cur);
5911 back = to_tree_backref(node);
5913 if (!node->full_backref)
5915 if (parent == back->parent)
5918 if (node->full_backref)
5920 if (back->root == root)
/*
 * Allocate a tree_backref and append it to rec->backrefs.
 *
 * Only the embedded node is zeroed with memset().  One branch stores
 * @parent and sets full_backref to 1, the other clears full_backref.
 * NOTE(review): presumably the branch is chosen by a non-zero @parent, the
 * other branch stores @root, and allocation failure returns NULL - confirm.
 */
5927 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5928 u64 parent, u64 root)
5930 struct tree_backref *ref = malloc(sizeof(*ref));
5934 memset(&ref->node, 0, sizeof(ref->node));
5936 ref->parent = parent;
5937 ref->node.full_backref = 1;
5940 ref->node.full_backref = 0;
5942 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search rec->backrefs for a data backref matching the given identity.
 *
 * Each list entry is viewed as a data_backref.  A parent comparison
 * (back->parent against @parent) is preceded by a test of !full_backref; a
 * keyed comparison (root, owner and offset all equal) is preceded by a test
 * of full_backref.  Within the keyed match, when found_ref is non-zero and
 * the entry already has found_ref set, a mismatch in bytes or disk_bytenr
 * is tested for.
 * NOTE(review): presumably such a mismatching entry is passed over so that
 * the caller allocates a separate backref - confirm.
 */
5947 static struct data_backref *find_data_backref(struct extent_record *rec,
5948 u64 parent, u64 root,
5949 u64 owner, u64 offset,
5951 u64 disk_bytenr, u64 bytes)
5953 struct list_head *cur = rec->backrefs.next;
5954 struct extent_backref *node;
5955 struct data_backref *back;
5957 while(cur != &rec->backrefs) {
5958 node = to_extent_backref(cur);
5962 back = to_data_backref(node);
5964 if (!node->full_backref)
5966 if (parent == back->parent)
5969 if (node->full_backref)
5971 if (back->root == root && back->owner == owner &&
5972 back->offset == offset) {
5973 if (found_ref && node->found_ref &&
5974 (back->bytes != bytes ||
5975 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data_backref and append it to rec->backrefs.
 *
 * The embedded node is zeroed and flagged as data.  One branch stores
 * @parent and sets full_backref to 1; the other stores @offset and clears
 * full_backref.  bytes is initialised from max_size, and rec->max_size is
 * raised when max_size exceeds it.
 * NOTE(review): presumably the branch is chosen by a non-zero @parent and
 * the keyed branch also stores root and owner - confirm.
 */
5984 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5985 u64 parent, u64 root,
5986 u64 owner, u64 offset,
5989 struct data_backref *ref = malloc(sizeof(*ref));
5993 memset(&ref->node, 0, sizeof(ref->node));
5994 ref->node.is_data = 1;
5997 ref->parent = parent;
6000 ref->node.full_backref = 1;
6004 ref->offset = offset;
6005 ref->node.full_backref = 0;
6007 ref->bytes = max_size;
6010 list_add_tail(&ref->node.list, &rec->backrefs);
6011 if (max_size > rec->max_size)
6012 rec->max_size = max_size;
6016 /* Check if the type of extent matches with its chunk */
/*
 * Looks up the block group covering rec->start through global_info and sets
 * rec->wrong_chunk_type when the flags disagree with the record:
 *  - a non-metadata record needs BTRFS_BLOCK_GROUP_DATA;
 *  - a metadata record needs SYSTEM or METADATA;
 *  - when backrefs exist, the first one decides: a data backref is always
 *    wrong, a tree backref rooted in BTRFS_CHUNK_TREE_OBJECTID requires
 *    SYSTEM, and METADATA is the other expected type.
 * NOTE(review): presumably a failed block group lookup returns early and a
 * full backref is handled separately - confirm.
 */
6017 static void check_extent_type(struct extent_record *rec)
6019 struct btrfs_block_group_cache *bg_cache;
6021 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
6025 /* data extent, check chunk directly*/
6026 if (!rec->metadata) {
6027 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
6028 rec->wrong_chunk_type = 1;
6032 /* metadata extent, check the obvious case first */
6033 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
6034 BTRFS_BLOCK_GROUP_METADATA))) {
6035 rec->wrong_chunk_type = 1;
6040 * Check SYSTEM extent, as it's also marked as metadata, we can only
6041 * make sure it's a SYSTEM extent by its backref
6043 if (!list_empty(&rec->backrefs)) {
6044 struct extent_backref *node;
6045 struct tree_backref *tback;
6048 node = to_extent_backref(rec->backrefs.next);
6049 if (node->is_data) {
6050 /* tree block shouldn't have data backref */
6051 rec->wrong_chunk_type = 1;
6054 tback = container_of(node, struct tree_backref, node);
6056 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
6057 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
6059 bg_type = BTRFS_BLOCK_GROUP_METADATA;
6060 if (!(bg_cache->flags & bg_type))
6061 rec->wrong_chunk_type = 1;
6066 * Allocate a new extent record, fill default values from @tmpl and insert into
6067 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
6068 * the cache, otherwise it fails.
/*
 * Field by field: start, max_size, found_rec, content_checked,
 * owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 * parent_generation and parent_key are copied from @tmpl; nr becomes the
 * larger of tmpl->nr and tmpl->max_size; num_duplicates, bad_full_backref,
 * crossing_stripes and wrong_chunk_type start at 0 and
 * flag_block_full_backref at FLAG_UNSET.  The cache key is
 * (tmpl->start, tmpl->nr), so the cached size can be smaller than rec->nr.
 * bytes_used grows by rec->nr, crossing_stripes is computed with
 * check_crossing_stripes() over nodesize, and check_extent_type() runs.
 * A zero tmpl->max_size trips BUG_ON.
 * NOTE(review): presumably the stripe check applies to metadata records
 * only and a failed insert frees the record - confirm.
 */
6070 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
6071 struct extent_record *tmpl)
6073 struct extent_record *rec;
6076 BUG_ON(tmpl->max_size == 0);
6077 rec = malloc(sizeof(*rec));
6080 rec->start = tmpl->start;
6081 rec->max_size = tmpl->max_size;
6082 rec->nr = max(tmpl->nr, tmpl->max_size);
6083 rec->found_rec = tmpl->found_rec;
6084 rec->content_checked = tmpl->content_checked;
6085 rec->owner_ref_checked = tmpl->owner_ref_checked;
6086 rec->num_duplicates = 0;
6087 rec->metadata = tmpl->metadata;
6088 rec->flag_block_full_backref = FLAG_UNSET;
6089 rec->bad_full_backref = 0;
6090 rec->crossing_stripes = 0;
6091 rec->wrong_chunk_type = 0;
6092 rec->is_root = tmpl->is_root;
6093 rec->refs = tmpl->refs;
6094 rec->extent_item_refs = tmpl->extent_item_refs;
6095 rec->parent_generation = tmpl->parent_generation;
6096 INIT_LIST_HEAD(&rec->backrefs);
6097 INIT_LIST_HEAD(&rec->dups);
6098 INIT_LIST_HEAD(&rec->list);
6099 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
6100 rec->cache.start = tmpl->start;
6101 rec->cache.size = tmpl->nr;
6102 ret = insert_cache_extent(extent_cache, &rec->cache);
6107 bytes_used += rec->nr;
6110 rec->crossing_stripes = check_crossing_stripes(global_info,
6111 rec->start, global_info->nodesize);
6112 check_extent_type(rec);
6117 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
6119 * - refs - if found, increase refs
6120 * - is_root - if found, set
6121 * - content_checked - if found, set
6122 * - owner_ref_checked - if found, set
6124 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into an existing extent record, or create a new one.
 *
 * When the range (tmpl->start, tmpl->nr) hits an existing record:
 *  - rec->nr is reset from the template;
 *  - if the template carries found_rec and either the start differs or the
 *    record already has found_rec, the record is queued on
 *    duplicate_extents (once) and a freshly allocated shadow record holding
 *    start, max_size, metadata and extent_item_refs from the template is
 *    appended to rec->dups, with num_duplicates incremented;
 *  - a non-zero tmpl->extent_item_refs on a non-duplicate replaces the
 *    value stored in the record, with a message when one was already set;
 *  - content_checked, owner_ref_checked, parent_key, parent_generation and
 *    max_size are folded in;
 *  - crossing_stripes and the chunk type are re-evaluated, and
 *    maybe_free_extent_rec() may release the record.
 * The fallback is add_extent_rec_nolookup().
 * NOTE(review): the shadow record comes from malloc() and only some fields
 * are assigned in the visible statements; presumably the remaining fields
 * that are later read are set as well - confirm.
 */
6126 static int add_extent_rec(struct cache_tree *extent_cache,
6127 struct extent_record *tmpl)
6129 struct extent_record *rec;
6130 struct cache_extent *cache;
6134 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
6136 rec = container_of(cache, struct extent_record, cache);
6140 rec->nr = max(tmpl->nr, tmpl->max_size);
6143 * We need to make sure to reset nr to whatever the extent
6144 * record says was the real size, this way we can compare it to
6147 if (tmpl->found_rec) {
6148 if (tmpl->start != rec->start || rec->found_rec) {
6149 struct extent_record *tmp;
/* an empty rec->list means the record is not queued as a duplicate yet */
6152 if (list_empty(&rec->list))
6153 list_add_tail(&rec->list,
6154 &duplicate_extents);
6157 * We have to do this song and dance in case we
6158 * find an extent record that falls inside of
6159 * our current extent record but does not have
6160 * the same objectid.
6162 tmp = malloc(sizeof(*tmp));
6165 tmp->start = tmpl->start;
6166 tmp->max_size = tmpl->max_size;
6169 tmp->metadata = tmpl->metadata;
6170 tmp->extent_item_refs = tmpl->extent_item_refs;
6171 INIT_LIST_HEAD(&tmp->list);
6172 list_add_tail(&tmp->list, &rec->dups);
6173 rec->num_duplicates++;
6180 if (tmpl->extent_item_refs && !dup) {
6181 if (rec->extent_item_refs) {
6182 fprintf(stderr, "block %llu rec "
6183 "extent_item_refs %llu, passed %llu\n",
6184 (unsigned long long)tmpl->start,
6185 (unsigned long long)
6186 rec->extent_item_refs,
6187 (unsigned long long)tmpl->extent_item_refs);
6189 rec->extent_item_refs = tmpl->extent_item_refs;
6193 if (tmpl->content_checked)
6194 rec->content_checked = 1;
6195 if (tmpl->owner_ref_checked)
6196 rec->owner_ref_checked = 1;
6197 memcpy(&rec->parent_key, &tmpl->parent_key,
6198 sizeof(tmpl->parent_key));
6199 if (tmpl->parent_generation)
6200 rec->parent_generation = tmpl->parent_generation;
6201 if (rec->max_size < tmpl->max_size)
6202 rec->max_size = tmpl->max_size;
6205 * A metadata extent can't cross stripe_len boundary, otherwise
6206 * kernel scrub won't be able to handle it.
6207 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
6211 rec->crossing_stripes = check_crossing_stripes(
6212 global_info, rec->start,
6213 global_info->nodesize);
6214 check_extent_type(rec);
6215 maybe_free_extent_rec(extent_cache, rec);
6219 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref for the extent at @bytenr.
 *
 * The extent record is looked up with a one byte range; when needed a
 * zeroed template starting at @bytenr is inserted through
 * add_extent_rec_nolookup() and the lookup is repeated.  A record whose
 * start differs from @bytenr is tested for.  The matching backref is found
 * with find_tree_backref() or created with alloc_tree_backref().
 * Two marking paths exist: one sets node.found_ref, the other sets
 * node.found_extent_tree; each prints a message when its flag was already
 * set.  check_extent_type() and maybe_free_extent_rec() run at the end.
 * NOTE(review): presumably @found_ref selects between the two marking
 * paths - confirm.
 */
6224 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
6225 u64 parent, u64 root, int found_ref)
6227 struct extent_record *rec;
6228 struct tree_backref *back;
6229 struct cache_extent *cache;
6232 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6234 struct extent_record tmpl;
6236 memset(&tmpl, 0, sizeof(tmpl));
6237 tmpl.start = bytenr;
6242 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6246 /* really a bug in cache_extent implement now */
6247 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6252 rec = container_of(cache, struct extent_record, cache);
6253 if (rec->start != bytenr) {
6255 * Several cause, from unaligned bytenr to over lapping extents
6260 back = find_tree_backref(rec, parent, root);
6262 back = alloc_tree_backref(rec, parent, root);
6268 if (back->node.found_ref) {
6269 fprintf(stderr, "Extent back ref already exists "
6270 "for %llu parent %llu root %llu \n",
6271 (unsigned long long)bytenr,
6272 (unsigned long long)parent,
6273 (unsigned long long)root);
6275 back->node.found_ref = 1;
6277 if (back->node.found_extent_tree) {
6278 fprintf(stderr, "Extent back ref already exists "
6279 "for %llu parent %llu root %llu \n",
6280 (unsigned long long)bytenr,
6281 (unsigned long long)parent,
6282 (unsigned long long)root);
6284 back->node.found_extent_tree = 1;
6286 check_extent_type(rec);
6287 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * The extent record is looked up with a one byte range; when needed a
 * zeroed template with start and max_size filled in is inserted through
 * add_extent_rec_nolookup() and the lookup is repeated.  rec->max_size is
 * raised to @max_size when smaller.  The backref is found with
 * find_data_backref() or created with alloc_data_backref().
 * One marking path requires num_refs == 1 (BUG_ON), requires the stored
 * bytes to equal max_size when found_ref was already set (BUG_ON), then
 * sets node.found_ref, increments found_ref, stores bytes and disk_bytenr
 * and marks the record content and owner ref as checked.  The other path
 * prints a message when found_extent_tree was already set, then stores
 * num_refs and sets found_extent_tree.  maybe_free_extent_rec() runs last.
 * NOTE(review): presumably @found_ref selects between the two paths -
 * confirm.
 */
6291 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
6292 u64 parent, u64 root, u64 owner, u64 offset,
6293 u32 num_refs, int found_ref, u64 max_size)
6295 struct extent_record *rec;
6296 struct data_backref *back;
6297 struct cache_extent *cache;
6300 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6302 struct extent_record tmpl;
6304 memset(&tmpl, 0, sizeof(tmpl));
6305 tmpl.start = bytenr;
6307 tmpl.max_size = max_size;
6309 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
6313 cache = lookup_cache_extent(extent_cache, bytenr, 1);
6318 rec = container_of(cache, struct extent_record, cache);
6319 if (rec->max_size < max_size)
6320 rec->max_size = max_size;
6323 * If found_ref is set then max_size is the real size and must match the
6324 * existing refs. So if we have already found a ref then we need to
6325 * make sure that this ref matches the existing one, otherwise we need
6326 * to add a new backref so we can notice that the backrefs don't match
6327 * and we need to figure out who is telling the truth. This is to
6328 * account for that awful fsync bug I introduced where we'd end up with
6329 * a btrfs_file_extent_item that would have its length include multiple
6330 * prealloc extents or point inside of a prealloc extent.
6332 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
6335 back = alloc_data_backref(rec, parent, root, owner, offset,
6341 BUG_ON(num_refs != 1);
6342 if (back->node.found_ref)
6343 BUG_ON(back->bytes != max_size);
6344 back->node.found_ref = 1;
6345 back->found_ref += 1;
6346 back->bytes = max_size;
6347 back->disk_bytenr = bytenr;
6349 rec->content_checked = 1;
6350 rec->owner_ref_checked = 1;
6352 if (back->node.found_extent_tree) {
6353 fprintf(stderr, "Extent back ref already exists "
6354 "for %llu parent %llu root %llu "
6355 "owner %llu offset %llu num_refs %lu\n",
6356 (unsigned long long)bytenr,
6357 (unsigned long long)parent,
6358 (unsigned long long)root,
6359 (unsigned long long)owner,
6360 (unsigned long long)offset,
6361 (unsigned long)num_refs);
6363 back->num_refs = num_refs;
6364 back->node.found_extent_tree = 1;
6366 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range (bytenr, size) for processing.
 *
 * The range is first added to @seen, keeping the result in ret, and then
 * added to @pending with the result ignored.
 * NOTE(review): presumably a failed insert into @seen (range already known)
 * returns before the second insert - confirm.
 */
6370 static int add_pending(struct cache_tree *pending,
6371 struct cache_tree *seen, u64 bytenr, u32 size)
6374 ret = add_cache_extent(seen, bytenr, size);
6377 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits with up to @bits_nr ranges to process next.
 *
 * Sources are consulted in this order in the visible statements: the first
 * extent of @reada (stored in bits[0]); @nodes searched from @last moved
 * back by 32768 when larger than that, then from 0; and @pending from 0.
 * Two do/while loops copy consecutive cache extents into @bits while
 * entries remain and ret stays below @bits_nr.  When more than 8 slots are
 * still free, further extents from @pending that start within 32768 bytes
 * of the end of the previous pick are appended.
 *
 * NOTE(review): node_start is unsigned long but is initialised from the u64
 * @last; on a target where unsigned long is narrower than u64 the value is
 * truncated - confirm whether that matters here.
 */
6381 static int pick_next_pending(struct cache_tree *pending,
6382 struct cache_tree *reada,
6383 struct cache_tree *nodes,
6384 u64 last, struct block_info *bits, int bits_nr,
6387 unsigned long node_start = last;
6388 struct cache_extent *cache;
6391 cache = search_cache_extent(reada, 0);
6393 bits[0].start = cache->start;
6394 bits[0].size = cache->size;
/* back up by 32768 so that nodes slightly before last are picked as well */
6399 if (node_start > 32768)
6400 node_start -= 32768;
6402 cache = search_cache_extent(nodes, node_start);
6404 cache = search_cache_extent(nodes, 0);
6407 cache = search_cache_extent(pending, 0);
6412 bits[ret].start = cache->start;
6413 bits[ret].size = cache->size;
6414 cache = next_cache_extent(cache);
6416 } while (cache && ret < bits_nr);
6422 bits[ret].start = cache->start;
6423 bits[ret].size = cache->size;
6424 cache = next_cache_extent(cache);
6426 } while (cache && ret < bits_nr);
6428 if (bits_nr - ret > 8) {
/* lookup tracks the end of the most recently chosen range */
6429 u64 lookup = bits[0].start + bits[0].size;
6430 struct cache_extent *next;
6431 next = search_cache_extent(pending, lookup);
6433 if (next->start - lookup > 32768)
6435 bits[ret].start = next->start;
6436 bits[ret].size = next->size;
6437 lookup = next->start + next->size;
6441 next = next_cache_extent(next);
/*
 * Per-extent destructor used when freeing a chunk cache tree.
 *
 * Maps @cache to its chunk_record and unlinks the record from both of its
 * lists (list and dextents).
 * NOTE(review): presumably the record is freed afterwards - confirm.
 */
6449 static void free_chunk_record(struct cache_extent *cache)
6451 struct chunk_record *rec;
6453 rec = container_of(cache, struct chunk_record, cache);
6454 list_del_init(&rec->list);
6455 list_del_init(&rec->dextents);
/*
 * Release every chunk record held in @chunk_cache by handing
 * free_chunk_record to cache_tree_free_extents() as the per-extent callback.
 */
6459 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
6461 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node destructor for the device record rb-tree: maps @node to its
 * device_record.
 * NOTE(review): presumably the record is freed afterwards - confirm.
 */
6464 static void free_device_record(struct rb_node *node)
6466 struct device_record *rec;
6468 rec = container_of(node, struct device_record, node);
/*
 * Instantiates the tree-freeing helper for device_cache, with
 * free_device_record as the per-node callback.
 * NOTE(review): the macro expansion is defined elsewhere - confirm the name
 * of the generated function there.
 */
6472 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into @tree: the embedded cache extent goes into
 * tree->tree via insert_cache_extent(), and the record is appended to the
 * tree->block_groups list.
 * NOTE(review): presumably a failed cache insert returns before the list
 * append - confirm.
 */
6474 int insert_block_group_record(struct block_group_tree *tree,
6475 struct block_group_record *bg_rec)
6479 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
6483 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-extent destructor used when freeing a block group tree.
 *
 * Maps @cache to its block_group_record and unlinks it from its list.
 * NOTE(review): presumably the record is freed afterwards - confirm.
 */
6487 static void free_block_group_record(struct cache_extent *cache)
6489 struct block_group_record *rec;
6491 rec = container_of(cache, struct block_group_record, cache);
6492 list_del_init(&rec->list);
/*
 * Release every block group record held in @tree by handing
 * free_block_group_record to cache_tree_free_extents() as the per-extent
 * callback.
 */
6496 void free_block_group_tree(struct block_group_tree *tree)
6498 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into @tree using insert_cache_extent2(), then append the
 * record to both orphan lists (no_chunk_orphans and no_device_orphans).
 * NOTE(review): presumably a failed cache insert returns before the list
 * appends, and records leave the orphan lists once matched to a chunk or a
 * device elsewhere - confirm.
 */
6501 int insert_device_extent_record(struct device_extent_tree *tree,
6502 struct device_extent_record *de_rec)
6507 * Device extent is a bit different from the other extents, because
6508 * the extents which belong to the different devices may have the
6509 * same start and size, so we need use the special extent cache
6510 * search/insert functions.
6512 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
6516 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
6517 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-extent destructor used when freeing a device extent tree.
 *
 * Maps @cache to its device_extent_record and unlinks it from chunk_list
 * and device_list, each only when that list head is non-empty.
 * NOTE(review): presumably the record is freed afterwards - confirm.
 */
6521 static void free_device_extent_record(struct cache_extent *cache)
6523 struct device_extent_record *rec;
6525 rec = container_of(cache, struct device_extent_record, cache);
6526 if (!list_empty(&rec->chunk_list))
6527 list_del_init(&rec->chunk_list);
6528 if (!list_empty(&rec->device_list))
6529 list_del_init(&rec->device_list);
/*
 * Release every device extent record held in @tree by handing
 * free_device_extent_record to cache_tree_free_extents() as the per-extent
 * callback.
 */
6533 void free_device_extent_tree(struct device_extent_tree *tree)
6535 cache_tree_free_extents(&tree->tree, free_device_extent_record);
6538 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref entry.
 * Compiled only when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * The ref objectid is compared with BTRFS_FIRST_FREE_OBJECTID: below it the
 * item is recorded with add_tree_backref(), and add_data_backref() is the
 * other call, receiving the v0 ref count as num_refs.  In both calls
 * key.objectid is the extent bytenr and key.offset is passed as parent.
 */
6539 static int process_extent_ref_v0(struct cache_tree *extent_cache,
6540 struct extent_buffer *leaf, int slot)
6542 struct btrfs_extent_ref_v0 *ref0;
6543 struct btrfs_key key;
6546 btrfs_item_key_to_cpu(leaf, &key, slot);
6547 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
6548 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
6549 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
6552 ret = add_data_backref(extent_cache, key.objectid, key.offset,
6553 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is allocated zeroed with a size derived from the stripe count
 * (btrfs_chunk_record_size).  The cache range is (key->offset, chunk
 * length); generation comes from the leaf header; objectid, type and offset
 * mirror @key; the remaining fields are read from the on-disk chunk,
 * followed by devid, offset and dev_uuid of every stripe.
 * An allocation failure prints a message.
 * NOTE(review): what happens after the failure message (exit or NULL
 * return) should be confirmed, since callers in view use the result
 * unchecked.
 */
6559 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
6560 struct btrfs_key *key,
6563 struct btrfs_chunk *ptr;
6564 struct chunk_record *rec;
6567 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
6568 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
6570 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
6572 fprintf(stderr, "memory allocation failed\n");
6576 INIT_LIST_HEAD(&rec->list);
6577 INIT_LIST_HEAD(&rec->dextents);
6580 rec->cache.start = key->offset;
6581 rec->cache.size = btrfs_chunk_length(leaf, ptr);
6583 rec->generation = btrfs_header_generation(leaf);
6585 rec->objectid = key->objectid;
6586 rec->type = key->type;
6587 rec->offset = key->offset;
6589 rec->length = rec->cache.size;
6590 rec->owner = btrfs_chunk_owner(leaf, ptr);
6591 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
6592 rec->type_flags = btrfs_chunk_type(leaf, ptr);
6593 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
6594 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
6595 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
6596 rec->num_stripes = num_stripes;
6597 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
6599 for (i = 0; i < rec->num_stripes; ++i) {
6600 rec->stripes[i].devid =
6601 btrfs_stripe_devid_nr(leaf, ptr, i);
6602 rec->stripes[i].offset =
6603 btrfs_stripe_offset_nr(leaf, ptr, i);
6604 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
6605 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is first passed to btrfs_check_chunk_valid(); an error message
 * naming the chunk offset and length exists for the invalid case.  A record
 * is then built with btrfs_new_chunk_record() and inserted with
 * insert_cache_extent(); a message is printed when the chunk already
 * existed.
 * NOTE(review): presumably an invalid chunk is skipped and the duplicate
 * record is freed - confirm.
 */
6612 static int process_chunk_item(struct cache_tree *chunk_cache,
6613 struct btrfs_key *key, struct extent_buffer *eb,
6616 struct chunk_record *rec;
6617 struct btrfs_chunk *chunk;
6620 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
6622 * Do extra check for this chunk item,
6624 * It's still possible one can craft a leaf with CHUNK_ITEM, with
6625 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
6626 * and owner<->key_type check.
6628 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
6631 error("chunk(%llu, %llu) is not valid, ignore it",
6632 key->offset, btrfs_chunk_length(eb, chunk));
6635 rec = btrfs_new_chunk_record(eb, key, slot);
6636 ret = insert_cache_extent(chunk_cache, &rec->cache);
6638 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
6639 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the rb-tree @dev_cache, ordered by device_record_compare.
 *
 * generation comes from the leaf header; objectid, type and offset mirror
 * @key; devid, total_byte and byte_used are read from the on-disk item.
 * Messages are printed on allocation failure and when the device already
 * existed.
 * Note that rec->devid is assigned twice: first from key->offset, then from
 * btrfs_device_id(); the second value is the one that remains.
 * NOTE(review): the record comes from malloc() and is not zeroed in the
 * visible statements; fields not assigned here hold indeterminate values
 * unless set elsewhere - confirm.
 */
6646 static int process_device_item(struct rb_root *dev_cache,
6647 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6649 struct btrfs_dev_item *ptr;
6650 struct device_record *rec;
6653 ptr = btrfs_item_ptr(eb,
6654 slot, struct btrfs_dev_item);
6656 rec = malloc(sizeof(*rec));
6658 fprintf(stderr, "memory allocation failed\n");
6662 rec->devid = key->offset;
6663 rec->generation = btrfs_header_generation(eb);
6665 rec->objectid = key->objectid;
6666 rec->type = key->type;
6667 rec->offset = key->offset;
/* overrides the devid taken from the key a few lines above */
6669 rec->devid = btrfs_device_id(eb, ptr);
6670 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6671 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6673 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6675 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the block group item at @slot of @leaf.
 *
 * The record is allocated zeroed.  The cache range is
 * (key->objectid, key->offset); generation comes from the leaf header;
 * objectid, type and offset mirror @key; flags are read from the on-disk
 * item; the list head is initialised.
 * An allocation failure prints a message.
 * NOTE(review): what happens after the failure message should be confirmed,
 * since the caller in view uses the result unchecked.
 */
6682 struct block_group_record *
6683 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6686 struct btrfs_block_group_item *ptr;
6687 struct block_group_record *rec;
6689 rec = calloc(1, sizeof(*rec));
6691 fprintf(stderr, "memory allocation failed\n");
6695 rec->cache.start = key->objectid;
6696 rec->cache.size = key->offset;
6698 rec->generation = btrfs_header_generation(leaf);
6700 rec->objectid = key->objectid;
6701 rec->type = key->type;
6702 rec->offset = key->offset;
6704 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6705 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6707 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block group record from the item at @slot of @eb and insert it
 * into @block_group_cache; a message naming the objectid and offset is
 * printed when the block group already existed.
 * NOTE(review): presumably the duplicate record is freed and the insert
 * result is returned - confirm.
 */
6712 static int process_block_group_item(struct block_group_tree *block_group_cache,
6713 struct btrfs_key *key,
6714 struct extent_buffer *eb, int slot)
6716 struct block_group_record *rec;
6719 rec = btrfs_new_block_group_record(eb, key, slot);
6720 ret = insert_block_group_record(block_group_cache, rec);
6722 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6723 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the dev extent item at @slot of @leaf.
 *
 * The record is allocated zeroed.  The cache entry is keyed by
 * cache.objectid = key->objectid and cache.start = key->offset, with
 * cache.size set to the on-disk extent length; generation comes from the
 * leaf header; objectid, type and offset mirror @key; the chunk objectid,
 * a chunk offset value and the length are read from the on-disk item; both
 * list heads are initialised.
 * An allocation failure prints a message.
 * NOTE(review): what happens after the failure message should be confirmed,
 * since the caller in view uses the result unchecked.
 */
6730 struct device_extent_record *
6731 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6732 struct btrfs_key *key, int slot)
6734 struct device_extent_record *rec;
6735 struct btrfs_dev_extent *ptr;
6737 rec = calloc(1, sizeof(*rec));
6739 fprintf(stderr, "memory allocation failed\n");
6743 rec->cache.objectid = key->objectid;
6744 rec->cache.start = key->offset;
6746 rec->generation = btrfs_header_generation(leaf);
6748 rec->objectid = key->objectid;
6749 rec->type = key->type;
6750 rec->offset = key->offset;
6752 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6753 rec->chunk_objecteid =
6754 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6756 btrfs_dev_extent_chunk_offset(leaf, ptr);
6757 rec->length = btrfs_dev_extent_length(leaf, ptr);
6758 rec->cache.size = rec->length;
6760 INIT_LIST_HEAD(&rec->chunk_list);
6761 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device extent record from the item at @slot of @eb and insert it
 * into @dev_extent_cache; a message naming the objectid, offset and length
 * exists for the case where the extent was already present.
 * NOTE(review): presumably the duplicate record is freed and the insert
 * result is returned - confirm.
 */
6767 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6768 struct btrfs_key *key, struct extent_buffer *eb,
6771 struct device_extent_record *rec;
6774 rec = btrfs_new_device_extent_record(eb, key, slot);
6775 ret = insert_device_extent_record(dev_extent_cache, rec);
6778 "Device extent[%llu, %llu, %llu] existed.\n",
6779 rec->objectid, rec->offset, rec->length);
/*
 * Record the EXTENT_ITEM / METADATA_ITEM at @slot of @eb in @extent_cache,
 * together with every inline backref stored inside the item.
 *
 * For a METADATA_ITEM key the extent length is the filesystem nodesize;
 * otherwise it is taken from key.offset. The following are reported through
 * error(): a bytenr that is not sectorsize-aligned, a metadata extent whose
 * length differs from nodesize, and a data extent whose length is not
 * sectorsize-aligned.
 *
 * After the extent record is added, the inline refs between the end of the
 * extent item header and the end of the item are walked and dispatched on
 * their type to add_tree_backref() or add_data_backref(). An unknown type is
 * reported on stderr as a corrupt extent record.
 */
6786 static int process_extent_item(struct btrfs_root *root,
6787 struct cache_tree *extent_cache,
6788 struct extent_buffer *eb, int slot)
6790 struct btrfs_extent_item *ei;
6791 struct btrfs_extent_inline_ref *iref;
6792 struct btrfs_extent_data_ref *dref;
6793 struct btrfs_shared_data_ref *sref;
6794 struct btrfs_key key;
6795 struct extent_record tmpl;
6800 u32 item_size = btrfs_item_size_nr(eb, slot);
6806 btrfs_item_key_to_cpu(eb, &key, slot);
6808 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6810 num_bytes = root->fs_info->nodesize;
6812 num_bytes = key.offset;
6815 if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
6816 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6817 key.objectid, root->fs_info->sectorsize);
/*
 * An item smaller than struct btrfs_extent_item is handled as a v0 extent
 * item when BTRFS_COMPAT_EXTENT_TREE_V0 is defined: only its ref count is
 * read and the record is added without walking any inline refs.
 */
6820 if (item_size < sizeof(*ei)) {
6821 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6822 struct btrfs_extent_item_v0 *ei0;
6823 BUG_ON(item_size != sizeof(*ei0));
6824 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6825 refs = btrfs_extent_refs_v0(eb, ei0);
6829 memset(&tmpl, 0, sizeof(tmpl));
6830 tmpl.start = key.objectid;
6831 tmpl.nr = num_bytes;
6832 tmpl.extent_item_refs = refs;
6833 tmpl.metadata = metadata;
6835 tmpl.max_size = num_bytes;
6837 return add_extent_rec(extent_cache, &tmpl);
6840 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6841 refs = btrfs_extent_refs(eb, ei);
6842 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6846 if (metadata && num_bytes != root->fs_info->nodesize) {
6847 error("ignore invalid metadata extent, length %llu does not equal to %u",
6848 num_bytes, root->fs_info->nodesize);
6851 if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
6852 error("ignore invalid data extent, length %llu is not aligned to %u",
6853 num_bytes, root->fs_info->sectorsize);
6857 memset(&tmpl, 0, sizeof(tmpl));
6858 tmpl.start = key.objectid;
6859 tmpl.nr = num_bytes;
6860 tmpl.extent_item_refs = refs;
6861 tmpl.metadata = metadata;
6863 tmpl.max_size = num_bytes;
6864 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item header. A tree block stored
 * under an EXTENT_ITEM key additionally carries a btrfs_tree_block_info in
 * front of them, which is stepped over here.
 */
6866 ptr = (unsigned long)(ei + 1);
6867 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6868 key.type == BTRFS_EXTENT_ITEM_KEY)
6869 ptr += sizeof(struct btrfs_tree_block_info);
6871 end = (unsigned long)ei + item_size;
6873 iref = (struct btrfs_extent_inline_ref *)ptr;
6874 type = btrfs_extent_inline_ref_type(eb, iref);
6875 offset = btrfs_extent_inline_ref_offset(eb, iref);
6877 case BTRFS_TREE_BLOCK_REF_KEY:
6878 ret = add_tree_backref(extent_cache, key.objectid,
6882 "add_tree_backref failed (extent items tree block): %s",
6885 case BTRFS_SHARED_BLOCK_REF_KEY:
6886 ret = add_tree_backref(extent_cache, key.objectid,
6890 "add_tree_backref failed (extent items shared block): %s",
6893 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload overlays the inline ref from its offset field on. */
6894 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6895 add_data_backref(extent_cache, key.objectid, 0,
6896 btrfs_extent_data_ref_root(eb, dref),
6897 btrfs_extent_data_ref_objectid(eb,
6899 btrfs_extent_data_ref_offset(eb, dref),
6900 btrfs_extent_data_ref_count(eb, dref),
6903 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref payload follows the inline ref header. */
6904 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6905 add_data_backref(extent_cache, key.objectid, offset,
6907 btrfs_shared_data_ref_count(eb, sref),
6911 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6912 key.objectid, key.type, num_bytes);
/* Advance by the size of this inline ref type. */
6915 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * backed by one exactly matching free space entry, after carving out any
 * superblock mirror stripes that overlap it.
 *
 * Each superblock mirror offset is mapped with btrfs_rmap_block() and the
 * resulting logical stripes are compared with the range: a stripe at the
 * start or end trims the range, and a stripe strictly inside splits it. In
 * the split case the left part is checked by a recursive call and the loop
 * carries on with the right part.
 *
 * Whatever is left is looked up with btrfs_find_free_space(). A missing
 * entry, or one whose offset or length differs, is reported on stderr. A
 * matching entry is unlinked from the block group's free space ctl.
 */
6922 static int check_cache_range(struct btrfs_root *root,
6923 struct btrfs_block_group_cache *cache,
6924 u64 offset, u64 bytes)
6926 struct btrfs_free_space *entry;
6932 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6933 bytenr = btrfs_sb_offset(i);
6934 ret = btrfs_rmap_block(root->fs_info,
6935 cache->key.objectid, bytenr, 0,
6936 &logical, &nr, &stripe_len);
/* Stripes lying entirely before or entirely after the range are tested first. */
6941 if (logical[nr] + stripe_len <= offset)
6943 if (offset + bytes <= logical[nr])
6945 if (logical[nr] == offset) {
6946 if (stripe_len >= bytes) {
6950 bytes -= stripe_len;
6951 offset += stripe_len;
6952 } else if (logical[nr] < offset) {
6953 if (logical[nr] + stripe_len >=
6958 bytes = (offset + bytes) -
6959 (logical[nr] + stripe_len);
6960 offset = logical[nr] + stripe_len;
6963 * Could be tricky, the super may land in the
6964 * middle of the area we're checking. First
6965 * check the easiest case, it's at the end.
6967 if (logical[nr] + stripe_len >=
6969 bytes = logical[nr] - offset;
6973 /* Check the left side */
6974 ret = check_cache_range(root, cache,
6976 logical[nr] - offset);
6982 /* Now we continue with the right side */
6983 bytes = (offset + bytes) -
6984 (logical[nr] + stripe_len);
6985 offset = logical[nr] + stripe_len;
/* The remaining range must be matched by a single free space entry. */
6992 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6994 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6995 offset, offset+bytes);
6999 if (entry->offset != offset) {
7000 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
7005 if (entry->bytes != bytes) {
7006 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
7007 bytes, entry->bytes, offset);
/* Consume the entry so that leftovers can be detected by the caller. */
7011 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Compare the loaded free space of block group @cache with the extent tree.
 *
 * The extent tree is walked from max(block group start,
 * BTRFS_SUPER_INFO_OFFSET) up to the end of the block group. Every gap
 * between `last` and the next EXTENT_ITEM / METADATA_ITEM is handed to
 * check_cache_range(), as is the tail between the last extent and the end
 * of the block group. `last` advances by key.offset for EXTENT_ITEM keys and
 * by nodesize for the other accepted key type.
 *
 * Afterwards a non-empty free_space_offset tree is reported on stderr as
 * leftover entries.
 */
7016 static int verify_space_cache(struct btrfs_root *root,
7017 struct btrfs_block_group_cache *cache)
7019 struct btrfs_path path;
7020 struct extent_buffer *leaf;
7021 struct btrfs_key key;
/* All lookups below go through the extent tree, whatever root was passed. */
7025 root = root->fs_info->extent_root;
7027 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
7029 btrfs_init_path(&path);
7030 key.objectid = last;
7032 key.type = BTRFS_EXTENT_ITEM_KEY;
7033 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7038 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7039 ret = btrfs_next_leaf(root, &path);
7047 leaf = path.nodes[0];
7048 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Reached an item at or beyond the end of this block group. */
7049 if (key.objectid >= cache->key.offset + cache->key.objectid)
7051 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
7052 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts exactly where the previous one ended: no gap to check. */
7057 if (last == key.objectid) {
7058 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7059 last = key.objectid + key.offset;
7061 last = key.objectid + root->fs_info->nodesize;
/* Otherwise [last, key.objectid) is a gap that the cache must describe. */
7066 ret = check_cache_range(root, cache, last,
7067 key.objectid - last);
7070 if (key.type == BTRFS_EXTENT_ITEM_KEY)
7071 last = key.objectid + key.offset;
7073 last = key.objectid + root->fs_info->nodesize;
/* Tail between the last extent and the end of the block group. */
7077 if (last < cache->key.objectid + cache->key.offset)
7078 ret = check_cache_range(root, cache, last,
7079 cache->key.objectid +
7080 cache->key.offset - last);
7083 btrfs_release_path(&path);
7086 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
7087 fprintf(stderr, "There are still entries left in the space "
/*
 * Load and verify the free space information of every block group.
 *
 * If the superblock's cache generation is set (not -1ULL) and differs from
 * the super generation, a message states that the space cache will be
 * invalidated. NOTE(review): presumably the function returns right after
 * that message; the return is not visible in this listing — confirm.
 *
 * Block groups are visited in address order starting just past the primary
 * superblock. For each one the free space ctl is initialised if absent,
 * btrfs_remove_free_space_cache() is called, and the free space is loaded:
 * from the free space tree when the FREE_SPACE_TREE compat_ro bit is set
 * (with super stripes excluded around the load), otherwise through
 * load_free_space_cache(). The result is passed to verify_space_cache().
 *
 * When progress reporting is enabled the TASK_FREE_SPACE task is started
 * here, and task_stop() is called at the end.
 *
 * Returns -EINVAL if `error` is non-zero, 0 otherwise.
 */
7095 static int check_space_cache(struct btrfs_root *root)
7097 struct btrfs_block_group_cache *cache;
7098 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
7102 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
7103 btrfs_super_generation(root->fs_info->super_copy) !=
7104 btrfs_super_cache_generation(root->fs_info->super_copy)) {
7105 printf("cache and super generation don't match, space cache "
7106 "will be invalidated\n");
7110 if (ctx.progress_enabled) {
7111 ctx.tp = TASK_FREE_SPACE;
7112 task_start(ctx.info);
7116 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Next lookup resumes at the end of this block group. */
7120 start = cache->key.objectid + cache->key.offset;
7121 if (!cache->free_space_ctl) {
7122 if (btrfs_init_free_space_ctl(cache,
7123 root->fs_info->sectorsize)) {
7128 btrfs_remove_free_space_cache(cache);
7131 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
7132 ret = exclude_super_stripes(root, cache);
7134 fprintf(stderr, "could not exclude super stripes: %s\n",
7139 ret = load_free_space_tree(root->fs_info, cache);
7140 free_excluded_extents(root, cache);
7142 fprintf(stderr, "could not load free space tree: %s\n",
7149 ret = load_free_space_cache(root->fs_info, cache);
7154 ret = verify_space_cache(root, cache);
7156 fprintf(stderr, "cache appears valid but isn't %Lu\n",
7157 cache->key.objectid);
7162 task_stop(ctx.info);
7164 return error ? -EINVAL : 0;
/*
 * Verify the data checksums of @num_bytes of data at logical address
 * @bytenr against the checksums stored in @eb starting at @leaf_offset.
 *
 * @num_bytes is tested for being a multiple of the sectorsize up front. The
 * data is read with read_extent_data() into a buffer of @num_bytes and
 * checksummed one sectorsize block at a time. The expected checksum of the
 * block at byte offset `tmp` is read from @eb at
 * @leaf_offset + tmp / sectorsize * csum_size.
 *
 * On a mismatch the mirror number, bytenr and both checksums are printed to
 * stderr, and btrfs_num_copies() is consulted to see whether a further
 * mirror is available.
 */
7167 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
7168 u64 num_bytes, unsigned long leaf_offset,
7169 struct extent_buffer *eb) {
7171 struct btrfs_fs_info *fs_info = root->fs_info;
7173 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7175 unsigned long csum_offset;
7179 u64 data_checked = 0;
7185 if (num_bytes % fs_info->sectorsize)
7188 data = malloc(num_bytes);
7192 while (offset < num_bytes) {
7195 read_len = num_bytes - offset;
7196 /* read as much space once a time */
7197 ret = read_extent_data(fs_info, data + offset,
7198 bytenr + offset, &read_len, mirror);
7202 /* verify every 4k data's checksum */
7203 while (data_checked < read_len) {
/* Byte offset of the current block within the whole extent. */
7205 tmp = offset + data_checked;
7207 csum = btrfs_csum_data((char *)data + tmp,
7208 csum, fs_info->sectorsize);
7209 btrfs_csum_final(csum, (u8 *)&csum);
7211 csum_offset = leaf_offset +
7212 tmp / fs_info->sectorsize * csum_size;
7213 read_extent_buffer(eb, (char *)&csum_expected,
7214 csum_offset, csum_size);
7215 /* try another mirror */
7216 if (csum != csum_expected) {
7217 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
7218 mirror, bytenr + tmp,
7219 csum, csum_expected);
7220 num_copies = btrfs_num_copies(root->fs_info,
7222 if (mirror < num_copies - 1) {
7227 data_checked += fs_info->sectorsize;
/*
 * Check that [@bytenr, @bytenr + @num_bytes) is covered by EXTENT_ITEMs in
 * the extent tree.
 *
 * The search is seeded with (@bytenr, EXTENT_ITEM, -1) and then stepped
 * back, moving to the previous leaf when needed, until the current key type
 * is no longer greater than EXTENT_ITEM. From there the items are walked
 * forward and each overlapping EXTENT_ITEM trims the range still to be
 * accounted for. An extent strictly inside the range splits it: the right
 * part is checked by a recursive call, the left part stays in this loop.
 *
 * If bytes remain unaccounted for and no earlier error was recorded, the
 * uncovered csum range is reported on stderr.
 */
7236 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
7239 struct btrfs_path path;
7240 struct extent_buffer *leaf;
7241 struct btrfs_key key;
7244 btrfs_init_path(&path);
7245 key.objectid = bytenr;
7246 key.type = BTRFS_EXTENT_ITEM_KEY;
7247 key.offset = (u64)-1;
7250 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
7253 fprintf(stderr, "Error looking up extent record %d\n", ret);
7254 btrfs_release_path(&path);
7257 if (path.slots[0] > 0) {
7260 ret = btrfs_prev_leaf(root, &path);
7263 } else if (ret > 0) {
7270 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7273 * Block group items come before extent items if they have the same
7274 * bytenr, so walk back one more just in case. Dear future traveller,
7275 * first congrats on mastering time travel. Now if it's not too much
7276 * trouble could you go back to 2006 and tell Chris to make the
7277 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
7278 * EXTENT_ITEM_KEY please?
7280 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
7281 if (path.slots[0] > 0) {
7284 ret = btrfs_prev_leaf(root, &path);
7287 } else if (ret > 0) {
7292 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
7296 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7297 ret = btrfs_next_leaf(root, &path);
7299 fprintf(stderr, "Error going to next leaf "
7301 btrfs_release_path(&path);
7307 leaf = path.nodes[0];
7308 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7309 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before the range starts. */
7313 if (key.objectid + key.offset < bytenr) {
/* Extent starts beyond the end of the range. */
7317 if (key.objectid > bytenr + num_bytes)
7320 if (key.objectid == bytenr) {
7321 if (key.offset >= num_bytes) {
7325 num_bytes -= key.offset;
7326 bytenr += key.offset;
7327 } else if (key.objectid < bytenr) {
7328 if (key.objectid + key.offset >= bytenr + num_bytes) {
7332 num_bytes = (bytenr + num_bytes) -
7333 (key.objectid + key.offset);
7334 bytenr = key.objectid + key.offset;
7336 if (key.objectid + key.offset < bytenr + num_bytes) {
7337 u64 new_start = key.objectid + key.offset;
7338 u64 new_bytes = bytenr + num_bytes - new_start;
7341 * Weird case, the extent is in the middle of
7342 * our range, we'll have to search one side
7343 * and then the other. Not sure if this happens
7344 * in real life, but no harm in coding it up
7345 * anyway just in case.
7347 btrfs_release_path(&path);
7348 ret = check_extent_exists(root, new_start,
7351 fprintf(stderr, "Right section didn't "
7355 num_bytes = key.objectid - bytenr;
7358 num_bytes = key.objectid - bytenr;
7365 if (num_bytes && !ret) {
7366 fprintf(stderr, "There are no extents for csum range "
7367 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
7371 btrfs_release_path(&path);
/*
 * Walk every EXTENT_CSUM item of the csum tree and check it against the
 * extent tree and, optionally, against the data itself.
 *
 * Each item covers (item size / csum_size) * sectorsize bytes of data
 * starting at key.offset. When check_data_csum is set, that data is verified
 * with check_extent_csums(); otherwise that step is jumped over.
 *
 * Consecutive items are merged into one run [offset, offset + num_bytes).
 * When an item does not start where the run ends, the finished run is passed
 * to check_extent_exists(), a failure is reported on stderr as a csum
 * without an extent record, and a new run starts at the item's offset.
 */
7375 static int check_csums(struct btrfs_root *root)
7377 struct btrfs_path path;
7378 struct extent_buffer *leaf;
7379 struct btrfs_key key;
7380 u64 offset = 0, num_bytes = 0;
7381 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7385 unsigned long leaf_offset;
/* Everything below operates on the csum tree, whatever root was passed. */
7387 root = root->fs_info->csum_root;
7388 if (!extent_buffer_uptodate(root->node)) {
7389 fprintf(stderr, "No valid csum tree found\n");
7393 btrfs_init_path(&path);
7394 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
7395 key.type = BTRFS_EXTENT_CSUM_KEY;
7397 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
7399 fprintf(stderr, "Error searching csum tree %d\n", ret);
7400 btrfs_release_path(&path);
7404 if (ret > 0 && path.slots[0])
7409 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
7410 ret = btrfs_next_leaf(root, &path);
7412 fprintf(stderr, "Error going to next leaf "
7419 leaf = path.nodes[0];
7421 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
7422 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Amount of data described by this item: one csum per sector. */
7427 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
7428 csum_size) * root->fs_info->sectorsize;
7429 if (!check_data_csum)
7430 goto skip_csum_check;
7431 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
7432 ret = check_extent_csums(root, key.offset, data_len,
7438 offset = key.offset;
/* Item is not contiguous with the current run: close the run and check it. */
7439 } else if (key.offset != offset + num_bytes) {
7440 ret = check_extent_exists(root, offset, num_bytes);
7442 fprintf(stderr, "Csum exists for %Lu-%Lu but "
7443 "there is no extent record\n",
7444 offset, offset+num_bytes);
7447 offset = key.offset;
7450 num_bytes += data_len;
7454 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field, in objectid, type, offset
 * order; each later field is only consulted when the earlier ones are equal.
 *
 * NOTE(review): the result statements are not visible in this listing.
 * Presumably the function returns non-zero when @key sorts strictly before
 * @drop_key and 0 otherwise — confirm.
 */
7458 static int is_dropped_key(struct btrfs_key *key,
7459 struct btrfs_key *drop_key) {
7460 if (key->objectid < drop_key->objectid)
7462 else if (key->objectid == drop_key->objectid) {
7463 if (key->type < drop_key->type)
7465 else if (key->type == drop_key->type) {
7466 if (key->offset < drop_key->offset)
7474 * Here are the rules for FULL_BACKREF.
7476 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
7477 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
7479 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
7480 * if it happened after the relocation occurred since we'll have dropped the
7481 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
7482 * have no real way to know for sure.
7484 * We process the blocks one root at a time, and we start from the lowest root
7485 * objectid and go to the highest. So we can just lookup the owner backref for
7486 * the record and if we don't find it then we know it doesn't exist and we have
7489 * FIXME: if we ever start reclaiming root objectids then we need to fix this
7490 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
7491 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf, reached from the root described by @ri,
 * should carry BTRFS_BLOCK_FLAG_FULL_BACKREF, and OR that bit into *flags
 * when it should.
 *
 * Inputs consulted, in order: the extent record cached for buf->start,
 * whether ri->objectid is below BTRFS_FIRST_FREE_OBJECTID, whether @buf is
 * the root block itself (buf->start == ri->bytenr), the
 * BTRFS_HEADER_FLAG_RELOC header flag, whether the header owner equals
 * ri->objectid, and finally whether the record holds a tree backref for the
 * header owner.
 *
 * On both outcomes the decision is compared with the value already stored in
 * rec->flag_block_full_backref (unless that is FLAG_UNSET), and
 * rec->bad_full_backref is set on disagreement.
 *
 * NOTE(review): the jumps connecting the tests to the two outcomes are not
 * visible in this listing; confirm which test leads where.
 */
7493 static int calc_extent_flag(struct cache_tree *extent_cache,
7494 struct extent_buffer *buf,
7495 struct root_item_record *ri,
7498 struct extent_record *rec;
7499 struct cache_extent *cache;
7500 struct tree_backref *tback;
7503 cache = lookup_cache_extent(extent_cache, buf->start, 1);
7504 /* we have added this extent before */
7508 rec = container_of(cache, struct extent_record, cache);
7511 * Except file/reloc tree, we can not have
7514 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
7519 if (buf->start == ri->bytenr)
7522 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7525 owner = btrfs_header_owner(buf);
7526 if (owner == ri->objectid)
/* Look for a non-shared (parent == 0) tree backref owned by the header owner. */
7529 tback = find_tree_backref(rec, 0, owner);
/* Outcome without FULL_BACKREF: flag a record that previously recorded otherwise. */
7534 if (rec->flag_block_full_backref != FLAG_UNSET &&
7535 rec->flag_block_full_backref != 0)
7536 rec->bad_full_backref = 1;
/* Outcome with FULL_BACKREF: set the bit and flag a record that recorded otherwise. */
7539 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7540 if (rec->flag_block_full_backref != FLAG_UNSET &&
7541 rec->flag_block_full_backref != 1)
7542 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr of the form
 * "Invalid key type(<type>) found in root(<root>)", where <type> is rendered
 * by print_key_type() from @key_type and <root> by print_objectid() from
 * @rootid.
 */
7546 static void report_mismatch_key_root(u8 key_type, u64 rootid)
7548 fprintf(stderr, "Invalid key type(");
7549 print_key_type(stderr, 0, key_type);
7550 fprintf(stderr, ") found in root(");
7551 print_objectid(stderr, rootid, 0);
7552 fprintf(stderr, ")\n");
7556 * Check if the key is valid with its extent buffer.
7558 * This is an early check in case an invalid key exists in an extent buffer
7559 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of type @key_type may legitimately appear in the tree
 * whose root objectid is @rootid.
 *
 * Rules visible here:
 *   DEV_ITEM, CHUNK_ITEM                       -> chunk tree only
 *   EXTENT_ITEM, METADATA_ITEM, BLOCK_GROUP_ITEM -> extent tree only
 *   ROOT_ITEM                                  -> root tree only
 *   DEV_EXTENT                                 -> device tree only
 *
 * A violation is reported through report_mismatch_key_root().
 */
7562 static int check_type_with_root(u64 rootid, u8 key_type)
7565 /* Only valid in chunk tree */
7566 case BTRFS_DEV_ITEM_KEY:
7567 case BTRFS_CHUNK_ITEM_KEY:
7568 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
7571 /* valid in csum and log tree */
/*
 * NOTE(review): unlike every other label in this function,
 * BTRFS_CSUM_TREE_OBJECTID is a tree objectid rather than a key type, so
 * this case compares @key_type with an unrelated constant. The comment above
 * suggests BTRFS_EXTENT_CSUM_KEY was meant. Changing only the label would
 * also require revisiting the root test that follows (it names the log tree,
 * not the csum tree) — confirm before touching.
 */
7572 case BTRFS_CSUM_TREE_OBJECTID:
7573 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
7577 case BTRFS_EXTENT_ITEM_KEY:
7578 case BTRFS_METADATA_ITEM_KEY:
7579 case BTRFS_BLOCK_GROUP_ITEM_KEY:
7580 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
7583 case BTRFS_ROOT_ITEM_KEY:
7584 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
7587 case BTRFS_DEV_EXTENT_KEY:
7588 if (rootid != BTRFS_DEV_TREE_OBJECTID)
7594 report_mismatch_key_root(key_type, rootid);
/*
 * Process one pending tree block: choose it with pick_next_pending(), read
 * it, settle its FULL_BACKREF flag and record everything it references.
 *
 * Leaves are scanned item by item and dispatched on key type: extent and
 * metadata items, chunk/device/block-group/dev-extent items, the keyed
 * backref items, orphan items and file extents each go to their recorder.
 * For nodes, every child pointer is recorded as an extent with a tree
 * backref and queued on @nodes or @pending.
 *
 * The global btree size and waste counters are updated before the buffer is
 * released.
 */
7598 static int run_next_block(struct btrfs_root *root,
7599 struct block_info *bits,
7602 struct cache_tree *pending,
7603 struct cache_tree *seen,
7604 struct cache_tree *reada,
7605 struct cache_tree *nodes,
7606 struct cache_tree *extent_cache,
7607 struct cache_tree *chunk_cache,
7608 struct rb_root *dev_cache,
7609 struct block_group_tree *block_group_cache,
7610 struct device_extent_tree *dev_extent_cache,
7611 struct root_item_record *ri)
7613 struct extent_buffer *buf;
7614 struct extent_record *rec = NULL;
7625 struct btrfs_key key;
7626 struct cache_extent *cache;
7629 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
7630 bits_nr, &reada_bits);
/* Register each candidate in @reada and start readahead for it. */
7635 for(i = 0; i < nritems; i++) {
7636 ret = add_cache_extent(reada, bits[i].start,
7641 /* fixme, get the parent transid */
7642 readahead_tree_block(root, bits[i].start,
/* The first candidate is the block handled by this call. */
7646 *last = bits[0].start;
7647 bytenr = bits[0].start;
7648 size = bits[0].size;
/* Take the block off every queue it may still be sitting on. */
7650 cache = lookup_cache_extent(pending, bytenr, size);
7652 remove_cache_extent(pending, cache);
7655 cache = lookup_cache_extent(reada, bytenr, size);
7657 remove_cache_extent(reada, cache);
7660 cache = lookup_cache_extent(nodes, bytenr, size);
7662 remove_cache_extent(nodes, cache);
/* A known extent record supplies the generation recorded by the parent. */
7665 cache = lookup_cache_extent(extent_cache, bytenr, size);
7667 rec = container_of(cache, struct extent_record, cache);
7668 gen = rec->parent_generation;
7671 /* fixme, get the real parent transid */
7672 buf = read_tree_block(root->fs_info, bytenr, size, gen);
7673 if (!extent_buffer_uptodate(buf)) {
7674 record_bad_block_io(root->fs_info,
7675 extent_cache, bytenr, size);
7679 nritems = btrfs_header_nritems(buf);
/*
 * With init_extent_tree clear the flags are looked up in the extent tree,
 * with calc_extent_flag() also available on that path; with it set they are
 * always calculated. If the calculation fails, FULL_BACKREF is assumed.
 */
7682 if (!init_extent_tree) {
7683 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7684 btrfs_header_level(buf), 1, NULL,
7687 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7689 fprintf(stderr, "Couldn't calc extent flags\n");
7690 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7695 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
7697 fprintf(stderr, "Couldn't calc extent flags\n");
7698 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7702 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7704 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7705 ri->objectid == btrfs_header_owner(buf)) {
7707 * Ok we got to this block from it's original owner and
7708 * we have FULL_BACKREF set. Relocation can leave
7709 * converted blocks over so this is altogether possible,
7710 * however it's not possible if the generation > the
7711 * last snapshot, so check for this case.
7713 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7714 btrfs_header_generation(buf) > ri->last_snapshot) {
7715 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7716 rec->bad_full_backref = 1;
7721 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7722 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
7723 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7724 rec->bad_full_backref = 1;
/* Remember the final decision on the extent record. */
7728 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7729 rec->flag_block_full_backref = 1;
7733 rec->flag_block_full_backref = 0;
7735 owner = btrfs_header_owner(buf);
7738 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: account its unused space, then dispatch every item on its key type. */
7742 if (btrfs_is_leaf(buf)) {
7743 btree_space_waste += btrfs_leaf_free_space(root, buf);
7744 for (i = 0; i < nritems; i++) {
7745 struct btrfs_file_extent_item *fi;
7746 btrfs_item_key_to_cpu(buf, &key, i);
7748 * Check key type against the leaf owner.
7749 * Could filter quite a lot of early error if
7752 if (check_type_with_root(btrfs_header_owner(buf),
7754 fprintf(stderr, "ignoring invalid key\n");
7757 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7758 process_extent_item(root, extent_cache, buf,
7762 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7763 process_extent_item(root, extent_cache, buf,
7767 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7769 btrfs_item_size_nr(buf, i);
7772 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7773 process_chunk_item(chunk_cache, &key, buf, i);
7776 if (key.type == BTRFS_DEV_ITEM_KEY) {
7777 process_device_item(dev_cache, &key, buf, i);
7780 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7781 process_block_group_item(block_group_cache,
7785 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7786 process_device_extent_item(dev_extent_cache,
7791 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7792 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7793 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed tree backref: key.offset is passed as the root argument, parent is 0. */
7800 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7801 ret = add_tree_backref(extent_cache,
7802 key.objectid, 0, key.offset, 0);
7805 "add_tree_backref failed (leaf tree block): %s",
/* Keyed shared backref: key.offset is passed as the parent argument, root is 0. */
7809 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7810 ret = add_tree_backref(extent_cache,
7811 key.objectid, key.offset, 0, 0);
7814 "add_tree_backref failed (leaf shared block): %s",
7818 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7819 struct btrfs_extent_data_ref *ref;
7820 ref = btrfs_item_ptr(buf, i,
7821 struct btrfs_extent_data_ref);
7822 add_data_backref(extent_cache,
7824 btrfs_extent_data_ref_root(buf, ref),
7825 btrfs_extent_data_ref_objectid(buf,
7827 btrfs_extent_data_ref_offset(buf, ref),
7828 btrfs_extent_data_ref_count(buf, ref),
7829 0, root->fs_info->sectorsize);
7832 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7833 struct btrfs_shared_data_ref *ref;
7834 ref = btrfs_item_ptr(buf, i,
7835 struct btrfs_shared_data_ref);
7836 add_data_backref(extent_cache,
7837 key.objectid, key.offset, 0, 0, 0,
7838 btrfs_shared_data_ref_count(buf, ref),
7839 0, root->fs_info->sectorsize);
/* Orphan items may be queued on delete_items as bad items tagged with the owner root. */
7842 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7843 struct bad_item *bad;
7845 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7849 bad = malloc(sizeof(struct bad_item));
7852 INIT_LIST_HEAD(&bad->list);
7853 memcpy(&bad->key, &key,
7854 sizeof(struct btrfs_key));
7855 bad->root_id = owner;
7856 list_add_tail(&bad->list, &delete_items);
/*
 * From here on only EXTENT_DATA items are of interest; inline extents and
 * extents with a zero disk bytenr are tested for before the accounting.
 */
7859 if (key.type != BTRFS_EXTENT_DATA_KEY)
7861 fi = btrfs_item_ptr(buf, i,
7862 struct btrfs_file_extent_item);
7863 if (btrfs_file_extent_type(buf, fi) ==
7864 BTRFS_FILE_EXTENT_INLINE)
7866 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7869 data_bytes_allocated +=
7870 btrfs_file_extent_disk_num_bytes(buf, fi);
7871 if (data_bytes_allocated < root->fs_info->sectorsize) {
7874 data_bytes_referenced +=
7875 btrfs_file_extent_num_bytes(buf, fi);
7876 add_data_backref(extent_cache,
7877 btrfs_file_extent_disk_bytenr(buf, fi),
7878 parent, owner, key.objectid, key.offset -
7879 btrfs_file_extent_offset(buf, fi), 1, 1,
7880 btrfs_file_extent_disk_num_bytes(buf, fi));
7884 struct btrfs_key first_key;
7886 first_key.objectid = 0;
7889 btrfs_item_key_to_cpu(buf, &first_key, 0);
7890 level = btrfs_header_level(buf);
/* Node: record every child pointer and queue the child for a later call. */
7891 for (i = 0; i < nritems; i++) {
7892 struct extent_record tmpl;
7894 ptr = btrfs_node_blockptr(buf, i);
7895 size = root->fs_info->nodesize;
7896 btrfs_node_key_to_cpu(buf, &key, i);
/* Children at the drop level are tested against the root's drop key. */
7898 if ((level == ri->drop_level)
7899 && is_dropped_key(&key, &ri->drop_key)) {
7904 memset(&tmpl, 0, sizeof(tmpl));
7905 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7906 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7911 tmpl.max_size = size;
7912 ret = add_extent_rec(extent_cache, &tmpl);
7916 ret = add_tree_backref(extent_cache, ptr, parent,
7920 "add_tree_backref failed (non-leaf block): %s",
7926 add_pending(nodes, seen, ptr, size);
7928 add_pending(pending, seen, ptr, size);
/* Unused key pointer slots count as wasted btree space. */
7931 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7932 nritems) * sizeof(struct btrfs_key_ptr);
/* Account the block in the global size statistics. */
7934 total_btree_bytes += buf->len;
7935 if (fs_root_objectid(btrfs_header_owner(buf)))
7936 total_fs_tree_bytes += buf->len;
7937 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7938 total_extent_tree_bytes += buf->len;
7939 if (!found_old_backref &&
7940 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7941 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7942 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7943 found_old_backref = 1;
7945 free_extent_buffer(buf);
/*
 * Seed the scan with a tree's root block @buf.
 *
 * The block is queued on @nodes when its level is above 0 and on @pending
 * otherwise, and an extent record starting at buf->start with max_size
 * buf->len is added to @extent_cache. A tree backref is then added for it:
 * for the relocation tree, or for a block whose backref revision is older
 * than BTRFS_MIXED_BACKREF_REV, the block's own start is passed as the
 * parent; otherwise parent is 0 and @objectid is passed as the root.
 */
7949 static int add_root_to_pending(struct extent_buffer *buf,
7950 struct cache_tree *extent_cache,
7951 struct cache_tree *pending,
7952 struct cache_tree *seen,
7953 struct cache_tree *nodes,
7956 struct extent_record tmpl;
7959 if (btrfs_header_level(buf) > 0)
7960 add_pending(nodes, seen, buf->start, buf->len);
7962 add_pending(pending, seen, buf->start, buf->len);
7964 memset(&tmpl, 0, sizeof(tmpl));
7965 tmpl.start = buf->start;
7970 tmpl.max_size = buf->len;
7971 add_extent_rec(extent_cache, &tmpl);
7973 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7974 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7975 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7978 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7983 /* as we fix the tree, we might be deleting blocks that
7984 * we're tracking for repair. This hook makes sure we
7985 * remove any backrefs for blocks as we are fixing them.
/*
 * Mirror an extent reference drop into the fsck extent cache.
 *
 * Looks up the extent record for [@bytenr, @bytenr + @num_bytes) in
 * fs_info->fsck_extent_cache and removes from it the reference described by
 * @parent, @root_objectid, @owner and @offset. An @owner of at least
 * BTRFS_FIRST_FREE_OBJECTID marks a data extent, whose backref counters are
 * lowered by refs_to_drop; the other branch handles a tree backref.
 *
 * The record is finally handed to maybe_free_extent_rec().
 */
7987 static int free_extent_hook(struct btrfs_trans_handle *trans,
7988 struct btrfs_root *root,
7989 u64 bytenr, u64 num_bytes, u64 parent,
7990 u64 root_objectid, u64 owner, u64 offset,
7993 struct extent_record *rec;
7994 struct cache_extent *cache;
7996 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
7998 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7999 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
8003 rec = container_of(cache, struct extent_record, cache);
8005 struct data_backref *back;
8006 back = find_data_backref(rec, parent, root_objectid, owner,
8007 offset, 1, bytenr, num_bytes);
/* Lower whichever counters this backref contributed to. */
8010 if (back->node.found_ref) {
8011 back->found_ref -= refs_to_drop;
8013 rec->refs -= refs_to_drop;
8015 if (back->node.found_extent_tree) {
8016 back->num_refs -= refs_to_drop;
8017 if (rec->extent_item_refs)
8018 rec->extent_item_refs -= refs_to_drop;
/* Clear the presence bits once the matching counter has reached zero. */
8020 if (back->found_ref == 0)
8021 back->node.found_ref = 0;
8022 if (back->num_refs == 0)
8023 back->node.found_extent_tree = 0;
8025 if (!back->node.found_extent_tree && back->node.found_ref) {
8026 list_del(&back->node.list);
8030 struct tree_backref *back;
8031 back = find_tree_backref(rec, parent, root_objectid);
8034 if (back->node.found_ref) {
8037 back->node.found_ref = 0;
8039 if (back->node.found_extent_tree) {
8040 if (rec->extent_item_refs)
8041 rec->extent_item_refs--;
8042 back->node.found_extent_tree = 0;
/*
 * NOTE(review): found_ref was cleared just above whenever it was set, so in
 * this branch the condition below looks unsatisfiable and the backref would
 * never be unlinked. `!back->node.found_ref` may have been intended —
 * confirm against the pristine file.
 */
8044 if (!back->node.found_extent_tree && back->node.found_ref) {
8045 list_del(&back->node.list);
8049 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete the extent-tree items that describe `bytenr`: its EXTENT_ITEM or
 * METADATA_ITEM and its keyed backref items (TREE_BLOCK_REF,
 * EXTENT_DATA_REF, EXTENT_REF_V0, SHARED_BLOCK_REF, SHARED_DATA_REF).
 *
 * The extent tree is searched with a key whose objectid is `bytenr` and
 * whose offset is -1. When the item found has a different type, the path is
 * released and the search key is re-seeded just below the found key.
 * Matching items are logged on stderr and removed with btrfs_del_item().
 *
 * Removing an EXTENT_ITEM or METADATA_ITEM is followed by a
 * btrfs_update_block_group() call; the byte count computed for it is
 * key.offset for an EXTENT_ITEM and nodesize for a METADATA_ITEM.
 */
8054 static int delete_extent_records(struct btrfs_trans_handle *trans,
8055 struct btrfs_root *root,
8056 struct btrfs_path *path,
8059 struct btrfs_key key;
8060 struct btrfs_key found_key;
8061 struct extent_buffer *leaf;
8066 key.objectid = bytenr;
8068 key.offset = (u64)-1;
8071 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
8078 if (path->slots[0] == 0)
8084 leaf = path->nodes[0];
8085 slot = path->slots[0];
8087 btrfs_item_key_to_cpu(leaf, &found_key, slot);
8088 if (found_key.objectid != bytenr)
8091 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
8092 found_key.type != BTRFS_METADATA_ITEM_KEY &&
8093 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
8094 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
8095 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
8096 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
8097 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
8098 btrfs_release_path(path);
/* Step the search key to just below the unrelated item that was found. */
8099 if (found_key.type == 0) {
8100 if (found_key.offset == 0)
8102 key.offset = found_key.offset - 1;
8103 key.type = found_key.type;
8105 key.type = found_key.type - 1;
8106 key.offset = (u64)-1;
8110 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
8111 found_key.objectid, found_key.type, found_key.offset);
8113 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
8116 btrfs_release_path(path);
8118 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
8119 found_key.type == BTRFS_METADATA_ITEM_KEY) {
8120 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
8121 found_key.offset : root->fs_info->nodesize;
8123 ret = btrfs_update_block_group(trans, root, bytenr,
8130 btrfs_release_path(path);
8135 * for a single backref, this will allocate a new extent
8136 * and add the backref to it.
/*
 * Parameters: @rec is the extent being re-created, @back the backref to
 * attach to it, and @flags is OR-ed into the extent flags of a tree block.
 *
 * An EXTENT_ITEM keyed (rec->start, rec->max_size) is inserted into the
 * extent tree with a ref count of 0 and rec->generation. Data extents get
 * BTRFS_EXTENT_FLAG_DATA; tree blocks get BTRFS_EXTENT_FLAG_TREE_BLOCK plus
 * a btrfs_tree_block_info filled from rec->info_level and
 * rec->info_objectid. The block group accounting is then updated for
 * rec->max_size bytes.
 *
 * The backref is added with btrfs_inc_extent_ref(): once per found
 * reference for a data backref, once for a tree backref. For a full backref
 * the backref's parent is used as the parent argument. Each addition is
 * logged on stderr.
 */
8138 static int record_extent(struct btrfs_trans_handle *trans,
8139 struct btrfs_fs_info *info,
8140 struct btrfs_path *path,
8141 struct extent_record *rec,
8142 struct extent_backref *back,
8143 int allocated, u64 flags)
8146 struct btrfs_root *extent_root = info->extent_root;
8147 struct extent_buffer *leaf;
8148 struct btrfs_key ins_key;
8149 struct btrfs_extent_item *ei;
8150 struct data_backref *dback;
8151 struct btrfs_tree_block_info *bi;
8154 rec->max_size = max_t(u64, rec->max_size,
8158 u32 item_size = sizeof(*ei);
/* Room for the btrfs_tree_block_info that follows the extent item. */
8161 item_size += sizeof(*bi);
8163 ins_key.objectid = rec->start;
8164 ins_key.offset = rec->max_size;
8165 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
8167 ret = btrfs_insert_empty_item(trans, extent_root, path,
8168 &ins_key, item_size);
8172 leaf = path->nodes[0];
8173 ei = btrfs_item_ptr(leaf, path->slots[0],
8174 struct btrfs_extent_item);
/* Refs start at 0; they are raised by the btrfs_inc_extent_ref() calls below. */
8176 btrfs_set_extent_refs(leaf, ei, 0);
8177 btrfs_set_extent_generation(leaf, ei, rec->generation);
8179 if (back->is_data) {
8180 btrfs_set_extent_flags(leaf, ei,
8181 BTRFS_EXTENT_FLAG_DATA);
8183 struct btrfs_disk_key copy_key;;
8185 bi = (struct btrfs_tree_block_info *)(ei + 1);
8186 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/*
 * NOTE(review): `©_key` in the calls below looks like an entity-mangled
 * `&copy_key` (address of the local declared above) — confirm against the
 * pristine file.
 */
8189 btrfs_set_disk_key_objectid(©_key,
8190 rec->info_objectid);
8191 btrfs_set_disk_key_type(©_key, 0);
8192 btrfs_set_disk_key_offset(©_key, 0);
8194 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
8195 btrfs_set_tree_block_key(leaf, bi, ©_key);
8197 btrfs_set_extent_flags(leaf, ei,
8198 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
8201 btrfs_mark_buffer_dirty(leaf);
8202 ret = btrfs_update_block_group(trans, extent_root, rec->start,
8203 rec->max_size, 1, 0);
8206 btrfs_release_path(path);
8209 if (back->is_data) {
8213 dback = to_data_backref(back);
8214 if (back->full_backref)
8215 parent = dback->parent;
/* One btrfs_inc_extent_ref() call per reference that was actually found. */
8219 for (i = 0; i < dback->found_ref; i++) {
8220 /* if parent != 0, we're doing a full backref
8221 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
8222 * just makes the backref allocator create a data
8225 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8226 rec->start, rec->max_size,
8230 BTRFS_FIRST_FREE_OBJECTID :
8236 fprintf(stderr, "adding new data backref"
8237 " on %llu %s %llu owner %llu"
8238 " offset %llu found %d\n",
8239 (unsigned long long)rec->start,
8240 back->full_backref ?
8242 back->full_backref ?
8243 (unsigned long long)parent :
8244 (unsigned long long)dback->root,
8245 (unsigned long long)dback->owner,
8246 (unsigned long long)dback->offset,
8250 struct tree_backref *tback;
8252 tback = to_tree_backref(back);
8253 if (back->full_backref)
8254 parent = tback->parent;
8258 ret = btrfs_inc_extent_ref(trans, info->extent_root,
8259 rec->start, rec->max_size,
8260 parent, tback->root, 0, 0);
8261 fprintf(stderr, "adding new tree backref on "
8262 "start %llu len %llu parent %llu root %llu\n",
8263 rec->start, rec->max_size, parent, tback->root);
8266 btrfs_release_path(path);
8270 static struct extent_entry *find_entry(struct list_head *entries,
8271 u64 bytenr, u64 bytes)
8273 struct extent_entry *entry = NULL;
8275 list_for_each_entry(entry, entries, list) {
8276 if (entry->bytenr == bytenr && entry->bytes == bytes)
8283 static struct extent_entry *find_most_right_entry(struct list_head *entries)
8285 struct extent_entry *entry, *best = NULL, *prev = NULL;
8287 list_for_each_entry(entry, entries, list) {
8289 * If there are as many broken entries as entries then we know
8290 * not to trust this particular entry.
8292 if (entry->broken == entry->count)
8296 * Special case, when there are only two entries and 'best' is
8306 * If our current entry == best then we can't be sure our best
8307 * is really the best, so we need to keep searching.
8309 if (best && best->count == entry->count) {
8315 /* Prev == entry, not good enough, have to keep searching */
8316 if (!prev->broken && prev->count == entry->count)
8320 best = (prev->count > entry->count) ? prev : entry;
8321 else if (best->count < entry->count)
8329 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
8330 struct data_backref *dback, struct extent_entry *entry)
8332 struct btrfs_trans_handle *trans;
8333 struct btrfs_root *root;
8334 struct btrfs_file_extent_item *fi;
8335 struct extent_buffer *leaf;
8336 struct btrfs_key key;
8340 key.objectid = dback->root;
8341 key.type = BTRFS_ROOT_ITEM_KEY;
8342 key.offset = (u64)-1;
8343 root = btrfs_read_fs_root(info, &key);
8345 fprintf(stderr, "Couldn't find root for our ref\n");
8350 * The backref points to the original offset of the extent if it was
8351 * split, so we need to search down to the offset we have and then walk
8352 * forward until we find the backref we're looking for.
8354 key.objectid = dback->owner;
8355 key.type = BTRFS_EXTENT_DATA_KEY;
8356 key.offset = dback->offset;
8357 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8359 fprintf(stderr, "Error looking up ref %d\n", ret);
8364 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8365 ret = btrfs_next_leaf(root, path);
8367 fprintf(stderr, "Couldn't find our ref, next\n");
8371 leaf = path->nodes[0];
8372 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8373 if (key.objectid != dback->owner ||
8374 key.type != BTRFS_EXTENT_DATA_KEY) {
8375 fprintf(stderr, "Couldn't find our ref, search\n");
8378 fi = btrfs_item_ptr(leaf, path->slots[0],
8379 struct btrfs_file_extent_item);
8380 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
8381 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
8383 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
8388 btrfs_release_path(path);
8390 trans = btrfs_start_transaction(root, 1);
8392 return PTR_ERR(trans);
8395 * Ok we have the key of the file extent we want to fix, now we can cow
8396 * down to the thing and fix it.
8398 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8400 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
8401 key.objectid, key.type, key.offset, ret);
8405 fprintf(stderr, "Well that's odd, we just found this key "
8406 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
8411 leaf = path->nodes[0];
8412 fi = btrfs_item_ptr(leaf, path->slots[0],
8413 struct btrfs_file_extent_item);
8415 if (btrfs_file_extent_compression(leaf, fi) &&
8416 dback->disk_bytenr != entry->bytenr) {
8417 fprintf(stderr, "Ref doesn't match the record start and is "
8418 "compressed, please take a btrfs-image of this file "
8419 "system and send it to a btrfs developer so they can "
8420 "complete this functionality for bytenr %Lu\n",
8421 dback->disk_bytenr);
8426 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
8427 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8428 } else if (dback->disk_bytenr > entry->bytenr) {
8429 u64 off_diff, offset;
8431 off_diff = dback->disk_bytenr - entry->bytenr;
8432 offset = btrfs_file_extent_offset(leaf, fi);
8433 if (dback->disk_bytenr + offset +
8434 btrfs_file_extent_num_bytes(leaf, fi) >
8435 entry->bytenr + entry->bytes) {
8436 fprintf(stderr, "Ref is past the entry end, please "
8437 "take a btrfs-image of this file system and "
8438 "send it to a btrfs developer, ref %Lu\n",
8439 dback->disk_bytenr);
8444 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8445 btrfs_set_file_extent_offset(leaf, fi, offset);
8446 } else if (dback->disk_bytenr < entry->bytenr) {
8449 offset = btrfs_file_extent_offset(leaf, fi);
8450 if (dback->disk_bytenr + offset < entry->bytenr) {
8451 fprintf(stderr, "Ref is before the entry start, please"
8452 " take a btrfs-image of this file system and "
8453 "send it to a btrfs developer, ref %Lu\n",
8454 dback->disk_bytenr);
8459 offset += dback->disk_bytenr;
8460 offset -= entry->bytenr;
8461 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
8462 btrfs_set_file_extent_offset(leaf, fi, offset);
8465 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
8468 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
8469 * only do this if we aren't using compression, otherwise it's a
8472 if (!btrfs_file_extent_compression(leaf, fi))
8473 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
8475 printf("ram bytes may be wrong?\n");
8476 btrfs_mark_buffer_dirty(leaf);
8478 err = btrfs_commit_transaction(trans, root);
8479 btrfs_release_path(path);
8480 return ret ? ret : err;
8483 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
8484 struct extent_record *rec)
8486 struct extent_backref *back;
8487 struct data_backref *dback;
8488 struct extent_entry *entry, *best = NULL;
8491 int broken_entries = 0;
8496 * Metadata is easy and the backrefs should always agree on bytenr and
8497 * size, if not we've got bigger issues.
8502 list_for_each_entry(back, &rec->backrefs, list) {
8503 if (back->full_backref || !back->is_data)
8506 dback = to_data_backref(back);
8509 * We only pay attention to backrefs that we found a real
8512 if (dback->found_ref == 0)
8516 * For now we only catch when the bytes don't match, not the
8517 * bytenr. We can easily do this at the same time, but I want
8518 * to have a fs image to test on before we just add repair
8519 * functionality willy-nilly so we know we won't screw up the
8523 entry = find_entry(&entries, dback->disk_bytenr,
8526 entry = malloc(sizeof(struct extent_entry));
8531 memset(entry, 0, sizeof(*entry));
8532 entry->bytenr = dback->disk_bytenr;
8533 entry->bytes = dback->bytes;
8534 list_add_tail(&entry->list, &entries);
8539 * If we only have on entry we may think the entries agree when
8540 * in reality they don't so we have to do some extra checking.
8542 if (dback->disk_bytenr != rec->start ||
8543 dback->bytes != rec->nr || back->broken)
8554 /* Yay all the backrefs agree, carry on good sir */
8555 if (nr_entries <= 1 && !mismatch)
8558 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
8559 "%Lu\n", rec->start);
8562 * First we want to see if the backrefs can agree amongst themselves who
8563 * is right, so figure out which one of the entries has the highest
8566 best = find_most_right_entry(&entries);
8569 * Ok so we may have an even split between what the backrefs think, so
8570 * this is where we use the extent ref to see what it thinks.
8573 entry = find_entry(&entries, rec->start, rec->nr);
8574 if (!entry && (!broken_entries || !rec->found_rec)) {
8575 fprintf(stderr, "Backrefs don't agree with each other "
8576 "and extent record doesn't agree with anybody,"
8577 " so we can't fix bytenr %Lu bytes %Lu\n",
8578 rec->start, rec->nr);
8581 } else if (!entry) {
8583 * Ok our backrefs were broken, we'll assume this is the
8584 * correct value and add an entry for this range.
8586 entry = malloc(sizeof(struct extent_entry));
8591 memset(entry, 0, sizeof(*entry));
8592 entry->bytenr = rec->start;
8593 entry->bytes = rec->nr;
8594 list_add_tail(&entry->list, &entries);
8598 best = find_most_right_entry(&entries);
8600 fprintf(stderr, "Backrefs and extent record evenly "
8601 "split on who is right, this is going to "
8602 "require user input to fix bytenr %Lu bytes "
8603 "%Lu\n", rec->start, rec->nr);
8610 * I don't think this can happen currently as we'll abort() if we catch
8611 * this case higher up, but in case somebody removes that we still can't
8612 * deal with it properly here yet, so just bail out of that's the case.
8614 if (best->bytenr != rec->start) {
8615 fprintf(stderr, "Extent start and backref starts don't match, "
8616 "please use btrfs-image on this file system and send "
8617 "it to a btrfs developer so they can make fsck fix "
8618 "this particular case. bytenr is %Lu, bytes is %Lu\n",
8619 rec->start, rec->nr);
8625 * Ok great we all agreed on an extent record, let's go find the real
8626 * references and fix up the ones that don't match.
8628 list_for_each_entry(back, &rec->backrefs, list) {
8629 if (back->full_backref || !back->is_data)
8632 dback = to_data_backref(back);
8635 * Still ignoring backrefs that don't have a real ref attached
8638 if (dback->found_ref == 0)
8641 if (dback->bytes == best->bytes &&
8642 dback->disk_bytenr == best->bytenr)
8645 ret = repair_ref(info, path, dback, best);
8651 * Ok we messed with the actual refs, which means we need to drop our
8652 * entire cache and go back and rescan. I know this is a huge pain and
8653 * adds a lot of extra work, but it's the only way to be safe. Once all
8654 * the backrefs agree we may not need to do anything to the extent
8659 while (!list_empty(&entries)) {
8660 entry = list_entry(entries.next, struct extent_entry, list);
8661 list_del_init(&entry->list);
8667 static int process_duplicates(struct cache_tree *extent_cache,
8668 struct extent_record *rec)
8670 struct extent_record *good, *tmp;
8671 struct cache_extent *cache;
8675 * If we found a extent record for this extent then return, or if we
8676 * have more than one duplicate we are likely going to need to delete
8679 if (rec->found_rec || rec->num_duplicates > 1)
8682 /* Shouldn't happen but just in case */
8683 BUG_ON(!rec->num_duplicates);
8686 * So this happens if we end up with a backref that doesn't match the
8687 * actual extent entry. So either the backref is bad or the extent
8688 * entry is bad. Either way we want to have the extent_record actually
8689 * reflect what we found in the extent_tree, so we need to take the
8690 * duplicate out and use that as the extent_record since the only way we
8691 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8693 remove_cache_extent(extent_cache, &rec->cache);
8695 good = to_extent_record(rec->dups.next);
8696 list_del_init(&good->list);
8697 INIT_LIST_HEAD(&good->backrefs);
8698 INIT_LIST_HEAD(&good->dups);
8699 good->cache.start = good->start;
8700 good->cache.size = good->nr;
8701 good->content_checked = 0;
8702 good->owner_ref_checked = 0;
8703 good->num_duplicates = 0;
8704 good->refs = rec->refs;
8705 list_splice_init(&rec->backrefs, &good->backrefs);
8707 cache = lookup_cache_extent(extent_cache, good->start,
8711 tmp = container_of(cache, struct extent_record, cache);
8714 * If we find another overlapping extent and it's found_rec is
8715 * set then it's a duplicate and we need to try and delete
8718 if (tmp->found_rec || tmp->num_duplicates > 0) {
8719 if (list_empty(&good->list))
8720 list_add_tail(&good->list,
8721 &duplicate_extents);
8722 good->num_duplicates += tmp->num_duplicates + 1;
8723 list_splice_init(&tmp->dups, &good->dups);
8724 list_del_init(&tmp->list);
8725 list_add_tail(&tmp->list, &good->dups);
8726 remove_cache_extent(extent_cache, &tmp->cache);
8731 * Ok we have another non extent item backed extent rec, so lets
8732 * just add it to this extent and carry on like we did above.
8734 good->refs += tmp->refs;
8735 list_splice_init(&tmp->backrefs, &good->backrefs);
8736 remove_cache_extent(extent_cache, &tmp->cache);
8739 ret = insert_cache_extent(extent_cache, &good->cache);
8742 return good->num_duplicates ? 0 : 1;
8745 static int delete_duplicate_records(struct btrfs_root *root,
8746 struct extent_record *rec)
8748 struct btrfs_trans_handle *trans;
8749 LIST_HEAD(delete_list);
8750 struct btrfs_path path;
8751 struct extent_record *tmp, *good, *n;
8754 struct btrfs_key key;
8756 btrfs_init_path(&path);
8759 /* Find the record that covers all of the duplicates. */
8760 list_for_each_entry(tmp, &rec->dups, list) {
8761 if (good->start < tmp->start)
8763 if (good->nr > tmp->nr)
8766 if (tmp->start + tmp->nr < good->start + good->nr) {
8767 fprintf(stderr, "Ok we have overlapping extents that "
8768 "aren't completely covered by each other, this "
8769 "is going to require more careful thought. "
8770 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8771 tmp->start, tmp->nr, good->start, good->nr);
8778 list_add_tail(&rec->list, &delete_list);
8780 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8783 list_move_tail(&tmp->list, &delete_list);
8786 root = root->fs_info->extent_root;
8787 trans = btrfs_start_transaction(root, 1);
8788 if (IS_ERR(trans)) {
8789 ret = PTR_ERR(trans);
8793 list_for_each_entry(tmp, &delete_list, list) {
8794 if (tmp->found_rec == 0)
8796 key.objectid = tmp->start;
8797 key.type = BTRFS_EXTENT_ITEM_KEY;
8798 key.offset = tmp->nr;
8800 /* Shouldn't happen but just in case */
8801 if (tmp->metadata) {
8802 fprintf(stderr, "Well this shouldn't happen, extent "
8803 "record overlaps but is metadata? "
8804 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
8808 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8814 ret = btrfs_del_item(trans, root, &path);
8817 btrfs_release_path(&path);
8820 err = btrfs_commit_transaction(trans, root);
8824 while (!list_empty(&delete_list)) {
8825 tmp = to_extent_record(delete_list.next);
8826 list_del_init(&tmp->list);
8832 while (!list_empty(&rec->dups)) {
8833 tmp = to_extent_record(rec->dups.next);
8834 list_del_init(&tmp->list);
8838 btrfs_release_path(&path);
8840 if (!ret && !nr_del)
8841 rec->num_duplicates = 0;
8843 return ret ? ret : nr_del;
8846 static int find_possible_backrefs(struct btrfs_fs_info *info,
8847 struct btrfs_path *path,
8848 struct cache_tree *extent_cache,
8849 struct extent_record *rec)
8851 struct btrfs_root *root;
8852 struct extent_backref *back;
8853 struct data_backref *dback;
8854 struct cache_extent *cache;
8855 struct btrfs_file_extent_item *fi;
8856 struct btrfs_key key;
8860 list_for_each_entry(back, &rec->backrefs, list) {
8861 /* Don't care about full backrefs (poor unloved backrefs) */
8862 if (back->full_backref || !back->is_data)
8865 dback = to_data_backref(back);
8867 /* We found this one, we don't need to do a lookup */
8868 if (dback->found_ref)
8871 key.objectid = dback->root;
8872 key.type = BTRFS_ROOT_ITEM_KEY;
8873 key.offset = (u64)-1;
8875 root = btrfs_read_fs_root(info, &key);
8877 /* No root, definitely a bad ref, skip */
8878 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8880 /* Other err, exit */
8882 return PTR_ERR(root);
8884 key.objectid = dback->owner;
8885 key.type = BTRFS_EXTENT_DATA_KEY;
8886 key.offset = dback->offset;
8887 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8889 btrfs_release_path(path);
8892 /* Didn't find it, we can carry on */
8897 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8898 struct btrfs_file_extent_item);
8899 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8900 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8901 btrfs_release_path(path);
8902 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8904 struct extent_record *tmp;
8905 tmp = container_of(cache, struct extent_record, cache);
8908 * If we found an extent record for the bytenr for this
8909 * particular backref then we can't add it to our
8910 * current extent record. We only want to add backrefs
8911 * that don't have a corresponding extent item in the
8912 * extent tree since they likely belong to this record
8913 * and we need to fix it if it doesn't match bytenrs.
8919 dback->found_ref += 1;
8920 dback->disk_bytenr = bytenr;
8921 dback->bytes = bytes;
8924 * Set this so the verify backref code knows not to trust the
8925 * values in this backref.
8934 * Record orphan data ref into corresponding root.
8936 * Return 0 if the extent item contains data ref and recorded.
8937 * Return 1 if the extent item contains no useful data ref
8938 * On that case, it may contains only shared_dataref or metadata backref
8939 * or the file extent exists(this should be handled by the extent bytenr
8941 * Return <0 if something goes wrong.
8943 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8944 struct extent_record *rec)
8946 struct btrfs_key key;
8947 struct btrfs_root *dest_root;
8948 struct extent_backref *back;
8949 struct data_backref *dback;
8950 struct orphan_data_extent *orphan;
8951 struct btrfs_path path;
8952 int recorded_data_ref = 0;
8957 btrfs_init_path(&path);
8958 list_for_each_entry(back, &rec->backrefs, list) {
8959 if (back->full_backref || !back->is_data ||
8960 !back->found_extent_tree)
8962 dback = to_data_backref(back);
8963 if (dback->found_ref)
8965 key.objectid = dback->root;
8966 key.type = BTRFS_ROOT_ITEM_KEY;
8967 key.offset = (u64)-1;
8969 dest_root = btrfs_read_fs_root(fs_info, &key);
8971 /* For non-exist root we just skip it */
8972 if (IS_ERR(dest_root) || !dest_root)
8975 key.objectid = dback->owner;
8976 key.type = BTRFS_EXTENT_DATA_KEY;
8977 key.offset = dback->offset;
8979 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8980 btrfs_release_path(&path);
8982 * For ret < 0, it's OK since the fs-tree may be corrupted,
8983 * we need to record it for inode/file extent rebuild.
8984 * For ret > 0, we record it only for file extent rebuild.
8985 * For ret == 0, the file extent exists but only bytenr
8986 * mismatch, let the original bytenr fix routine to handle,
8992 orphan = malloc(sizeof(*orphan));
8997 INIT_LIST_HEAD(&orphan->list);
8998 orphan->root = dback->root;
8999 orphan->objectid = dback->owner;
9000 orphan->offset = dback->offset;
9001 orphan->disk_bytenr = rec->cache.start;
9002 orphan->disk_len = rec->cache.size;
9003 list_add(&dest_root->orphan_data_extents, &orphan->list);
9004 recorded_data_ref = 1;
9007 btrfs_release_path(&path);
9009 return !recorded_data_ref;
9015 * when an incorrect extent item is found, this will delete
9016 * all of the existing entries for it and recreate them
9017 * based on what the tree scan found.
9019 static int fixup_extent_refs(struct btrfs_fs_info *info,
9020 struct cache_tree *extent_cache,
9021 struct extent_record *rec)
9023 struct btrfs_trans_handle *trans = NULL;
9025 struct btrfs_path path;
9026 struct list_head *cur = rec->backrefs.next;
9027 struct cache_extent *cache;
9028 struct extent_backref *back;
9032 if (rec->flag_block_full_backref)
9033 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9035 btrfs_init_path(&path);
9036 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
9038 * Sometimes the backrefs themselves are so broken they don't
9039 * get attached to any meaningful rec, so first go back and
9040 * check any of our backrefs that we couldn't find and throw
9041 * them into the list if we find the backref so that
9042 * verify_backrefs can figure out what to do.
9044 ret = find_possible_backrefs(info, &path, extent_cache, rec);
9049 /* step one, make sure all of the backrefs agree */
9050 ret = verify_backrefs(info, &path, rec);
9054 trans = btrfs_start_transaction(info->extent_root, 1);
9055 if (IS_ERR(trans)) {
9056 ret = PTR_ERR(trans);
9060 /* step two, delete all the existing records */
9061 ret = delete_extent_records(trans, info->extent_root, &path,
9067 /* was this block corrupt? If so, don't add references to it */
9068 cache = lookup_cache_extent(info->corrupt_blocks,
9069 rec->start, rec->max_size);
9075 /* step three, recreate all the refs we did find */
9076 while(cur != &rec->backrefs) {
9077 back = to_extent_backref(cur);
9081 * if we didn't find any references, don't create a
9084 if (!back->found_ref)
9087 rec->bad_full_backref = 0;
9088 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
9096 int err = btrfs_commit_transaction(trans, info->extent_root);
9102 fprintf(stderr, "Repaired extent references for %llu\n",
9103 (unsigned long long)rec->start);
9105 btrfs_release_path(&path);
9109 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
9110 struct extent_record *rec)
9112 struct btrfs_trans_handle *trans;
9113 struct btrfs_root *root = fs_info->extent_root;
9114 struct btrfs_path path;
9115 struct btrfs_extent_item *ei;
9116 struct btrfs_key key;
9120 key.objectid = rec->start;
9121 if (rec->metadata) {
9122 key.type = BTRFS_METADATA_ITEM_KEY;
9123 key.offset = rec->info_level;
9125 key.type = BTRFS_EXTENT_ITEM_KEY;
9126 key.offset = rec->max_size;
9129 trans = btrfs_start_transaction(root, 0);
9131 return PTR_ERR(trans);
9133 btrfs_init_path(&path);
9134 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
9136 btrfs_release_path(&path);
9137 btrfs_commit_transaction(trans, root);
9140 fprintf(stderr, "Didn't find extent for %llu\n",
9141 (unsigned long long)rec->start);
9142 btrfs_release_path(&path);
9143 btrfs_commit_transaction(trans, root);
9147 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9148 struct btrfs_extent_item);
9149 flags = btrfs_extent_flags(path.nodes[0], ei);
9150 if (rec->flag_block_full_backref) {
9151 fprintf(stderr, "setting full backref on %llu\n",
9152 (unsigned long long)key.objectid);
9153 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9155 fprintf(stderr, "clearing full backref on %llu\n",
9156 (unsigned long long)key.objectid);
9157 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9159 btrfs_set_extent_flags(path.nodes[0], ei, flags);
9160 btrfs_mark_buffer_dirty(path.nodes[0]);
9161 btrfs_release_path(&path);
9162 ret = btrfs_commit_transaction(trans, root);
9164 fprintf(stderr, "Repaired extent flags for %llu\n",
9165 (unsigned long long)rec->start);
9170 /* right now we only prune from the extent allocation tree */
9171 static int prune_one_block(struct btrfs_trans_handle *trans,
9172 struct btrfs_fs_info *info,
9173 struct btrfs_corrupt_block *corrupt)
9176 struct btrfs_path path;
9177 struct extent_buffer *eb;
9181 int level = corrupt->level + 1;
9183 btrfs_init_path(&path);
9185 /* we want to stop at the parent to our busted block */
9186 path.lowest_level = level;
9188 ret = btrfs_search_slot(trans, info->extent_root,
9189 &corrupt->key, &path, -1, 1);
9194 eb = path.nodes[level];
9201 * hopefully the search gave us the block we want to prune,
9202 * lets try that first
9204 slot = path.slots[level];
9205 found = btrfs_node_blockptr(eb, slot);
9206 if (found == corrupt->cache.start)
9209 nritems = btrfs_header_nritems(eb);
9211 /* the search failed, lets scan this node and hope we find it */
9212 for (slot = 0; slot < nritems; slot++) {
9213 found = btrfs_node_blockptr(eb, slot);
9214 if (found == corrupt->cache.start)
9218 * we couldn't find the bad block. TODO, search all the nodes for pointers
9221 if (eb == info->extent_root->node) {
9226 btrfs_release_path(&path);
9231 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
9232 ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
9235 btrfs_release_path(&path);
9239 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
9241 struct btrfs_trans_handle *trans = NULL;
9242 struct cache_extent *cache;
9243 struct btrfs_corrupt_block *corrupt;
9246 cache = search_cache_extent(info->corrupt_blocks, 0);
9250 trans = btrfs_start_transaction(info->extent_root, 1);
9252 return PTR_ERR(trans);
9254 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
9255 prune_one_block(trans, info, corrupt);
9256 remove_cache_extent(info->corrupt_blocks, cache);
9259 return btrfs_commit_transaction(trans, info->extent_root);
9263 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
9265 struct btrfs_block_group_cache *cache;
9270 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
9271 &start, &end, EXTENT_DIRTY);
9274 clear_extent_dirty(&fs_info->free_space_cache, start, end);
9279 cache = btrfs_lookup_first_block_group(fs_info, start);
9284 start = cache->key.objectid + cache->key.offset;
9288 static int check_extent_refs(struct btrfs_root *root,
9289 struct cache_tree *extent_cache)
9291 struct extent_record *rec;
9292 struct cache_extent *cache;
9298 * if we're doing a repair, we have to make sure
9299 * we don't allocate from the problem extents.
9300 * In the worst case, this will be all the
9303 cache = search_cache_extent(extent_cache, 0);
9305 rec = container_of(cache, struct extent_record, cache);
9306 set_extent_dirty(root->fs_info->excluded_extents,
9308 rec->start + rec->max_size - 1);
9309 cache = next_cache_extent(cache);
9312 /* pin down all the corrupted blocks too */
9313 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
9315 set_extent_dirty(root->fs_info->excluded_extents,
9317 cache->start + cache->size - 1);
9318 cache = next_cache_extent(cache);
9320 prune_corrupt_blocks(root->fs_info);
9321 reset_cached_block_groups(root->fs_info);
9324 reset_cached_block_groups(root->fs_info);
9327 * We need to delete any duplicate entries we find first otherwise we
9328 * could mess up the extent tree when we have backrefs that actually
9329 * belong to a different extent item and not the weird duplicate one.
9331 while (repair && !list_empty(&duplicate_extents)) {
9332 rec = to_extent_record(duplicate_extents.next);
9333 list_del_init(&rec->list);
9335 /* Sometimes we can find a backref before we find an actual
9336 * extent, so we need to process it a little bit to see if there
9337 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
9338 * if this is a backref screwup. If we need to delete stuff
9339 * process_duplicates() will return 0, otherwise it will return
9342 if (process_duplicates(extent_cache, rec))
9344 ret = delete_duplicate_records(root, rec);
9348 * delete_duplicate_records will return the number of entries
9349 * deleted, so if it's greater than 0 then we know we actually
9350 * did something and we need to remove.
9363 cache = search_cache_extent(extent_cache, 0);
9366 rec = container_of(cache, struct extent_record, cache);
9367 if (rec->num_duplicates) {
9368 fprintf(stderr, "extent item %llu has multiple extent "
9369 "items\n", (unsigned long long)rec->start);
9373 if (rec->refs != rec->extent_item_refs) {
9374 fprintf(stderr, "ref mismatch on [%llu %llu] ",
9375 (unsigned long long)rec->start,
9376 (unsigned long long)rec->nr);
9377 fprintf(stderr, "extent item %llu, found %llu\n",
9378 (unsigned long long)rec->extent_item_refs,
9379 (unsigned long long)rec->refs);
9380 ret = record_orphan_data_extents(root->fs_info, rec);
9386 if (all_backpointers_checked(rec, 1)) {
9387 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
9388 (unsigned long long)rec->start,
9389 (unsigned long long)rec->nr);
9393 if (!rec->owner_ref_checked) {
9394 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
9395 (unsigned long long)rec->start,
9396 (unsigned long long)rec->nr);
9401 if (repair && fix) {
9402 ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
9408 if (rec->bad_full_backref) {
9409 fprintf(stderr, "bad full backref, on [%llu]\n",
9410 (unsigned long long)rec->start);
9412 ret = fixup_extent_flags(root->fs_info, rec);
9420 * Although it's not a extent ref's problem, we reuse this
9421 * routine for error reporting.
9422 * No repair function yet.
9424 if (rec->crossing_stripes) {
9426 "bad metadata [%llu, %llu) crossing stripe boundary\n",
9427 rec->start, rec->start + rec->max_size);
9431 if (rec->wrong_chunk_type) {
9433 "bad extent [%llu, %llu), type mismatch with chunk\n",
9434 rec->start, rec->start + rec->max_size);
9438 remove_cache_extent(extent_cache, cache);
9439 free_all_extent_backrefs(rec);
9440 if (!init_extent_tree && repair && (!cur_err || fix))
9441 clear_extent_dirty(root->fs_info->excluded_extents,
9443 rec->start + rec->max_size - 1);
9448 if (ret && ret != -EAGAIN) {
9449 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
9452 struct btrfs_trans_handle *trans;
9454 root = root->fs_info->extent_root;
9455 trans = btrfs_start_transaction(root, 1);
9456 if (IS_ERR(trans)) {
9457 ret = PTR_ERR(trans);
9461 btrfs_fix_block_accounting(trans, root);
9462 ret = btrfs_commit_transaction(trans, root);
9471 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
9475 if (type & BTRFS_BLOCK_GROUP_RAID0) {
9476 stripe_size = length;
9477 stripe_size /= num_stripes;
9478 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
9479 stripe_size = length * 2;
9480 stripe_size /= num_stripes;
9481 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
9482 stripe_size = length;
9483 stripe_size /= (num_stripes - 1);
9484 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
9485 stripe_size = length;
9486 stripe_size /= (num_stripes - 2);
9488 stripe_size = length;
9494 * Check the chunk with its block group/dev list ref:
9495 * Return 0 if all refs seems valid.
9496 * Return 1 if part of refs seems valid, need later check for rebuild ref
9497 * like missing block group and needs to search extent tree to rebuild them.
9498 * Return -1 if essential refs are missing and unable to rebuild.
9500 static int check_chunk_refs(struct chunk_record *chunk_rec,
9501 struct block_group_tree *block_group_cache,
9502 struct device_extent_tree *dev_extent_cache,
9505 struct cache_extent *block_group_item;
9506 struct block_group_record *block_group_rec;
9507 struct cache_extent *dev_extent_item;
9508 struct device_extent_record *dev_extent_rec;
9512 int metadump_v2 = 0;
9516 block_group_item = lookup_cache_extent(&block_group_cache->tree,
9519 if (block_group_item) {
9520 block_group_rec = container_of(block_group_item,
9521 struct block_group_record,
9523 if (chunk_rec->length != block_group_rec->offset ||
9524 chunk_rec->offset != block_group_rec->objectid ||
9526 chunk_rec->type_flags != block_group_rec->flags)) {
9529 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
9530 chunk_rec->objectid,
9535 chunk_rec->type_flags,
9536 block_group_rec->objectid,
9537 block_group_rec->type,
9538 block_group_rec->offset,
9539 block_group_rec->offset,
9540 block_group_rec->objectid,
9541 block_group_rec->flags);
9544 list_del_init(&block_group_rec->list);
9545 chunk_rec->bg_rec = block_group_rec;
9550 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
9551 chunk_rec->objectid,
9556 chunk_rec->type_flags);
9563 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
9564 chunk_rec->num_stripes);
9565 for (i = 0; i < chunk_rec->num_stripes; ++i) {
9566 devid = chunk_rec->stripes[i].devid;
9567 offset = chunk_rec->stripes[i].offset;
9568 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
9569 devid, offset, length);
9570 if (dev_extent_item) {
9571 dev_extent_rec = container_of(dev_extent_item,
9572 struct device_extent_record,
9574 if (dev_extent_rec->objectid != devid ||
9575 dev_extent_rec->offset != offset ||
9576 dev_extent_rec->chunk_offset != chunk_rec->offset ||
9577 dev_extent_rec->length != length) {
9580 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
9581 chunk_rec->objectid,
9584 chunk_rec->stripes[i].devid,
9585 chunk_rec->stripes[i].offset,
9586 dev_extent_rec->objectid,
9587 dev_extent_rec->offset,
9588 dev_extent_rec->length);
9591 list_move(&dev_extent_rec->chunk_list,
9592 &chunk_rec->dextents);
9597 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
9598 chunk_rec->objectid,
9601 chunk_rec->stripes[i].devid,
9602 chunk_rec->stripes[i].offset);
/*
 * Walk every chunk record in @chunk_cache, validate it with
 * check_chunk_refs() and sort the record onto one of the result lists.
 *
 * @good:    may be NULL; receives chunks for which check_chunk_refs()
 *           returned 0
 * @rebuild: may be NULL; receives chunks for which it returned > 0
 * @bad:     also receives chunk records
 *           NOTE(review): confirm which condition guards the @bad insert
 * @silent:  passed through unchanged to check_chunk_refs()
 *
 * Once the chunks are done, the entries on
 * block_group_cache->block_groups and dev_extent_cache->no_chunk_orphans
 * are iterated; each loop carries a message saying that no related chunk
 * was found.
 */
9609 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
9610 int check_chunks(struct cache_tree *chunk_cache,
9611 struct block_group_tree *block_group_cache,
9612 struct device_extent_tree *dev_extent_cache,
9613 struct list_head *good, struct list_head *bad,
9614 struct list_head *rebuild, int silent)
9616 struct cache_extent *chunk_item;
9617 struct chunk_record *chunk_rec;
9618 struct block_group_record *bg_rec;
9619 struct device_extent_record *dext_rec;
/* Pass 1: visit each cached chunk record */
9623 chunk_item = first_cache_extent(chunk_cache);
9624 while (chunk_item) {
9625 chunk_rec = container_of(chunk_item, struct chunk_record,
9627 err = check_chunk_refs(chunk_rec, block_group_cache,
9628 dev_extent_cache, silent);
/* Sort the record by the check result; the good/rebuild lists are optional */
9631 if (err == 0 && good)
9632 list_add_tail(&chunk_rec->list, good);
9633 if (err > 0 && rebuild)
9634 list_add_tail(&chunk_rec->list, rebuild);
9636 list_add_tail(&chunk_rec->list, bad);
9637 chunk_item = next_cache_extent(chunk_item);
/* Pass 2: block group records linked on the block_groups list */
9640 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9643 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
/* Pass 3: device extent records on the no_chunk_orphans list */
9651 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9655 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Compare the space covered by the device extents of one device with the
 * usage stored in its device record.
 *
 * The walk starts at the cache entry returned by search_cache_extent2()
 * for (dev_rec->devid, 0) and advances with next_cache_extent().  The
 * objectid of each device extent record is compared with dev_rec->devid;
 * visited records are unlinked from their device_list and their length is
 * added to a running total.  A total that differs from dev_rec->byte_used
 * is reported with both values and the device key.
 */
9666 static int check_device_used(struct device_record *dev_rec,
9667 struct device_extent_tree *dext_cache)
9669 struct cache_extent *cache;
9670 struct device_extent_record *dev_extent_rec;
9673 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9675 dev_extent_rec = container_of(cache,
9676 struct device_extent_record,
/* Compare the extent owner with the device being checked */
9678 if (dev_extent_rec->objectid != dev_rec->devid)
9681 list_del_init(&dev_extent_rec->device_list);
9682 total_byte += dev_extent_rec->length;
9683 cache = next_cache_extent(cache);
/* The summed extent length is compared with the byte_used field */
9686 if (total_byte != dev_rec->byte_used) {
9688 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9689 total_byte, dev_rec->byte_used, dev_rec->objectid,
9690 dev_rec->type, dev_rec->offset);
/*
 * Run check_device_used() on the device records of the @dev_cache rb-tree
 * (rb_first() / rb_next() traversal), then walk
 * dev_extent_cache->no_device_orphans, whose entries are reported as
 * device extents that did not find their device.
 */
9697 /* check btrfs_dev_item -> btrfs_dev_extent */
9698 static int check_devices(struct rb_root *dev_cache,
9699 struct device_extent_tree *dev_extent_cache)
9701 struct rb_node *dev_node;
9702 struct device_record *dev_rec;
9703 struct device_extent_record *dext_rec;
9707 dev_node = rb_first(dev_cache);
9709 dev_rec = container_of(dev_node, struct device_record, node);
9710 err = check_device_used(dev_rec, dev_extent_cache);
9714 dev_node = rb_next(dev_node);
/* Report device extent records on the no_device_orphans list */
9716 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9719 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9720 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record with malloc(), fill it from the arguments
 * and append it to the tail of @head.
 *
 * @objectid, @bytenr, @last_snapshot, @level, @drop_level, @level_size:
 *            stored in the record fields of the same name
 * @drop_key: copied by value into the record
 *            NOTE(review): callers without a drop key pass NULL here;
 *            confirm the copy is skipped for them
 */
9727 static int add_root_item_to_list(struct list_head *head,
9728 u64 objectid, u64 bytenr, u64 last_snapshot,
9729 u8 level, u8 drop_level,
9730 int level_size, struct btrfs_key *drop_key)
9733 struct root_item_record *ri_rec;
9734 ri_rec = malloc(sizeof(*ri_rec));
9737 ri_rec->bytenr = bytenr;
9738 ri_rec->objectid = objectid;
9739 ri_rec->level = level;
9740 ri_rec->level_size = level_size;
9741 ri_rec->drop_level = drop_level;
9742 ri_rec->last_snapshot = last_snapshot;
9744 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9745 list_add_tail(&ri_rec->list, head);
/*
 * Drain @list: while it is not empty, take the first root_item_record and
 * unlink it with list_del_init().
 * NOTE(review): releasing the record memory presumably follows the unlink;
 * confirm.
 */
9750 static void free_root_item_list(struct list_head *list)
9752 struct root_item_record *ri_rec;
9754 while (!list_empty(list)) {
9755 ri_rec = list_first_entry(list, struct root_item_record,
9757 list_del_init(&ri_rec->list);
/*
 * Consume the root_item_record entries queued on @list.
 *
 * For each record the root tree block is read with read_tree_block() using
 * the record bytenr and level_size, then handed to add_root_to_pending()
 * together with the record objectid.  The buffer reference is dropped and
 * the record is removed from the list afterwards.
 *
 * run_next_block() is called in two places: inside the loop with the
 * current record, and after the loop with a NULL record.
 * NOTE(review): the in-loop call looks tied to the extent tree rebuild case
 * described in the comment next to it; confirm the guarding condition.
 *
 * @bits and the cache/tree arguments are forwarded unchanged to the helpers.
 */
9762 static int deal_root_from_list(struct list_head *list,
9763 struct btrfs_root *root,
9764 struct block_info *bits,
9766 struct cache_tree *pending,
9767 struct cache_tree *seen,
9768 struct cache_tree *reada,
9769 struct cache_tree *nodes,
9770 struct cache_tree *extent_cache,
9771 struct cache_tree *chunk_cache,
9772 struct rb_root *dev_cache,
9773 struct block_group_tree *block_group_cache,
9774 struct device_extent_tree *dev_extent_cache)
9779 while (!list_empty(list)) {
9780 struct root_item_record *rec;
9781 struct extent_buffer *buf;
9782 rec = list_entry(list->next,
9783 struct root_item_record, list);
/* Read the root block of this tree; a buffer that is not uptodate is dropped */
9785 buf = read_tree_block(root->fs_info,
9786 rec->bytenr, rec->level_size, 0);
9787 if (!extent_buffer_uptodate(buf)) {
9788 free_extent_buffer(buf);
9792 ret = add_root_to_pending(buf, extent_cache, pending,
9793 seen, nodes, rec->objectid);
9797 * To rebuild extent tree, we need deal with snapshot
9798 * one by one, otherwise we deal with node firstly which
9799 * can maximize readahead.
9802 ret = run_next_block(root, bits, bits_nr, &last,
9803 pending, seen, reada, nodes,
9804 extent_cache, chunk_cache,
9805 dev_cache, block_group_cache,
9806 dev_extent_cache, rec);
9810 free_extent_buffer(buf);
9811 list_del(&rec->list);
/* Same processing call as above, but without a specific root record */
9817 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9818 reada, nodes, extent_cache, chunk_cache,
9819 dev_cache, block_group_cache,
9820 dev_extent_cache, NULL);
/*
 * Top-level driver for the chunk / extent / device cross checks.
 *
 * Visible steps, in order:
 *  1. Initialise the local caches, trees and the two root lists, and hook
 *     excluded_extents, extent_cache, free_extent_hook and corrupt_blocks
 *     into root->fs_info.
 *  2. Allocate the block_info array (bits_nr entries) and, when progress
 *     reporting is enabled, start the task in ctx.
 *  3. Queue the tree root and the chunk root on normal_trees.
 *  4. Scan the tree root for ROOT_ITEM keys and queue each root on
 *     normal_trees or dropping_trees (see the drop_progress test below).
 *  5. Process normal_trees, then dropping_trees, via deal_root_from_list().
 *  6. Run check_chunks(), check_extent_refs() and check_devices().
 *  7. Tear everything down again.
 */
9830 static int check_chunks_and_extents(struct btrfs_root *root)
9832 struct rb_root dev_cache;
9833 struct cache_tree chunk_cache;
9834 struct block_group_tree block_group_cache;
9835 struct device_extent_tree dev_extent_cache;
9836 struct cache_tree extent_cache;
9837 struct cache_tree seen;
9838 struct cache_tree pending;
9839 struct cache_tree reada;
9840 struct cache_tree nodes;
9841 struct extent_io_tree excluded_extents;
9842 struct cache_tree corrupt_blocks;
9843 struct btrfs_path path;
9844 struct btrfs_key key;
9845 struct btrfs_key found_key;
9847 struct block_info *bits;
9849 struct extent_buffer *leaf;
9851 struct btrfs_root_item ri;
9852 struct list_head dropping_trees;
9853 struct list_head normal_trees;
9854 struct btrfs_root *root1;
/* Step 1: reset every local cache, tree and list to its empty state */
9859 dev_cache = RB_ROOT;
9860 cache_tree_init(&chunk_cache);
9861 block_group_tree_init(&block_group_cache);
9862 device_extent_tree_init(&dev_extent_cache);
9864 cache_tree_init(&extent_cache);
9865 cache_tree_init(&seen);
9866 cache_tree_init(&pending);
9867 cache_tree_init(&nodes);
9868 cache_tree_init(&reada);
9869 cache_tree_init(&corrupt_blocks);
9870 extent_io_tree_init(&excluded_extents);
9871 INIT_LIST_HEAD(&dropping_trees);
9872 INIT_LIST_HEAD(&normal_trees);
/*
 * Publish the local structures through fs_info; the teardown code resets
 * these four fields to NULL.
 */
9875 root->fs_info->excluded_extents = &excluded_extents;
9876 root->fs_info->fsck_extent_cache = &extent_cache;
9877 root->fs_info->free_extent_hook = free_extent_hook;
9878 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* Step 2: block_info array and optional progress task */
9882 bits = malloc(bits_nr * sizeof(struct block_info));
9888 if (ctx.progress_enabled) {
9889 ctx.tp = TASK_EXTENTS;
9890 task_start(ctx.info);
/* Step 3: the tree root and the chunk root are queued without a drop key */
9894 root1 = root->fs_info->tree_root;
9895 level = btrfs_header_level(root1->node);
9896 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9897 root1->node->start, 0, level, 0,
9898 root1->fs_info->nodesize, NULL);
9901 root1 = root->fs_info->chunk_root;
9902 level = btrfs_header_level(root1->node);
9903 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9904 root1->node->start, 0, level, 0,
9905 root1->fs_info->nodesize, NULL);
/* Step 4: scan the tree root for ROOT_ITEM keys */
9908 btrfs_init_path(&path);
9911 key.type = BTRFS_ROOT_ITEM_KEY;
9912 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9917 leaf = path.nodes[0];
9918 slot = path.slots[0];
/* Past the last item of this leaf: move on to the next leaf */
9919 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9920 ret = btrfs_next_leaf(root, &path);
9923 leaf = path.nodes[0];
9924 slot = path.slots[0];
9926 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9927 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9928 unsigned long offset;
/* Copy the root item out of the leaf before reading its fields */
9931 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9932 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9933 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * A drop_progress objectid of 0 selects the normal_trees insert; the
 * dropping_trees insert further down also records a drop key.
 */
9934 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9935 level = btrfs_root_level(&ri);
9936 level_size = root->fs_info->nodesize;
9937 ret = add_root_item_to_list(&normal_trees,
9939 btrfs_root_bytenr(&ri),
9940 last_snapshot, level,
9941 0, level_size, NULL);
9945 level = btrfs_root_level(&ri);
9946 level_size = root->fs_info->nodesize;
9947 objectid = found_key.objectid;
9948 btrfs_disk_key_to_cpu(&found_key,
9950 ret = add_root_item_to_list(&dropping_trees,
9952 btrfs_root_bytenr(&ri),
9953 last_snapshot, level,
9955 level_size, &found_key);
9962 btrfs_release_path(&path);
9965 * check_block can return -EAGAIN if it fixes something, please keep
9966 * this in mind when dealing with return values from these functions, if
9967 * we get -EAGAIN we want to fall through and restart the loop.
/* Step 5: normal trees first, then the trees that carry a drop key */
9969 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9970 &seen, &reada, &nodes, &extent_cache,
9971 &chunk_cache, &dev_cache, &block_group_cache,
9978 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9979 &pending, &seen, &reada, &nodes,
9980 &extent_cache, &chunk_cache, &dev_cache,
9981 &block_group_cache, &dev_extent_cache);
/* Step 6: cross checks; check_chunks() gets no result lists and silent = 0 */
9988 ret = check_chunks(&chunk_cache, &block_group_cache,
9989 &dev_extent_cache, NULL, NULL, NULL, 0);
9996 ret = check_extent_refs(root, &extent_cache);
10003 ret = check_devices(&dev_cache, &dev_extent_cache);
/*
 * Step 7, first teardown sequence: stop the progress task, detach the
 * hooks from fs_info and release the caches and both root lists.
 */
10008 task_stop(ctx.info);
10010 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10011 extent_io_tree_cleanup(&excluded_extents);
10012 root->fs_info->fsck_extent_cache = NULL;
10013 root->fs_info->free_extent_hook = NULL;
10014 root->fs_info->corrupt_blocks = NULL;
10015 root->fs_info->excluded_extents = NULL;
10018 free_chunk_cache_tree(&chunk_cache);
10019 free_device_cache_tree(&dev_cache);
10020 free_block_group_tree(&block_group_cache);
10021 free_device_extent_tree(&dev_extent_cache);
10022 free_extent_cache_tree(&seen);
10023 free_extent_cache_tree(&pending);
10024 free_extent_cache_tree(&reada);
10025 free_extent_cache_tree(&nodes);
10026 free_root_item_list(&normal_trees);
10027 free_root_item_list(&dropping_trees);
/*
 * Second teardown sequence: releases the same caches and lists and also
 * calls free_extent_record_cache() on extent_cache, but leaves the fs_info
 * hooks in place.
 * NOTE(review): presumably the restart path for the -EAGAIN case described
 * above; confirm against the labels around these lines.
 */
10030 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
10031 free_extent_cache_tree(&seen);
10032 free_extent_cache_tree(&pending);
10033 free_extent_cache_tree(&reada);
10034 free_extent_cache_tree(&nodes);
10035 free_chunk_cache_tree(&chunk_cache);
10036 free_block_group_tree(&block_group_cache);
10037 free_device_cache_tree(&dev_cache);
10038 free_device_extent_tree(&dev_extent_cache);
10039 free_extent_record_cache(&extent_cache);
10040 free_root_item_list(&normal_trees);
10041 free_root_item_list(&dropping_trees);
10042 extent_io_tree_cleanup(&excluded_extents);
/*
 * Implementation outline for check_tree_block_ref():
 *  - Look up the extent tree with key (bytenr, METADATA_ITEM or
 *    EXTENT_ITEM depending on the SKINNY_METADATA incompat flag, -1) and
 *    step back with btrfs_previous_extent_item().
 *  - Take the level either from the METADATA_ITEM key offset or from the
 *    btrfs_tree_block_info that follows the extent item.
 *  - Compare the extent flags, generation, level and reference count with
 *    the tree block; each mismatch is reported and sets BACKREF_MISMATCH.
 *  - Walk the inline refs for a TREE_BLOCK_REF naming root->objectid or
 *    @owner, or a SHARED_BLOCK_REF whose parent passes a recursive check
 *    at level + 1.
 *  - Fall back to a keyed TREE_BLOCK_REF item lookup for root->objectid.
 * BACKREF_MISSING is only printed when @eb is non-NULL, which keeps the
 * recursive calls (made with a NULL @eb) quiet.
 * NOTE(review): the mismatch branches assign err with a plain = while the
 * missing-backref paths use |=; confirm that is intended.
 */
10047 * Check backrefs of a tree block given by @bytenr or @eb.
10049 * @root: the root containing the @bytenr or @eb
10050 * @eb: tree block extent buffer, can be NULL
10051 * @bytenr: bytenr of the tree block to search
10052 * @level: tree level of the tree block
10053 * @owner: owner of the tree block
10055 * Return >0 for any error found and output error message
10056 * Return 0 for no error found
10058 static int check_tree_block_ref(struct btrfs_root *root,
10059 struct extent_buffer *eb, u64 bytenr,
10060 int level, u64 owner)
10062 struct btrfs_key key;
10063 struct btrfs_root *extent_root = root->fs_info->extent_root;
10064 struct btrfs_path path;
10065 struct btrfs_extent_item *ei;
10066 struct btrfs_extent_inline_ref *iref;
10067 struct extent_buffer *leaf;
10073 u32 nodesize = root->fs_info->nodesize;
10076 int tree_reloc_root = 0;
/* The block is the root node of a tree reloc root: remember it for later */
10081 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
10082 btrfs_header_bytenr(root->node) == bytenr)
10083 tree_reloc_root = 1;
10085 btrfs_init_path(&path);
10086 key.objectid = bytenr;
10087 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
10088 key.type = BTRFS_METADATA_ITEM_KEY;
10090 key.type = BTRFS_EXTENT_ITEM_KEY;
10091 key.offset = (u64)-1;
10093 /* Search for the backref in extent tree */
10094 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10096 err |= BACKREF_MISSING;
10099 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10101 err |= BACKREF_MISSING;
10105 leaf = path.nodes[0];
10106 slot = path.slots[0];
10107 btrfs_item_key_to_cpu(leaf, &key, slot);
10109 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * METADATA_ITEM: level is the key offset and inline refs follow the extent
 * item directly.  Otherwise a btrfs_tree_block_info sits in between and
 * carries the level.
 */
10111 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10112 skinny_level = (int)key.offset;
10113 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10115 struct btrfs_tree_block_info *info;
10117 info = (struct btrfs_tree_block_info *)(ei + 1);
10118 skinny_level = btrfs_tree_block_level(leaf, info);
10119 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10126 if (!(btrfs_extent_flags(leaf, ei) &
10127 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10129 "extent[%llu %u] backref type mismatch, missing bit: %llx",
10130 key.objectid, nodesize,
10131 BTRFS_EXTENT_FLAG_TREE_BLOCK);
10132 err = BACKREF_MISMATCH;
/* Generation in the block header versus generation in the extent item */
10134 header_gen = btrfs_header_generation(eb);
10135 extent_gen = btrfs_extent_generation(leaf, ei);
10136 if (header_gen != extent_gen) {
10138 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
10139 key.objectid, nodesize, header_gen,
10141 err = BACKREF_MISMATCH;
10143 if (level != skinny_level) {
10145 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
10146 key.objectid, nodesize, level, skinny_level);
10147 err = BACKREF_MISMATCH;
/* A block owned by a non-fs tree is compared against a reference count of 1 */
10149 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
10151 "extent[%llu %u] is referred by other roots than %llu",
10152 key.objectid, nodesize, root->objectid);
10153 err = BACKREF_MISMATCH;
10158 * Iterate the extent/metadata item to find the exact backref
10160 item_size = btrfs_item_size_nr(leaf, slot);
10161 ptr = (unsigned long)iref;
10162 end = (unsigned long)ei + item_size;
10163 while (ptr < end) {
10164 iref = (struct btrfs_extent_inline_ref *)ptr;
10165 type = btrfs_extent_inline_ref_type(leaf, iref);
10166 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10168 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10169 (offset == root->objectid || offset == owner)) {
10171 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10173 * Backref of tree reloc root points to itself, no need
10174 * to check backref any more.
10176 if (tree_reloc_root)
10179 /* Check if the backref points to valid referencer */
10180 found_ref = !check_tree_block_ref(root, NULL,
10181 offset, level + 1, owner);
10186 ptr += btrfs_extent_inline_ref_size(type);
10190 * Inlined extent item doesn't have what we need, check
10191 * TREE_BLOCK_REF_KEY
10194 btrfs_release_path(&path);
10195 key.objectid = bytenr;
10196 key.type = BTRFS_TREE_BLOCK_REF_KEY;
10197 key.offset = root->objectid;
10199 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10204 err |= BACKREF_MISSING;
10206 btrfs_release_path(&path);
10207 if (eb && (err & BACKREF_MISSING))
10208 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
10209 bytenr, nodesize, owner, level);
/*
 * Implementation outline for check_extent_data_item():
 *  - Inline file extents and extents with disk_bytenr 0 are tested for
 *    first, before any extent tree lookup.
 *  - disk_num_bytes and num_bytes are tested for sectorsize alignment
 *    (BYTES_UNALIGNED) and otherwise added to the global counters
 *    data_bytes_allocated / data_bytes_referenced.
 *  - The extent item (disk_bytenr, EXTENT_ITEM, disk_num_bytes) is looked
 *    up and its flags are tested for BTRFS_EXTENT_FLAG_DATA.
 *  - The data backref is searched in three places: the inline refs, a
 *    keyed EXTENT_DATA_REF item, and finally a keyed SHARED_DATA_REF item
 *    whose offset is the start of @eb.
 *  - A backref found nowhere sets BACKREF_MISSING and is reported.
 *
 * @root: tree owning @eb; its objectid is one accepted backref root
 * @eb:   leaf holding the EXTENT_DATA item
 * @slot: slot of that item inside @eb
 */
10214 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
10216 * Return >0 any error found and output error message
10217 * Return 0 for no error found
10219 static int check_extent_data_item(struct btrfs_root *root,
10220 struct extent_buffer *eb, int slot)
10222 struct btrfs_file_extent_item *fi;
10223 struct btrfs_path path;
10224 struct btrfs_root *extent_root = root->fs_info->extent_root;
10225 struct btrfs_key fi_key;
10226 struct btrfs_key dbref_key;
10227 struct extent_buffer *leaf;
10228 struct btrfs_extent_item *ei;
10229 struct btrfs_extent_inline_ref *iref;
10230 struct btrfs_extent_data_ref *dref;
10233 u64 disk_num_bytes;
10234 u64 extent_num_bytes;
10241 int found_dbackref = 0;
10245 btrfs_item_key_to_cpu(eb, &fi_key, slot);
10246 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
10248 /* Nothing to check for hole and inline data extents */
10249 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
10250 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
10253 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
10254 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
10255 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
10257 /* Check unaligned disk_num_bytes and num_bytes */
10258 if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
10260 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
10261 fi_key.objectid, fi_key.offset, disk_num_bytes,
10262 root->fs_info->sectorsize);
10263 err |= BYTES_UNALIGNED;
10265 data_bytes_allocated += disk_num_bytes;
10267 if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
10269 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
10270 fi_key.objectid, fi_key.offset, extent_num_bytes,
10271 root->fs_info->sectorsize);
10272 err |= BYTES_UNALIGNED;
10274 data_bytes_referenced += extent_num_bytes;
/* Owner recorded in the leaf header; accepted as backref root further down */
10276 owner = btrfs_header_owner(eb);
10278 /* Check the extent item of the file extent in extent tree */
10279 btrfs_init_path(&path);
10280 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10281 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
10282 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
10284 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
/* From here on slot refers to the extent tree leaf, not to @eb */
10288 leaf = path.nodes[0];
10289 slot = path.slots[0];
10290 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10292 extent_flags = btrfs_extent_flags(leaf, ei);
10294 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
10296 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
10297 disk_bytenr, disk_num_bytes,
10298 BTRFS_EXTENT_FLAG_DATA);
10299 err |= BACKREF_MISMATCH;
10302 /* Check data backref inside that extent item */
10303 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
10304 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10305 ptr = (unsigned long)iref;
10306 end = (unsigned long)ei + item_size;
10307 while (ptr < end) {
10308 iref = (struct btrfs_extent_inline_ref *)ptr;
10309 type = btrfs_extent_inline_ref_type(leaf, iref);
10310 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
/*
 * EXTENT_DATA_REF: match on the referencing root.
 * SHARED_DATA_REF: the ref offset is passed as bytenr to
 * check_tree_block_ref(); a zero return counts as found.
 */
10312 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10313 ref_root = btrfs_extent_data_ref_root(leaf, dref);
10314 if (ref_root == owner || ref_root == root->objectid)
10315 found_dbackref = 1;
10316 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10317 found_dbackref = !check_tree_block_ref(root, NULL,
10318 btrfs_extent_inline_ref_offset(leaf, iref),
10322 if (found_dbackref)
10324 ptr += btrfs_extent_inline_ref_size(type);
10327 if (!found_dbackref) {
10328 btrfs_release_path(&path);
10330 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
10331 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
10332 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
10333 dbref_key.offset = hash_extent_data_ref(root->objectid,
10334 fi_key.objectid, fi_key.offset);
10336 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10337 &dbref_key, &path, 0, 0);
10339 found_dbackref = 1;
10343 btrfs_release_path(&path);
10346 * Neither inlined nor EXTENT_DATA_REF found, try
10347 * SHARED_DATA_REF as last chance.
10349 dbref_key.objectid = disk_bytenr;
10350 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
10351 dbref_key.offset = eb->start;
10353 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
10354 &dbref_key, &path, 0, 0);
10356 found_dbackref = 1;
10362 if (!found_dbackref)
10363 err |= BACKREF_MISSING;
10364 btrfs_release_path(&path);
10365 if (err & BACKREF_MISSING) {
10366 error("data extent[%llu %llu] backref lost",
10367 disk_bytenr, disk_num_bytes);
/*
 * Implementation outline for query_tree_block_level():
 * the level is obtained from two sources and the two are compared.
 *  - Backref side: the extent tree is searched with key
 *    (bytenr, METADATA_ITEM, -1) followed by btrfs_previous_extent_item();
 *    the item flags are tested for BTRFS_EXTENT_FLAG_TREE_BLOCK and the
 *    level comes from the key offset (METADATA_ITEM) or from the
 *    btrfs_tree_block_info after the extent item.
 *  - Header side: the block is read with read_tree_block(), passing the
 *    generation stored in the extent item, and the level is taken from
 *    its header.
 * header_level is the value returned once both sources are compared.
 */
10373 * Get real tree block level for the case like shared block
10374 * Return >= 0 as tree level
10375 * Return <0 for error
10377 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
10379 struct extent_buffer *eb;
10380 struct btrfs_path path;
10381 struct btrfs_key key;
10382 struct btrfs_extent_item *ei;
10385 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10390 /* Search extent tree for extent generation and level */
10391 key.objectid = bytenr;
10392 key.type = BTRFS_METADATA_ITEM_KEY;
10393 key.offset = (u64)-1;
10395 btrfs_init_path(&path);
10396 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
10399 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
10407 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10408 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10409 struct btrfs_extent_item);
/* The extent item flags are tested for the tree block bit */
10410 flags = btrfs_extent_flags(path.nodes[0], ei);
10411 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
10416 /* Get transid for later read_tree_block() check */
10417 transid = btrfs_extent_generation(path.nodes[0], ei);
10419 /* Get backref level as one source */
10420 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10421 backref_level = key.offset;
10423 struct btrfs_tree_block_info *info;
10425 info = (struct btrfs_tree_block_info *)(ei + 1);
10426 backref_level = btrfs_tree_block_level(path.nodes[0], info);
10428 btrfs_release_path(&path);
10430 /* Get level from tree block as an alternative source */
10431 eb = read_tree_block(fs_info, bytenr, nodesize, transid);
10432 if (!extent_buffer_uptodate(eb)) {
10433 free_extent_buffer(eb);
10436 header_level = btrfs_header_level(eb);
10437 free_extent_buffer(eb);
/* The two sources are compared before header_level is returned */
10439 if (header_level != backref_level)
10441 return header_level;
10444 btrfs_release_path(&path);
/*
 * Implementation outline for check_tree_block_backref():
 *  - When needed, @level is resolved with query_tree_block_level().
 *  - The referencing root is read via btrfs_read_fs_root() with key
 *    (root_id, ROOT_ITEM, -1).
 *  - The tree block at @bytenr is read and its first key is taken (node
 *    key or item key); a level 0 block without items is special-cased as
 *    an empty tree.
 *  - That key is searched in the root with path.lowest_level set to
 *    @level, and the node found at path.nodes[level] is compared with
 *    @bytenr and @level.
 * Failures on the way set REFERENCER_MISSING, comparison failures set
 * REFERENCER_MISMATCH; both are reported with error messages.
 */
10449 * Check if a tree block backref is valid (points to a valid tree block)
10450 * if level == -1, level will be resolved
10451 * Return >0 for any error found and print error message
10453 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
10454 u64 bytenr, int level)
10456 struct btrfs_root *root;
10457 struct btrfs_key key;
10458 struct btrfs_path path;
10459 struct extent_buffer *eb;
10460 struct extent_buffer *node;
10461 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10465 /* Query level for level == -1 special case */
10467 level = query_tree_block_level(fs_info, bytenr);
10469 err |= REFERENCER_MISSING;
10473 key.objectid = root_id;
10474 key.type = BTRFS_ROOT_ITEM_KEY;
10475 key.offset = (u64)-1;
10477 root = btrfs_read_fs_root(fs_info, &key);
10478 if (IS_ERR(root)) {
10479 err |= REFERENCER_MISSING;
10483 /* Read out the tree block to get item/node key */
10484 eb = read_tree_block(fs_info, bytenr, root->fs_info->nodesize, 0);
10485 if (!extent_buffer_uptodate(eb)) {
10486 err |= REFERENCER_MISSING;
10487 free_extent_buffer(eb);
10491 /* Empty tree, no need to check key */
10492 if (!btrfs_header_nritems(eb) && !level) {
10493 free_extent_buffer(eb);
/* First key of the block: node key or item key */
10498 btrfs_node_key_to_cpu(eb, &key, 0);
10500 btrfs_item_key_to_cpu(eb, &key, 0);
10502 free_extent_buffer(eb);
10504 btrfs_init_path(&path);
10505 path.lowest_level = level;
10506 /* Search with the first key, to ensure we can reach it */
10507 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10509 err |= REFERENCER_MISSING;
/* The node reached at the requested level is compared with the extent */
10513 node = path.nodes[level];
10514 if (btrfs_header_bytenr(node) != bytenr) {
10516 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
10517 bytenr, nodesize, bytenr,
10518 btrfs_header_bytenr(node));
10519 err |= REFERENCER_MISMATCH;
10521 if (btrfs_header_level(node) != level) {
10523 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
10524 bytenr, nodesize, level,
10525 btrfs_header_level(node));
10526 err |= REFERENCER_MISMATCH;
10530 btrfs_release_path(&path);
/* Two message variants: without and with the level */
10532 if (err & REFERENCER_MISSING) {
10534 error("extent [%llu %d] lost referencer (owner: %llu)",
10535 bytenr, nodesize, root_id);
10538 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
10539 bytenr, nodesize, root_id, level);
/*
 * Implementation outline for is_tree_reloc_root():
 * the reloc root is read without the root cache, using the key
 * (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb), and the bytenr of
 * @eb is compared with the bytenr of that root node.  The root obtained
 * here is released again with btrfs_free_fs_root().
 */
10546 * Check if tree block @eb is tree reloc root.
10547 * Return 0 if it's not or any problem happens
10548 * Return 1 if it's a tree reloc root
10550 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
10551 struct extent_buffer *eb)
10553 struct btrfs_root *tree_reloc_root;
10554 struct btrfs_key key;
10555 u64 bytenr = btrfs_header_bytenr(eb);
10556 u64 owner = btrfs_header_owner(eb);
/* Reloc roots are keyed by the objectid of the tree they belong to */
10559 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10560 key.offset = owner;
10561 key.type = BTRFS_ROOT_ITEM_KEY;
10563 tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
10564 if (IS_ERR(tree_reloc_root))
10567 if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
10569 btrfs_free_fs_root(tree_reloc_root);
/*
 * Implementation outline for check_shared_block_backref():
 *  - The parent block at @parent is read with read_tree_block().
 *  - When needed, @level is resolved with query_tree_block_level().
 *  - A block that is its own parent is tested with is_tree_reloc_root().
 *  - Otherwise the parent header level is compared with level + 1 and the
 *    parent block pointers are scanned for @bytenr.
 * When no parent relation is found the problem is reported and
 * REFERENCER_MISSING is returned.
 */
10574 * Check referencer for shared block backref
10575 * If level == -1, this function will resolve the level.
10577 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
10578 u64 parent, u64 bytenr, int level)
10580 struct extent_buffer *eb;
10581 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10583 int found_parent = 0;
10586 eb = read_tree_block(fs_info, parent, nodesize, 0);
10587 if (!extent_buffer_uptodate(eb))
10591 level = query_tree_block_level(fs_info, bytenr);
10595 /* It's possible it's a tree reloc root */
10596 if (parent == bytenr) {
10597 if (is_tree_reloc_root(fs_info, eb))
/* The parent header level is compared with level + 1 */
10602 if (level + 1 != btrfs_header_level(eb))
/* Scan the block pointers of the parent for the child bytenr */
10605 nr = btrfs_header_nritems(eb);
10606 for (i = 0; i < nr; i++) {
10607 if (bytenr == btrfs_node_blockptr(eb, i)) {
10613 free_extent_buffer(eb);
10614 if (!found_parent) {
10616 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
10617 bytenr, nodesize, parent, level);
10618 return REFERENCER_MISSING;
/*
 * Implementation outline for check_extent_data_backref():
 *  - The extent tree is searched with key (bytenr, EXTENT_ITEM, -1) and
 *    btrfs_previous_extent_item(); the key found is compared with @bytenr
 *    and the EXTENT_ITEM type.
 *  - The referencing root is read with key (root_id, ROOT_ITEM, -1).
 *  - File extents of inode @objectid are searched from @offset, or from 0
 *    when the top bit of @offset is set, and iterated with
 *    btrfs_next_item() while the key still belongs to that inode and has
 *    type EXTENT_DATA.
 *  - A file extent takes part in the match when its disk bytenr equals
 *    @bytenr, its disk num bytes equals @len and its key offset minus its
 *    file extent offset equals the backref offset.
 *  - found_count is compared with @count; a difference is reported and
 *    REFERENCER_MISSING is returned.
 */
10624 * Check referencer for normal (inlined) data ref
10625 * If len == 0, it will be resolved by searching in extent tree
10627 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
10628 u64 root_id, u64 objectid, u64 offset,
10629 u64 bytenr, u64 len, u32 count)
10631 struct btrfs_root *root;
10632 struct btrfs_root *extent_root = fs_info->extent_root;
10633 struct btrfs_key key;
10634 struct btrfs_path path;
10635 struct extent_buffer *leaf;
10636 struct btrfs_file_extent_item *fi;
10637 u32 found_count = 0;
/* Extent tree lookup for the extent item at @bytenr */
10642 key.objectid = bytenr;
10643 key.type = BTRFS_EXTENT_ITEM_KEY;
10644 key.offset = (u64)-1;
10646 btrfs_init_path(&path);
10647 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10650 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
10653 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10654 if (key.objectid != bytenr ||
10655 key.type != BTRFS_EXTENT_ITEM_KEY)
10658 btrfs_release_path(&path);
/* Referencing fs root */
10660 key.objectid = root_id;
10661 key.type = BTRFS_ROOT_ITEM_KEY;
10662 key.offset = (u64)-1;
10663 btrfs_init_path(&path);
10665 root = btrfs_read_fs_root(fs_info, &key);
10669 key.objectid = objectid;
10670 key.type = BTRFS_EXTENT_DATA_KEY;
10672 * It can be nasty as data backref offset is
10673 * file offset - file extent offset, which is smaller or
10674 * equal to original backref offset. The only special case is
10675 * overflow. So we need to special check and do further search.
/* Top bit set is treated as the overflow case: start the search at 0 */
10677 key.offset = offset & (1ULL << 63) ? 0 : offset;
10679 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10684 * Search afterwards to get correct one
10685 * NOTE: As we must do a comprehensive check on the data backref to
10686 * make sure the dref count also matches, we must iterate all file
10687 * extents for that inode.
10690 leaf = path.nodes[0];
10691 slot = path.slots[0];
10693 if (slot >= btrfs_header_nritems(leaf))
10695 btrfs_item_key_to_cpu(leaf, &key, slot);
10696 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10698 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10700 * Except normal disk bytenr and disk num bytes, we still
10701 * need to do extra check on dbackref offset as
10702 * dbackref offset = file_offset - file_extent_offset
10704 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10705 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10706 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10711 ret = btrfs_next_item(root, &path);
10716 btrfs_release_path(&path);
/* The number of matches is compared with the count stored in the backref */
10717 if (found_count != count) {
10719 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10720 bytenr, len, root_id, objectid, offset, count, found_count);
10721 return REFERENCER_MISSING;
/*
 * Implementation outline for check_shared_data_backref():
 * the block at @parent is read with read_tree_block() and its items are
 * scanned.  Items are filtered on the EXTENT_DATA key type and on the
 * inline file extent type; for the rest the disk bytenr is compared with
 * @bytenr.  When no item matches, the loss is reported and
 * REFERENCER_MISSING is returned.
 *
 * @parent: bytenr of the leaf expected to reference the data extent
 * @bytenr: bytenr of the data extent
 */
10727 * Check if the referencer of a shared data backref exists
10729 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10730 u64 parent, u64 bytenr)
10732 struct extent_buffer *eb;
10733 struct btrfs_key key;
10734 struct btrfs_file_extent_item *fi;
10735 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10737 int found_parent = 0;
10740 eb = read_tree_block(fs_info, parent, nodesize, 0);
10741 if (!extent_buffer_uptodate(eb))
10744 nr = btrfs_header_nritems(eb);
10745 for (i = 0; i < nr; i++) {
10746 btrfs_item_key_to_cpu(eb, &key, i);
/* Filter on the key type first, then on the inline file extent type */
10747 if (key.type != BTRFS_EXTENT_DATA_KEY)
10750 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
10751 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10754 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10761 free_extent_buffer(eb);
10762 if (!found_parent) {
10763 error("shared extent %llu referencer lost (parent: %llu)",
10765 return REFERENCER_MISSING;
/*
 * Implementation outline for check_extent_item():
 *  - The global bytes_used counter grows by the key offset for an
 *    EXTENT_ITEM key; the other increment visible here adds nodesize.
 *  - Items smaller than struct btrfs_extent_item are reported as the
 *    unsupported COMPAT_EXTENT_TREE_V0 format.
 *  - Metadata extents are tested with check_crossing_stripes()
 *    (CROSSING_STRIPE_BOUNDARY).
 *  - The level comes from the btrfs_tree_block_info (metadata stored under
 *    an EXTENT_ITEM key) or from the key offset (METADATA_ITEM).
 *  - Every inline ref up to the item end is dispatched by type to
 *    check_tree_block_backref(), check_shared_block_backref(),
 *    check_extent_data_backref() or check_shared_data_backref(); other
 *    types set UNKNOWN_TYPE.  Running past the item end sets
 *    ITEM_SIZE_MISMATCH.
 * NOTE(review): the stripe check uses the file-level global_info pointer
 * while the rest of the function uses the @fs_info parameter; confirm the
 * two are meant to be the same object.
 */
10771 * This function will check a given extent item, including its backref and
10772 * itself (like crossing stripe boundary and type)
10774 * Since we don't use extent_record anymore, introduce new error bit
10776 static int check_extent_item(struct btrfs_fs_info *fs_info,
10777 struct extent_buffer *eb, int slot)
10779 struct btrfs_extent_item *ei;
10780 struct btrfs_extent_inline_ref *iref;
10781 struct btrfs_extent_data_ref *dref;
10785 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10786 u32 item_size = btrfs_item_size_nr(eb, slot);
10791 struct btrfs_key key;
10795 btrfs_item_key_to_cpu(eb, &key, slot);
10796 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10797 bytes_used += key.offset;
10799 bytes_used += nodesize;
10801 if (item_size < sizeof(*ei)) {
10803 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10804 * old thing when on disk format is still un-determined.
10805 * No need to care about it anymore
10807 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10811 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10812 flags = btrfs_extent_flags(eb, ei);
10814 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10816 if (metadata && check_crossing_stripes(global_info, key.objectid,
10818 error("bad metadata [%llu, %llu) crossing stripe boundary",
10819 key.objectid, key.objectid + nodesize);
10820 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr starts right behind the fixed-size extent item */
10823 ptr = (unsigned long)(ei + 1);
10825 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10826 /* Old EXTENT_ITEM metadata */
10827 struct btrfs_tree_block_info *info;
10829 info = (struct btrfs_tree_block_info *)ptr;
10830 level = btrfs_tree_block_level(eb, info);
10831 ptr += sizeof(struct btrfs_tree_block_info);
10833 /* New METADATA_ITEM */
10834 level = key.offset;
10836 end = (unsigned long)ei + item_size;
10839 /* Reached extent item end normally */
10843 /* Beyond extent item end, wrong item size */
10845 err |= ITEM_SIZE_MISMATCH;
10846 error("extent item at bytenr %llu slot %d has wrong size",
10851 /* Now check every backref in this extent item */
10852 iref = (struct btrfs_extent_inline_ref *)ptr;
10853 type = btrfs_extent_inline_ref_type(eb, iref);
10854 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* Dispatch on the inline ref type; offset is passed as root id or parent bytenr */
10856 case BTRFS_TREE_BLOCK_REF_KEY:
10857 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10861 case BTRFS_SHARED_BLOCK_REF_KEY:
10862 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10866 case BTRFS_EXTENT_DATA_REF_KEY:
10867 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10868 ret = check_extent_data_backref(fs_info,
10869 btrfs_extent_data_ref_root(eb, dref),
10870 btrfs_extent_data_ref_objectid(eb, dref),
10871 btrfs_extent_data_ref_offset(eb, dref),
10872 key.objectid, key.offset,
10873 btrfs_extent_data_ref_count(eb, dref));
10876 case BTRFS_SHARED_DATA_REF_KEY:
10877 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10881 error("extent[%llu %d %llu] has unknown ref type: %d",
10882 key.objectid, key.type, key.offset, type);
10883 err |= UNKNOWN_TYPE;
/* Advance to the next inline ref; the step depends on the ref type */
10887 ptr += btrfs_extent_inline_ref_size(type);
10895 * Check if a dev extent item is referred correctly by its chunk
/*
 * Verify one DEV_EXTENT item against the chunk it names.
 *
 * A CHUNK_ITEM key is built from the dev extent's chunk_objectid and
 * chunk_offset and searched for in fs_info->chunk_root.  The chunk found is
 * passed to btrfs_check_chunk_valid(), its btrfs_stripe_length() is compared
 * with the dev extent length, and each of its stripes is compared with the
 * dev extent key (devid vs. objectid, stripe offset vs. key offset).
 *
 * @fs_info: filesystem whose chunk tree is searched
 * @eb:      leaf holding the dev extent item
 * @slot:    slot of the dev extent item inside @eb
 *
 * Reports an error and returns REFERENCER_MISSING when found_chunk is still
 * zero after the lookup.
 */
10897 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10898 struct extent_buffer *eb, int slot)
10900 struct btrfs_root *chunk_root = fs_info->chunk_root;
10901 struct btrfs_dev_extent *ptr;
10902 struct btrfs_path path;
10903 struct btrfs_key chunk_key;
10904 struct btrfs_key devext_key;
10905 struct btrfs_chunk *chunk;
10906 struct extent_buffer *l;
10910 int found_chunk = 0;
/* Load the dev extent's own key and its recorded length */
10913 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10914 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10915 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent stores the objectid/offset of the chunk that owns it */
10917 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10918 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10919 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10921 btrfs_init_path(&path);
10922 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10927 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
10928 ret = btrfs_check_chunk_valid(chunk_root, l, chunk, path.slots[0],
10933 if (btrfs_stripe_length(fs_info, l, chunk) != length)
10936 num_stripes = btrfs_chunk_num_stripes(l, chunk);
/* Compare every stripe of the chunk with this dev extent's (devid, offset) */
10937 for (i = 0; i < num_stripes; i++) {
10938 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10939 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
10941 if (devid == devext_key.objectid &&
10942 offset == devext_key.offset) {
10948 btrfs_release_path(&path);
10949 if (!found_chunk) {
10951 "device extent[%llu, %llu, %llu] did not find the related chunk",
10952 devext_key.objectid, devext_key.offset, length);
10953 return REFERENCER_MISSING;
10959 * Check if the used space is correct with the dev item
/*
 * Cross-check the bytes_used field of a DEV_ITEM against the dev extents
 * recorded for the same device.
 *
 * The device id and bytes_used are read from the item, fs_info->dev_root is
 * searched starting from a DEV_EXTENT key whose objectid is that device id,
 * and the length of every DEV_EXTENT item with a matching objectid is added
 * to a running total.
 *
 * @fs_info: filesystem whose device tree is searched
 * @eb:      leaf holding the dev item
 * @slot:    slot of the dev item inside @eb
 *
 * Returns REFERENCER_MISSING on the error path that follows the initial
 * search, and ACCOUNTING_MISMATCH when the summed length differs from
 * bytes_used.
 */
10961 static int check_dev_item(struct btrfs_fs_info *fs_info,
10962 struct extent_buffer *eb, int slot)
10964 struct btrfs_root *dev_root = fs_info->dev_root;
10965 struct btrfs_dev_item *dev_item;
10966 struct btrfs_path path;
10967 struct btrfs_key key;
10968 struct btrfs_dev_extent *ptr;
10974 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10975 dev_id = btrfs_device_id(eb, dev_item);
10976 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents are keyed by the id of the device they live on */
10978 key.objectid = dev_id;
10979 key.type = BTRFS_DEV_EXTENT_KEY;
10982 btrfs_init_path(&path);
10983 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* Error path: reload the dev item's own key so the message names it */
10985 btrfs_item_key_to_cpu(eb, &key, slot);
10986 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10987 key.objectid, key.type, key.offset);
10988 btrfs_release_path(&path);
10989 return REFERENCER_MISSING;
10992 /* Iterate dev_extents to calculate the used space of a device */
10994 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
10997 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10998 if (key.objectid > dev_id)
11000 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
11003 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
11004 struct btrfs_dev_extent);
11005 total += btrfs_dev_extent_length(path.nodes[0], ptr);
11007 ret = btrfs_next_item(dev_root, &path);
11011 btrfs_release_path(&path);
11013 if (used != total) {
/*
 * NOTE(review): the dev item key is reloaded into `key` here, but the
 * message below prints BTRFS_ROOT_TREE_OBJECTID, BTRFS_DEV_EXTENT_KEY and
 * dev_id instead of key.objectid, key.type and key.offset (which the
 * "cannot find any related dev extent" message above does use).  This looks
 * unintended -- confirm and print the key fields.
 */
11014 btrfs_item_key_to_cpu(eb, &key, slot);
11016 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
11017 total, used, BTRFS_ROOT_TREE_OBJECTID,
11018 BTRFS_DEV_EXTENT_KEY, dev_id);
11019 return ACCOUNTING_MISMATCH;
11025 * Check a block group item with its referencer (chunk) and its used space
11026 * with extent/metadata item
/*
 * Verify one BLOCK_GROUP_ITEM in three steps:
 *
 *  1. Look up the chunk item keyed (BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 *     CHUNK_ITEM, bg_key.objectid) in the chunk tree.  The error paths
 *     record REFERENCER_MISSING ("did not find the related chunk item") and
 *     REFERENCER_MISMATCH (chunk length comparison).
 *  2. Walk the extent tree from (bg_key.objectid, 0, 0) up to
 *     bg_key.objectid + bg_key.offset, accounting EXTENT_ITEM and
 *     METADATA_ITEM entries into a running total and comparing each
 *     extent's DATA / TREE_BLOCK flag with the block group flags
 *     (CHUNK_TYPE_MISMATCH).
 *  3. Compare the total with the block group's `used` field
 *     (ACCOUNTING_MISMATCH).
 *
 * @fs_info: filesystem providing the extent and chunk roots
 * @eb:      leaf holding the block group item
 * @slot:    slot of the block group item inside @eb
 *
 * Problems found are OR-ed into `err`.
 */
11028 static int check_block_group_item(struct btrfs_fs_info *fs_info,
11029 struct extent_buffer *eb, int slot)
11031 struct btrfs_root *extent_root = fs_info->extent_root;
11032 struct btrfs_root *chunk_root = fs_info->chunk_root;
11033 struct btrfs_block_group_item *bi;
11034 struct btrfs_block_group_item bg_item;
11035 struct btrfs_path path;
11036 struct btrfs_key bg_key;
11037 struct btrfs_key chunk_key;
11038 struct btrfs_key extent_key;
11039 struct btrfs_chunk *chunk;
11040 struct extent_buffer *leaf;
11041 struct btrfs_extent_item *ei;
11042 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the item out of the extent buffer before reading its fields */
11050 btrfs_item_key_to_cpu(eb, &bg_key, slot);
11051 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
11052 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
11053 used = btrfs_block_group_used(&bg_item);
11054 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item's key offset is the block group's start bytenr */
11056 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
11057 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
11058 chunk_key.offset = bg_key.objectid;
11060 btrfs_init_path(&path);
11061 /* Search for the referencer chunk */
11062 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
11065 "block group[%llu %llu] did not find the related chunk item",
11066 bg_key.objectid, bg_key.offset);
11067 err |= REFERENCER_MISSING;
11069 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
11070 struct btrfs_chunk);
11071 if (btrfs_chunk_length(path.nodes[0], chunk) !=
11074 "block group[%llu %llu] related chunk item length does not match",
11075 bg_key.objectid, bg_key.offset);
11076 err |= REFERENCER_MISMATCH;
11079 btrfs_release_path(&path);
11081 /* Search from the block group bytenr */
11082 extent_key.objectid = bg_key.objectid;
11083 extent_key.type = 0;
11084 extent_key.offset = 0;
11086 btrfs_init_path(&path);
11087 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
11091 /* Iterate extent tree to account used space */
11093 leaf = path.nodes[0];
11095 /* Search slot can point to the last item beyond leaf nritems */
11096 if (path.slots[0] >= btrfs_header_nritems(leaf))
11099 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Compared against the first bytenr past the end of this block group */
11100 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
11103 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
11104 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
11106 if (extent_key.objectid < bg_key.objectid)
/*
 * METADATA_ITEM keys are special-cased; the statement taken for them is not
 * visible in this listing.  The visible accounting adds the key offset.
 */
11109 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
11112 total += extent_key.offset;
11114 ei = btrfs_item_ptr(leaf, path.slots[0],
11115 struct btrfs_extent_item);
11116 flags = btrfs_extent_flags(leaf, ei);
/* A data extent must live in a DATA block group */
11117 if (flags & BTRFS_EXTENT_FLAG_DATA) {
11118 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
11120 "bad extent[%llu, %llu) type mismatch with chunk",
11121 extent_key.objectid,
11122 extent_key.objectid + extent_key.offset);
11123 err |= CHUNK_TYPE_MISMATCH;
/* A tree block must live in a SYSTEM or METADATA block group */
11125 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
11126 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
11127 BTRFS_BLOCK_GROUP_METADATA))) {
11129 "bad extent[%llu, %llu) type mismatch with chunk",
11130 extent_key.objectid,
11131 extent_key.objectid + nodesize);
11132 err |= CHUNK_TYPE_MISMATCH;
11136 ret = btrfs_next_item(extent_root, &path);
11142 btrfs_release_path(&path);
11144 if (total != used) {
11146 "block group[%llu %llu] used %llu but extent items used %llu",
11147 bg_key.objectid, bg_key.offset, used, total);
11148 err |= ACCOUNTING_MISMATCH;
11154 * Check a chunk item.
11155 * Including checking all referred dev_extents and block group
/*
 * Verify one CHUNK_ITEM together with the items that must refer back to it.
 *
 *  - The chunk is passed to btrfs_check_chunk_valid(); the failure path
 *    records BYTES_UNALIGNED | UNKNOWN_TYPE.
 *  - The block group item keyed (chunk_key.offset, BLOCK_GROUP_ITEM, length)
 *    is searched for in the extent tree and its flags are compared with the
 *    chunk type.
 *  - For every stripe, the dev extent keyed (stripe devid, DEV_EXTENT,
 *    stripe offset) is searched for in the device tree; its chunk_objectid,
 *    chunk_offset and length must equal chunk_key.objectid,
 *    chunk_key.offset and the chunk's stripe length.  Failures jump to
 *    not_match_dev, where BACKREF_MISSING is recorded.
 *
 * @fs_info: filesystem providing the extent and device roots
 * @eb:      leaf holding the chunk item
 * @slot:    slot of the chunk item inside @eb
 *
 * Problems found are OR-ed into `err`.
 */
11157 static int check_chunk_item(struct btrfs_fs_info *fs_info,
11158 struct extent_buffer *eb, int slot)
11160 struct btrfs_root *extent_root = fs_info->extent_root;
11161 struct btrfs_root *dev_root = fs_info->dev_root;
11162 struct btrfs_path path;
11163 struct btrfs_key chunk_key;
11164 struct btrfs_key bg_key;
11165 struct btrfs_key devext_key;
11166 struct btrfs_chunk *chunk;
11167 struct extent_buffer *leaf;
11168 struct btrfs_block_group_item *bi;
11169 struct btrfs_block_group_item bg_item;
11170 struct btrfs_dev_extent *ptr;
11182 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
11183 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
11184 length = btrfs_chunk_length(eb, chunk);
11185 chunk_end = chunk_key.offset + length;
/*
 * NOTE(review): extent_root is passed here, whereas check_dev_extent_item()
 * passes chunk_root to the same helper -- confirm which root is expected.
 */
11186 ret = btrfs_check_chunk_valid(extent_root, eb, chunk, slot,
11189 error("chunk[%llu %llu) is invalid", chunk_key.offset,
11191 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
11194 type = btrfs_chunk_type(eb, chunk);
/* The matching block group is keyed by the chunk's start and length */
11196 bg_key.objectid = chunk_key.offset;
11197 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
11198 bg_key.offset = length;
11200 btrfs_init_path(&path);
11201 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
11204 "chunk[%llu %llu) did not find the related block group item",
11205 chunk_key.offset, chunk_end);
11206 err |= REFERENCER_MISSING;
11208 leaf = path.nodes[0];
11209 bi = btrfs_item_ptr(leaf, path.slots[0],
11210 struct btrfs_block_group_item);
11211 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
11213 if (btrfs_block_group_flags(&bg_item) != type) {
11215 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
11216 chunk_key.offset, chunk_end, type,
11217 btrfs_block_group_flags(&bg_item));
/*
 * NOTE(review): the message above reports a mismatch, yet the bit recorded
 * is REFERENCER_MISSING; check_block_group_item() uses REFERENCER_MISMATCH
 * for its length mismatch -- confirm which bit is intended.
 */
11218 err |= REFERENCER_MISSING;
11222 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
11223 stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
/* One dev extent lookup per stripe; the path is reset on every iteration */
11224 for (i = 0; i < num_stripes; i++) {
11225 btrfs_release_path(&path);
11226 btrfs_init_path(&path);
11227 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
11228 devext_key.type = BTRFS_DEV_EXTENT_KEY;
11229 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
11231 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
11234 goto not_match_dev;
11236 leaf = path.nodes[0];
11237 ptr = btrfs_item_ptr(leaf, path.slots[0],
11238 struct btrfs_dev_extent);
11239 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
11240 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
11241 if (objectid != chunk_key.objectid ||
11242 offset != chunk_key.offset ||
11243 btrfs_dev_extent_length(leaf, ptr) != stripe_len)
11244 goto not_match_dev;
11247 err |= BACKREF_MISSING;
/*
 * NOTE(review): every other "chunk[%llu %llu)" message in this function
 * prints chunk_key.offset as the range start; this one prints
 * chunk_key.objectid -- confirm.
 */
11249 "chunk[%llu %llu) stripe %d did not find the related dev extent",
11250 chunk_key.objectid, chunk_end, i);
11253 btrfs_release_path(&path);
11259 * Main entry function to check known items and update related accounting info
/*
 * Run the item-specific checker for the items of leaf @eb.
 *
 * The key at `slot` is loaded and its type selects the checker:
 *   EXTENT_DATA               -> check_extent_data_item()
 *   BLOCK_GROUP_ITEM          -> check_block_group_item()
 *   DEV_ITEM                  -> check_dev_item()
 *   CHUNK_ITEM                -> check_chunk_item()
 *   DEV_EXTENT                -> check_dev_extent_item()
 *   EXTENT_ITEM/METADATA_ITEM -> check_extent_item()
 *   EXTENT_CSUM               -> only adds the item size to total_csum_bytes
 *   TREE_BLOCK_REF            -> check_tree_block_backref()
 *   EXTENT_DATA_REF           -> check_extent_data_backref()
 *   SHARED_BLOCK_REF          -> check_shared_block_backref()
 *   SHARED_DATA_REF           -> check_shared_data_backref()
 *
 * @root: tree that owns @eb; root->fs_info is handed to most checkers
 * @eb:   leaf to check
 */
11261 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
11263 struct btrfs_fs_info *fs_info = root->fs_info;
11264 struct btrfs_key key;
11267 struct btrfs_extent_data_ref *dref;
11272 btrfs_item_key_to_cpu(eb, &key, slot);
11276 case BTRFS_EXTENT_DATA_KEY:
11277 ret = check_extent_data_item(root, eb, slot);
11280 case BTRFS_BLOCK_GROUP_ITEM_KEY:
11281 ret = check_block_group_item(fs_info, eb, slot);
11284 case BTRFS_DEV_ITEM_KEY:
11285 ret = check_dev_item(fs_info, eb, slot);
11288 case BTRFS_CHUNK_ITEM_KEY:
11289 ret = check_chunk_item(fs_info, eb, slot);
11292 case BTRFS_DEV_EXTENT_KEY:
11293 ret = check_dev_extent_item(fs_info, eb, slot);
11296 case BTRFS_EXTENT_ITEM_KEY:
11297 case BTRFS_METADATA_ITEM_KEY:
11298 ret = check_extent_item(fs_info, eb, slot);
/* Checksum items are not verified here, only accounted */
11301 case BTRFS_EXTENT_CSUM_KEY:
11302 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backref items, checked like their inline forms */
11304 case BTRFS_TREE_BLOCK_REF_KEY:
11305 ret = check_tree_block_backref(fs_info, key.offset,
11309 case BTRFS_EXTENT_DATA_REF_KEY:
11310 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
11311 ret = check_extent_data_backref(fs_info,
11312 btrfs_extent_data_ref_root(eb, dref),
11313 btrfs_extent_data_ref_objectid(eb, dref),
11314 btrfs_extent_data_ref_offset(eb, dref),
11316 btrfs_extent_data_ref_count(eb, dref));
11319 case BTRFS_SHARED_BLOCK_REF_KEY:
11320 ret = check_shared_block_backref(fs_info, key.offset,
11324 case BTRFS_SHARED_DATA_REF_KEY:
11325 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot while one remains in this leaf */
11333 if (++slot < btrfs_header_nritems(eb))
11340 * Helper function for later fs/subvol tree check. To determine if a tree
11341 * block should be checked.
11342 * This function ensures that only the direct referencer with the lowest
11343 * rootid checks a fs/subvolume tree block.
11345 * Backref check at extent tree would detect errors like missing subvolume
11346 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root is the tree that should check tree block @eb.
 *
 * The extent item for btrfs_header_bytenr(eb) is located in the extent tree
 * (search with a METADATA_ITEM key at offset (u64)-1, then
 * btrfs_previous_extent_item()).  Its inline refs are walked; when a
 * TREE_BLOCK_REF whose offset is lower than root->objectid is seen, the
 * path is released and the walk stops early -- per the comment inside the
 * loop, a lower referencer means this root does not check the block.
 *
 * @root: fs/subvolume tree currently being traversed
 * @eb:   tree block under consideration
 *
 * The exact return values are on lines not visible in this listing; the
 * caller treats a zero result as "skip this block".
 */
11348 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
11350 struct btrfs_root *extent_root = root->fs_info->extent_root;
11351 struct btrfs_key key;
11352 struct btrfs_path path;
11353 struct extent_buffer *leaf;
11355 struct btrfs_extent_item *ei;
11361 struct btrfs_extent_inline_ref *iref;
11364 btrfs_init_path(&path);
/* Highest possible key for this bytenr; the item wanted sits just before */
11365 key.objectid = btrfs_header_bytenr(eb);
11366 key.type = BTRFS_METADATA_ITEM_KEY;
11367 key.offset = (u64)-1;
11370 * Any failure in backref resolving means we can't determine
11371 * whom the tree block belongs to.
11372 * So in that case, we need to check that tree block
11374 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11378 ret = btrfs_previous_extent_item(extent_root, &path,
11379 btrfs_header_bytenr(eb));
11383 leaf = path.nodes[0];
11384 slot = path.slots[0];
11385 btrfs_item_key_to_cpu(leaf, &key, slot);
11386 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * For a METADATA_ITEM the inline refs start right after the extent item;
 * otherwise a btrfs_tree_block_info sits between the two.
 */
11388 if (key.type == BTRFS_METADATA_ITEM_KEY) {
11389 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11391 struct btrfs_tree_block_info *info;
11393 info = (struct btrfs_tree_block_info *)(ei + 1);
11394 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11397 item_size = btrfs_item_size_nr(leaf, slot);
11398 ptr = (unsigned long)iref;
11399 end = (unsigned long)ei + item_size;
/* Walk the inline refs until the end of the item */
11400 while (ptr < end) {
11401 iref = (struct btrfs_extent_inline_ref *)ptr;
11402 type = btrfs_extent_inline_ref_type(leaf, iref);
11403 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11406 * We only check the tree block if current root is
11407 * the lowest referencer of it.
11409 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
11410 offset < root->objectid) {
11411 btrfs_release_path(&path);
11415 ptr += btrfs_extent_inline_ref_size(type);
11418 * Normally we should also check keyed tree block ref, but that may be
11419 * very time consuming. Inlined ref should already make us skip a lot
11420 * of refs now. So skip search keyed tree block ref.
11424 btrfs_release_path(&path);
11429 * Traversal function for tree block. We will do:
11430 * 1) Skip shared fs/subvolume tree blocks
11431 * 2) Update related bytes accounting
11432 * 3) Pre-order traversal
/*
 * Check tree block @node and recurse into its children, pre-order.
 *
 * Visible steps:
 *  - a block of a fs tree is subject to should_check(); per the comment
 *    below, shared blocks are left to the referencer with the lowest rootid;
 *  - the global byte counters (total_btree_bytes, total_fs_tree_bytes,
 *    total_extent_tree_bytes) and found_old_backref are updated from the
 *    block header;
 *  - check_tree_block_ref() is run on the block itself;
 *  - for a leaf, the free space is added to btree_space_waste and
 *    check_leaf_items() is called;
 *  - for an interior node, the unused key-pointer slots are added to
 *    btree_space_waste and every child is read with read_tree_block() and,
 *    if up to date, traversed recursively.
 *
 * The branch that separates the leaf and interior-node cases is on lines
 * not visible in this listing.
 *
 * @root: tree being traversed
 * @node: block to check; children read here are released with
 *        free_extent_buffer()
 */
11434 static int traverse_tree_block(struct btrfs_root *root,
11435 struct extent_buffer *node)
11437 struct extent_buffer *eb;
11438 struct btrfs_key key;
11439 struct btrfs_key drop_key;
11447 * Skip shared fs/subvolume tree block, in that case they will
11448 * be checked by referencer with lowest rootid
11450 if (is_fstree(root->objectid) && !should_check(root, node))
11453 /* Update bytes accounting */
11454 total_btree_bytes += node->len;
11455 if (fs_root_objectid(btrfs_header_owner(node)))
11456 total_fs_tree_bytes += node->len;
11457 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
11458 total_extent_tree_bytes += node->len;
/*
 * Latch found_old_backref for a TREE_RELOC-owned block with mixed backref
 * revision that lacks the RELOC header flag.
 */
11459 if (!found_old_backref &&
11460 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
11461 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
11462 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
11463 found_old_backref = 1;
11465 /* pre-order traversal, check itself first */
11466 level = btrfs_header_level(node);
11467 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
11468 btrfs_header_level(node),
11469 btrfs_header_owner(node));
11473 "check %s failed root %llu bytenr %llu level %d, force continue check",
11474 level ? "node":"leaf", root->objectid,
11475 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf handling: account free space, then check every item */
11478 btree_space_waste += btrfs_leaf_free_space(root, node);
11479 ret = check_leaf_items(root, node);
/* Interior node handling */
11484 nr = btrfs_header_nritems(node);
11485 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
11486 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
11487 sizeof(struct btrfs_key_ptr);
11489 /* Then check all its children */
11490 for (i = 0; i < nr; i++) {
11491 u64 blocknr = btrfs_node_blockptr(node, i);
11493 btrfs_node_key_to_cpu(node, &key, i);
/* Keys at the drop level are tested against the root's drop_progress */
11494 if (level == root->root_item.drop_level &&
11495 is_dropped_key(&key, &drop_key))
11499 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
11500 * to call the function itself.
11502 eb = read_tree_block(root->fs_info, blocknr,
11503 root->fs_info->nodesize, 0);
11504 if (extent_buffer_uptodate(eb)) {
11505 ret = traverse_tree_block(root, eb);
11508 free_extent_buffer(eb);
11515 * Low memory usage version check_chunks_and_extents.
/*
 * Driver of the low-memory chunk/extent check.
 *
 * Traverses the chunk root, then the tree root, and then every ROOT_ITEM
 * found in the tree root, starting the search at objectid
 * BTRFS_EXTENT_TREE_OBJECTID.  Each tree is read and handed to
 * traverse_tree_block().  TREE_RELOC roots are read with
 * btrfs_read_fs_root_no_cache() and freed with btrfs_free_fs_root() once
 * traversed; all other roots go through btrfs_read_fs_root().
 *
 * @root: any root of the filesystem; only root->fs_info is used
 */
11517 static int check_chunks_and_extents_v2(struct btrfs_root *root)
11519 struct btrfs_path path;
11520 struct btrfs_key key;
11521 struct btrfs_root *root1;
11522 struct btrfs_root *cur_root;
11526 root1 = root->fs_info->chunk_root;
11527 ret = traverse_tree_block(root1, root1->node);
11530 root1 = root->fs_info->tree_root;
11531 ret = traverse_tree_block(root1, root1->node);
/* From here on root1 is the tree root; iterate its ROOT_ITEMs */
11534 btrfs_init_path(&path);
11535 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
11537 key.type = BTRFS_ROOT_ITEM_KEY;
11539 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
/* NOTE(review): "treet" in the message below looks like a typo for "tree" */
11541 error("cannot find extent treet in tree_root");
11546 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11547 if (key.type != BTRFS_ROOT_ITEM_KEY)
11549 key.offset = (u64)-1;
11551 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11552 cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
11555 cur_root = btrfs_read_fs_root(root->fs_info, &key);
11556 if (IS_ERR(cur_root) || !cur_root) {
11557 error("failed to read tree: %lld", key.objectid);
11561 ret = traverse_tree_block(cur_root, cur_root->node);
/* Only the uncached reloc root is freed by this function */
11564 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11565 btrfs_free_fs_root(cur_root);
11567 ret = btrfs_next_item(root1, &path);
11573 btrfs_release_path(&path);
/*
 * Re-initialise the node of @root with a freshly written block header.
 *
 * A buffer `c` is obtained (one visible path allocates it with
 * btrfs_alloc_free_block(); both visible paths take an extra reference with
 * extent_buffer_get()).  Its header is zeroed and then filled in: level,
 * bytenr, generation (trans->transid), backref revision, owner, fsid and
 * chunk tree uuid.  The buffer is marked dirty.  When the new block has the
 * same start as the old node, the root item's generation and level are
 * updated and written back with btrfs_update_root().  Finally the old node
 * is released and the root is added to the dirty list.
 *
 * @trans:     running transaction
 * @root:      root to re-initialise
 * @overwrite: NOTE(review): its use is on lines not visible in this
 *             listing; presumably selects reusing the current block
 *             instead of allocating -- confirm
 */
11577 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
11578 struct btrfs_root *root, int overwrite)
11580 struct extent_buffer *c;
11581 struct extent_buffer *old = root->node;
11584 struct btrfs_disk_key disk_key = {0,0,0};
11590 extent_buffer_get(c);
11593 c = btrfs_alloc_free_block(trans, root,
11594 root->fs_info->nodesize,
11595 root->root_key.objectid,
11596 &disk_key, level, 0, 0);
11599 extent_buffer_get(c);
/* Wipe the header, then rebuild it field by field */
11603 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
11604 btrfs_set_header_level(c, level);
11605 btrfs_set_header_bytenr(c, c->start);
11606 btrfs_set_header_generation(c, trans->transid);
11607 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
11608 btrfs_set_header_owner(c, root->root_key.objectid);
/*
 * NOTE(review): btrfs_header_fsid() is called without an argument while
 * btrfs_header_chunk_tree_uuid(c) takes the buffer -- confirm against the
 * helper declarations.
 */
11610 write_extent_buffer(c, root->fs_info->fsid,
11611 btrfs_header_fsid(), BTRFS_FSID_SIZE);
11613 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
11614 btrfs_header_chunk_tree_uuid(c),
11617 btrfs_mark_buffer_dirty(c);
11619 * this case can happen in the following case:
11621 * 1.overwrite previous root.
11623 * 2.reinit reloc data root, this is because we skip pin
11624 * down reloc data tree before which means we can allocate
11625 * same block bytenr here.
11627 if (old->start == c->start) {
11628 btrfs_set_root_generation(&root->root_item,
11630 root->root_item.level = btrfs_header_level(root->node);
11631 ret = btrfs_update_root(trans, root->fs_info->tree_root,
11632 &root->root_key, &root->root_item);
11634 free_extent_buffer(c);
11638 free_extent_buffer(old);
11640 add_root_to_dirty_list(root);
/*
 * Pin the extent occupied by @eb and, recursively, the tree blocks
 * reachable from it.
 *
 * A block whose range is already marked EXTENT_DIRTY in
 * fs_info->pinned_extents is not processed again.  Otherwise the block is
 * pinned and its entries are walked:
 *  - item entries: for a ROOT_ITEM that is not the extent root, the tree
 *    reloc root or the data reloc root, the root block it points to is
 *    read and pinned recursively with tree_root == 0;
 *  - node pointers: with level == 1 and !tree_root the child is pinned
 *    without being read; otherwise the child is read and pinned
 *    recursively with the same tree_root value.
 *
 * The branch separating leaf and interior-node handling is on lines not
 * visible in this listing.
 *
 * @fs_info:   filesystem whose pinned_extents set is updated
 * @eb:        block to start from
 * @tree_root: non-zero while walking the root tree itself
 */
11644 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
11645 struct extent_buffer *eb, int tree_root)
11647 struct extent_buffer *tmp;
11648 struct btrfs_root_item *ri;
11649 struct btrfs_key key;
11652 int level = btrfs_header_level(eb);
11658 * If we have pinned this block before, don't pin it again.
11659 * This can not only avoid forever loop with broken filesystem
11660 * but also give us some speedups.
11662 if (test_range_bit(&fs_info->pinned_extents, eb->start,
11663 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
11666 btrfs_pin_extent(fs_info, eb->start, eb->len);
11668 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11669 nritems = btrfs_header_nritems(eb);
11670 for (i = 0; i < nritems; i++) {
11672 btrfs_item_key_to_cpu(eb, &key, i);
11673 if (key.type != BTRFS_ROOT_ITEM_KEY)
11675 /* Skip the extent root and reloc roots */
11676 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
11677 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
11678 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
11680 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
11681 bytenr = btrfs_disk_root_bytenr(eb, ri);
11684 * If at any point we start needing the real root we
11685 * will have to build a stump root for the root we are
11686 * in, but for now this doesn't actually use the root so
11687 * just pass in extent_root.
11689 tmp = read_tree_block(fs_info, bytenr, nodesize, 0);
11690 if (!extent_buffer_uptodate(tmp)) {
11691 fprintf(stderr, "Error reading root block\n");
/* Blocks below a root item are walked with tree_root == 0 */
11694 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11695 free_extent_buffer(tmp);
11699 bytenr = btrfs_node_blockptr(eb, i);
11701 /* If we aren't the tree root don't read the block */
11702 if (level == 1 && !tree_root) {
11703 btrfs_pin_extent(fs_info, bytenr, nodesize);
11707 tmp = read_tree_block(fs_info, bytenr,
11709 if (!extent_buffer_uptodate(tmp)) {
11710 fprintf(stderr, "Error reading tree block\n");
11713 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11714 free_extent_buffer(tmp);
/*
 * Pin the tree blocks reachable from the chunk root (tree_root == 0) and
 * then those reachable from the tree root (tree_root == 1).
 *
 * @fs_info: filesystem to walk
 *
 * Returns the result of the tree-root walk when that walk is reached.
 */
11723 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11727 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11731 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree.
 *
 * The avail_{data,metadata,system}_alloc_bits are cleared, the chunk tree
 * is walked leaf by leaf, and for every CHUNK_ITEM a block group is added
 * with btrfs_add_block_group() and the chunk's range is marked dirty in
 * fs_info->free_space_cache.  Afterwards the block groups are iterated with
 * btrfs_lookup_first_block_group(), advancing `start` past each one; what
 * is done with each cache entry is on lines not visible in this listing.
 *
 * @fs_info: filesystem whose block groups are recreated
 */
11734 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11736 struct btrfs_block_group_cache *cache;
11737 struct btrfs_path path;
11738 struct extent_buffer *leaf;
11739 struct btrfs_chunk *chunk;
11740 struct btrfs_key key;
11744 btrfs_init_path(&path);
11746 key.type = BTRFS_CHUNK_ITEM_KEY;
11748 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11750 btrfs_release_path(&path);
11755 * We do this in case the block groups were screwed up and had alloc
11756 * bits that aren't actually set on the chunks. This happens with
11757 * restored images every time and could happen in real life I guess.
11759 fs_info->avail_data_alloc_bits = 0;
11760 fs_info->avail_metadata_alloc_bits = 0;
11761 fs_info->avail_system_alloc_bits = 0;
11763 /* First we need to create the in-memory block groups */
/* Move to the next leaf once the current one is exhausted */
11765 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11766 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11768 btrfs_release_path(&path);
11776 leaf = path.nodes[0];
11777 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11778 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11783 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11784 btrfs_add_block_group(fs_info, 0,
11785 btrfs_chunk_type(leaf, chunk),
11786 key.objectid, key.offset,
11787 btrfs_chunk_length(leaf, chunk));
/*
 * NOTE(review): the end passed here is offset + length, whereas
 * pin_down_tree_blocks() passes start + len - 1 to test_range_bit() --
 * confirm whether set_extent_dirty() takes an inclusive end.
 */
11788 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11789 key.offset + btrfs_chunk_length(leaf, chunk));
11794 cache = btrfs_lookup_first_block_group(fs_info, start);
11798 start = cache->key.objectid + cache->key.offset;
11801 btrfs_release_path(&path);
/*
 * Remove the traces of a pending balance from the tree root and
 * re-initialise the data reloc tree.
 *
 * Visible steps:
 *  1. search for the (BTRFS_BALANCE_OBJECTID, BALANCE_ITEM) key with a
 *     deleting search and remove it with btrfs_del_item(); one path jumps
 *     straight to reinit_data_reloc;
 *  2. search from (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM) and delete the
 *     items whose objectid is BTRFS_TREE_RELOC_OBJECTID in batches
 *     (del_slot / del_nr) with btrfs_del_items();
 *  3. read the data reloc tree root, record it in the transaction,
 *     re-initialise it with btrfs_fsck_reinit_root() and create its root
 *     directory with btrfs_make_root_dir().
 *
 * @trans:   running transaction
 * @fs_info: filesystem to clean up
 */
11805 static int reset_balance(struct btrfs_trans_handle *trans,
11806 struct btrfs_fs_info *fs_info)
11808 struct btrfs_root *root = fs_info->tree_root;
11809 struct btrfs_path path;
11810 struct extent_buffer *leaf;
11811 struct btrfs_key key;
11812 int del_slot, del_nr = 0;
11816 btrfs_init_path(&path);
11817 key.objectid = BTRFS_BALANCE_OBJECTID;
11818 key.type = BTRFS_BALANCE_ITEM_KEY;
11820 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11825 goto reinit_data_reloc;
11830 ret = btrfs_del_item(trans, root, &path);
11833 btrfs_release_path(&path);
/* Next, the root items of the tree reloc trees */
11835 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11836 key.type = BTRFS_ROOT_ITEM_KEY;
11838 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11842 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11847 ret = btrfs_del_items(trans, root, &path,
11854 btrfs_release_path(&path);
11857 ret = btrfs_search_slot(trans, root, &key, &path,
11864 leaf = path.nodes[0];
11865 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11866 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11868 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11873 del_slot = path.slots[0];
11882 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11886 btrfs_release_path(&path);
/* Finally rebuild the data reloc tree; `root` is repointed to it */
11889 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11890 key.type = BTRFS_ROOT_ITEM_KEY;
11891 key.offset = (u64)-1;
11892 root = btrfs_read_fs_root(fs_info, &key);
11893 if (IS_ERR(root)) {
11894 fprintf(stderr, "Error reading data reloc tree\n");
11895 ret = PTR_ERR(root);
11898 record_root_in_trans(trans, root);
11899 ret = btrfs_fsck_reinit_root(trans, root, 0);
11902 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11904 btrfs_release_path(&path);
/*
 * Recreate the extent tree.
 *
 * Visible sequence:
 *  1. filesystems with the MIXED_GROUPS incompat flag get a "not supported"
 *     message on stderr;
 *  2. pin_metadata_blocks() pins the metadata currently in use;
 *  3. the in-memory block groups are dropped with btrfs_free_block_groups()
 *     and rebuilt with reset_block_groups();
 *  4. the extent root is re-initialised with btrfs_fsck_reinit_root();
 *  5. every block group found with btrfs_lookup_first_block_group() is
 *     inserted into the new extent root, followed by
 *     btrfs_extent_post_op();
 *  6. reset_balance() clears pending balance state.
 *
 * Each failing step prints a message to stderr; the statements that
 * propagate the error are on lines not visible in this listing.
 *
 * @trans:   running transaction
 * @fs_info: filesystem whose extent tree is rebuilt
 */
11908 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11909 struct btrfs_fs_info *fs_info)
11915 * The only reason we don't do this is because right now we're just
11916 * walking the trees we find and pinning down their bytes, we don't look
11917 * at any of the leaves. In order to do mixed groups we'd have to check
11918 * the leaves of any fs roots and pin down the bytes for any file
11919 * extents we find. Not hard but why do it if we don't have to?
11921 if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
11922 fprintf(stderr, "We don't support re-initing the extent tree "
11923 "for mixed block groups yet, please notify a btrfs "
11924 "developer you want to do this so they can add this "
11925 "functionality.\n");
11930 * first we need to walk all of the trees except the extent tree and pin
11931 * down the bytes that are in use so we don't overwrite any existing
11934 ret = pin_metadata_blocks(fs_info);
11936 fprintf(stderr, "error pinning down used bytes\n");
11941 * Need to drop all the block groups since we're going to recreate all
11944 btrfs_free_block_groups(fs_info);
11945 ret = reset_block_groups(fs_info);
11947 fprintf(stderr, "error resetting the block groups\n");
11951 /* Ok we can allocate now, reinit the extent root */
11952 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11954 fprintf(stderr, "extent root initialization failed\n");
11956 * When the transaction code is updated we should end the
11957 * transaction, but for now progs only knows about commit so
11958 * just return an error.
11964 * Now we have all the in-memory block groups setup so we can make
11965 * allocations properly, and the metadata we care about is safe since we
11966 * pinned all of it above.
11969 struct btrfs_block_group_cache *cache;
11971 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Continue the lookup after the end of this block group */
11974 start = cache->key.objectid + cache->key.offset;
11975 ret = btrfs_insert_item(trans, fs_info->extent_root,
11976 &cache->key, &cache->item,
11977 sizeof(cache->item));
11979 fprintf(stderr, "Error adding block group\n");
11982 btrfs_extent_post_op(trans, fs_info->extent_root);
11985 ret = reset_balance(trans, fs_info);
11987 fprintf(stderr, "error resetting the pending balance\n");
/*
 * "Recow" metadata block @eb.
 *
 * The owner root named in the block header is read, a transaction is
 * started on it, and btrfs_search_slot() is run for the first key of @eb
 * with path.lowest_level set to the block's level and the last argument set
 * to 1 (presumably the COW flag -- confirm against btrfs_search_slot()).
 * The transaction is then committed and the path released.
 *
 * @root: any root of the filesystem; replaced locally by the owner root
 * @eb:   block to recow
 *
 * Returns PTR_ERR() of the failing call when the owner root cannot be read
 * or (per the visible return) the transaction cannot be started.
 */
11992 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11994 struct btrfs_path path;
11995 struct btrfs_trans_handle *trans;
11996 struct btrfs_key key;
11999 printf("Recowing metadata block %llu\n", eb->start);
12000 key.objectid = btrfs_header_owner(eb);
12001 key.type = BTRFS_ROOT_ITEM_KEY;
12002 key.offset = (u64)-1;
12004 root = btrfs_read_fs_root(root->fs_info, &key);
12005 if (IS_ERR(root)) {
12006 fprintf(stderr, "Couldn't find owner root %llu\n",
12008 return PTR_ERR(root);
12011 trans = btrfs_start_transaction(root, 1);
12013 return PTR_ERR(trans);
12015 btrfs_init_path(&path);
/* Stop the search at the level of the block itself */
12016 path.lowest_level = btrfs_header_level(eb);
/* Interior nodes and leaves store their first key differently */
12017 if (path.lowest_level)
12018 btrfs_node_key_to_cpu(eb, &key, 0);
12020 btrfs_item_key_to_cpu(eb, &key, 0);
12022 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* NOTE(review): the result of the commit is not captured here -- confirm */
12023 btrfs_commit_transaction(trans, root);
12024 btrfs_release_path(&path);
/*
 * Delete the item described by @bad from the tree that owns it.
 *
 * The root with objectid bad->root_id is read, a transaction is started on
 * it, bad->key is searched for with a deleting search (ins_len -1), the
 * item is removed with btrfs_del_item(), and the transaction is committed.
 *
 * @root: any root of the filesystem; replaced locally by the owner root
 * @bad:  root id and key of the item to delete
 *
 * Returns PTR_ERR() of the failing call when the owner root cannot be read
 * or (per the visible return) the transaction cannot be started.
 */
12028 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
12030 struct btrfs_path path;
12031 struct btrfs_trans_handle *trans;
12032 struct btrfs_key key;
12035 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
12036 bad->key.type, bad->key.offset);
12037 key.objectid = bad->root_id;
12038 key.type = BTRFS_ROOT_ITEM_KEY;
12039 key.offset = (u64)-1;
12041 root = btrfs_read_fs_root(root->fs_info, &key);
12042 if (IS_ERR(root)) {
12043 fprintf(stderr, "Couldn't find owner root %llu\n",
12045 return PTR_ERR(root);
12048 trans = btrfs_start_transaction(root, 1);
12050 return PTR_ERR(trans);
12052 btrfs_init_path(&path);
12053 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
12059 ret = btrfs_del_item(trans, root, &path);
/* NOTE(review): the result of the commit is not captured here -- confirm */
12061 btrfs_commit_transaction(trans, root);
12062 btrfs_release_path(&path);
/*
 * Clear the log tree pointer of the filesystem.
 *
 * Inside a transaction started on @root, the log_root and log_root_level
 * fields of the in-memory super block copy are set to 0, and the
 * transaction is committed.
 *
 * @root: root used to start and commit the transaction
 *
 * `ret` is PTR_ERR(trans) when the transaction cannot be started, otherwise
 * the result of btrfs_commit_transaction().
 */
12066 static int zero_log_tree(struct btrfs_root *root)
12068 struct btrfs_trans_handle *trans;
12071 trans = btrfs_start_transaction(root, 1);
12072 if (IS_ERR(trans)) {
12073 ret = PTR_ERR(trans);
12076 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
12077 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
12078 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data range starting at @start.
 *
 * The range is processed one sector (fs_info->sectorsize) at a time while
 * offset < len: the sector at start + offset is read into @buf with
 * read_extent_data() and handed to btrfs_csum_file_block(), which also
 * receives start + len.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree receiving the items
 * @buf:       scratch buffer; the callers in this file allocate sectorsize
 *             bytes
 * @start:     first byte of the range
 */
12082 static int populate_csum(struct btrfs_trans_handle *trans,
12083 struct btrfs_root *csum_root, char *buf, u64 start,
12086 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12091 while (offset < len) {
12092 sectorsize = fs_info->sectorsize;
12093 ret = read_extent_data(fs_info, buf, start + offset,
12097 ret = btrfs_csum_file_block(trans, csum_root, start + len,
12098 start + offset, buf, sectorsize);
12101 offset += sectorsize;
/*
 * Fill the checksum tree from the file extents of one fs/subvolume tree.
 *
 * A sectorsize scratch buffer is allocated and the items of @cur_root are
 * iterated with btrfs_next_item().  For every EXTENT_DATA item whose file
 * extent type is BTRFS_FILE_EXTENT_REG, populate_csum() is called on the
 * on-disk extent (disk_bytenr, disk_num_bytes).  A return of -EEXIST from
 * populate_csum() is tested for separately; its handling is on lines not
 * visible in this listing.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree receiving the items
 * @cur_root:  fs/subvolume tree to scan
 */
12106 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
12107 struct btrfs_root *csum_root,
12108 struct btrfs_root *cur_root)
12110 struct btrfs_path path;
12111 struct btrfs_key key;
12112 struct extent_buffer *node;
12113 struct btrfs_file_extent_item *fi;
/* NOTE(review): the matching free(buf) is not visible here -- confirm */
12120 buf = malloc(cur_root->fs_info->sectorsize);
12124 btrfs_init_path(&path);
12128 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
12131 /* Iterate all regular file extents and fill its csum */
12133 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12135 if (key.type != BTRFS_EXTENT_DATA_KEY)
12137 node = path.nodes[0];
12138 slot = path.slots[0];
12139 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
12140 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent, not the file range */
12142 start = btrfs_file_extent_disk_bytenr(node, fi);
12143 len = btrfs_file_extent_disk_num_bytes(node, fi);
12145 ret = populate_csum(trans, csum_root, buf, start, len);
12146 if (ret == -EEXIST)
12152 * TODO: if next leaf is corrupted, jump to nearest next valid
12155 ret = btrfs_next_item(cur_root, &path);
12165 btrfs_release_path(&path);
/*
 * Fill the checksum tree by scanning every fs/subvolume tree.
 *
 * The tree root is searched starting at (BTRFS_FS_TREE_OBJECTID, ROOT_ITEM)
 * and iterated with btrfs_next_item().  Keys are tested against
 * BTRFS_LAST_FREE_OBJECTID, the ROOT_ITEM type and is_fstree(); for the
 * keys that pass, the tree is read with btrfs_read_fs_root() and handed to
 * fill_csum_tree_from_one_fs_root().
 *
 * @trans:     running transaction
 * @csum_root: checksum tree receiving the items
 */
12170 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
12171 struct btrfs_root *csum_root)
12173 struct btrfs_fs_info *fs_info = csum_root->fs_info;
12174 struct btrfs_path path;
12175 struct btrfs_root *tree_root = fs_info->tree_root;
12176 struct btrfs_root *cur_root;
12177 struct extent_buffer *node;
12178 struct btrfs_key key;
12182 btrfs_init_path(&path);
12183 key.objectid = BTRFS_FS_TREE_OBJECTID;
12185 key.type = BTRFS_ROOT_ITEM_KEY;
12186 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
12195 node = path.nodes[0];
12196 slot = path.slots[0];
12197 btrfs_item_key_to_cpu(node, &key, slot);
12198 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
12200 if (key.type != BTRFS_ROOT_ITEM_KEY)
12202 if (!is_fstree(key.objectid))
/* Offset is set to (u64)-1 before the key is used to read the root */
12204 key.offset = (u64)-1;
12206 cur_root = btrfs_read_fs_root(fs_info, &key);
12207 if (IS_ERR(cur_root) || !cur_root) {
12208 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
12212 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
12217 ret = btrfs_next_item(tree_root, &path);
12227 btrfs_release_path(&path);
/*
 * Fill the checksum tree by scanning the extent tree.
 *
 * The extent tree is searched from an EXTENT_ITEM key and walked leaf by
 * leaf (btrfs_next_leaf() once the slot runs past the leaf).  Items whose
 * key type is EXTENT_ITEM and whose extent flags include
 * BTRFS_EXTENT_FLAG_DATA are passed to populate_csum() starting at
 * key.objectid, using a sectorsize scratch buffer.
 *
 * @trans:     running transaction
 * @csum_root: checksum tree receiving the items
 */
12231 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
12232 struct btrfs_root *csum_root)
12234 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
12235 struct btrfs_path path;
12236 struct btrfs_extent_item *ei;
12237 struct extent_buffer *leaf;
12239 struct btrfs_key key;
12242 btrfs_init_path(&path);
12244 key.type = BTRFS_EXTENT_ITEM_KEY;
12246 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12248 btrfs_release_path(&path);
/* NOTE(review): the matching free(buf) is not visible here -- confirm */
12252 buf = malloc(csum_root->fs_info->sectorsize);
12254 btrfs_release_path(&path);
12259 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
12260 ret = btrfs_next_leaf(extent_root, &path);
12268 leaf = path.nodes[0];
12270 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
12271 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
12276 ei = btrfs_item_ptr(leaf, path.slots[0],
12277 struct btrfs_extent_item);
/* Extents without the DATA flag are handled by the branch below */
12278 if (!(btrfs_extent_flags(leaf, ei) &
12279 BTRFS_EXTENT_FLAG_DATA)) {
12284 ret = populate_csum(trans, csum_root, buf, key.objectid,
12291 btrfs_release_path(&path);
/*
 * Recalculate the data checksums and store them in the csum tree.
 *
 * The extent tree is the regular source of data extents.  Extent tree init
 * wipes out all the extent info, so in that case the caller sets
 * @search_fs_tree and the fs/subvol trees are walked instead.
 *
 * @trans:          transaction handle, forwarded unchanged
 * @csum_root:      csum tree to fill
 * @search_fs_tree: non-zero to scan fs/subvol trees, zero to scan the
 *                  extent tree
 *
 * Returns whatever the selected helper returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
/*
 * Tear down the global roots_info_cache.
 *
 * Removes the entries from the cache tree one by one, then frees the tree
 * itself and resets the global pointer to NULL.  The NULL test at the top
 * covers the case where the cache was never built.
 */
12313 static void free_roots_info_cache(void)
12315 if (!roots_info_cache)
/* Take the first entry out of the tree until none is left. */
12318 while (!cache_tree_empty(roots_info_cache)) {
12319 struct cache_extent *entry;
12320 struct root_item_info *rii;
12322 entry = first_cache_extent(roots_info_cache);
12325 remove_cache_extent(roots_info_cache, entry);
/*
 * The cache_extent is embedded in a root_item_info; recover the container.
 * NOTE(review): the statement releasing rii is not visible in this listing.
 */
12326 rii = container_of(entry, struct root_item_info, cache_extent);
12330 free(roots_info_cache);
12331 roots_info_cache = NULL;
/*
 * Fill roots_info_cache from the extent tree.
 *
 * For every tree block extent whose first inline ref is a TREE_BLOCK_REF,
 * the ref's offset is taken as the root id.  Per root id the cache remembers
 * the block with the highest level seen so far: its bytenr, generation and
 * level.
 *
 * @info: filesystem whose extent root is scanned
 *
 * NOTE(review): loop header, error handling and return statements are not
 * visible in this listing.
 */
12334 static int build_roots_info_cache(struct btrfs_fs_info *info)
12337 struct btrfs_key key;
12338 struct extent_buffer *leaf;
12339 struct btrfs_path path;
/* Allocate and initialize the global cache on first use. */
12341 if (!roots_info_cache) {
12342 roots_info_cache = malloc(sizeof(*roots_info_cache));
12343 if (!roots_info_cache)
12345 cache_tree_init(roots_info_cache);
12348 btrfs_init_path(&path);
12350 key.type = BTRFS_EXTENT_ITEM_KEY;
/* Lookup only: no transaction, nothing to insert, no COW requested. */
12352 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
12355 leaf = path.nodes[0];
12358 struct btrfs_key found_key;
12359 struct btrfs_extent_item *ei;
12360 struct btrfs_extent_inline_ref *iref;
12361 int slot = path.slots[0];
12366 struct cache_extent *entry;
12367 struct root_item_info *rii;
/* Ran off the end of the leaf: step to the next one, re-read leaf and slot. */
12369 if (slot >= btrfs_header_nritems(leaf)) {
12370 ret = btrfs_next_leaf(info->extent_root, &path);
12377 leaf = path.nodes[0];
12378 slot = path.slots[0];
12381 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/* Only EXTENT_ITEM and METADATA_ITEM keys are considered. */
12383 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
12384 found_key.type != BTRFS_METADATA_ITEM_KEY)
12387 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
12388 flags = btrfs_extent_flags(leaf, ei);
/* An EXTENT_ITEM without the TREE_BLOCK flag is not a tree block. */
12390 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
12391 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * Find the first inline ref and the block level.  For a METADATA_ITEM the
 * inline ref directly follows the extent item and the level is the key
 * offset; otherwise a btrfs_tree_block_info sits in between and carries the
 * level.
 */
12394 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
12395 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
12396 level = found_key.offset;
12398 struct btrfs_tree_block_info *binfo;
12400 binfo = (struct btrfs_tree_block_info *)(ei + 1);
12401 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
12402 level = btrfs_tree_block_level(leaf, binfo);
12406 * For a root extent, it must be of the following type and the
12407 * first (and only one) iref in the item.
12409 type = btrfs_extent_inline_ref_type(leaf, iref);
12410 if (type != BTRFS_TREE_BLOCK_REF_KEY)
/* The ref offset is the root id; the cache is keyed by [root_id, +1). */
12413 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
12414 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* New entry: level starts at the (u8)-1 sentinel tested further down. */
12416 rii = malloc(sizeof(struct root_item_info));
12421 rii->cache_extent.start = root_id;
12422 rii->cache_extent.size = 1;
12423 rii->level = (u8)-1;
12424 entry = &rii->cache_extent;
12425 ret = insert_cache_extent(roots_info_cache, entry);
12428 rii = container_of(entry, struct root_item_info,
12432 ASSERT(rii->cache_extent.start == root_id);
12433 ASSERT(rii->cache_extent.size == 1);
/*
 * A higher level, or an entry still at the sentinel, replaces the recorded
 * block and restarts node_count at 1.
 * NOTE(review): the body of the equal-level branch is not shown;
 * maybe_repair_root_item() insisting on node_count == 1 suggests it counts
 * further blocks at the same level - confirm.
 */
12435 if (level > rii->level || rii->level == (u8)-1) {
12436 rii->level = level;
12437 rii->bytenr = found_key.objectid;
12438 rii->gen = btrfs_extent_generation(leaf, ei);
12439 rii->node_count = 1;
12440 } else if (level == rii->level) {
12448 btrfs_release_path(&path);
/*
 * Compare one root item with the root node recorded in roots_info_cache and
 * optionally rewrite it.
 *
 * @path:           path whose nodes[0]/slots[0] address the root item
 * @root_key:       key of that root item; its objectid is the root id
 * @read_only_mode: non-zero to only detect a mismatch, zero to also write
 *                  the corrected bytenr/level/generation into the leaf
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
12453 static int maybe_repair_root_item(struct btrfs_path *path,
12454 const struct btrfs_key *root_key,
12455 const int read_only_mode)
12457 const u64 root_id = root_key->objectid;
12458 struct cache_extent *entry;
12459 struct root_item_info *rii;
12460 struct btrfs_root_item ri;
12461 unsigned long offset;
/* The cache is keyed by root id with a range size of 1. */
12463 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
12466 "Error: could not find extent items for root %llu\n",
12467 root_key->objectid);
12471 rii = container_of(entry, struct root_item_info, cache_extent);
12472 ASSERT(rii->cache_extent.start == root_id);
12473 ASSERT(rii->cache_extent.size == 1);
/* Anything but exactly one block at the recorded level is an error. */
12475 if (rii->node_count != 1) {
12477 "Error: could not find btree root extent for root %llu\n",
/* Copy the root item out of the leaf into a local struct. */
12482 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
12483 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
/* Any difference in bytenr, level or generation marks the item as stale. */
12485 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
12486 btrfs_root_level(&ri) != rii->level ||
12487 btrfs_root_generation(&ri) != rii->gen) {
12490 * If we're in repair mode but our caller told us to not update
12491 * the root item, i.e. just check if it needs to be updated, don't
12492 * print this message, since the caller will call us again shortly
12493 * for the same root item without read only mode (the caller will
12494 * open a transaction first).
12496 if (!(read_only_mode && repair))
12498 "%sroot item for root %llu,"
12499 " current bytenr %llu, current gen %llu, current level %u,"
12500 " new bytenr %llu, new gen %llu, new level %u\n",
12501 (read_only_mode ? "" : "fixing "),
12503 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
12504 btrfs_root_level(&ri),
12505 rii->bytenr, rii->gen, rii->level);
/* The root item claims a newer generation than the node that was found. */
12507 if (btrfs_root_generation(&ri) > rii->gen) {
12509 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
12510 root_id, btrfs_root_generation(&ri), rii->gen);
/* Patch the local copy, then write it back at the same leaf offset. */
12514 if (!read_only_mode) {
12515 btrfs_set_root_bytenr(&ri, rii->bytenr);
12516 btrfs_set_root_level(&ri, rii->level);
12517 btrfs_set_root_generation(&ri, rii->gen);
12518 write_extent_buffer(path->nodes[0], &ri,
12519 offset, sizeof(ri));
12529 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
12530 * caused read-only snapshots to be corrupted if they were created at a moment
12531 * when the source subvolume/snapshot had orphan items. The issue was that the
12532 * on-disk root items became incorrect, referring to the pre orphan cleanup root
12533 * node instead of the post orphan cleanup root node.
12534 * So this function, and its callees, just detects and fixes those cases. Even
12535 * though the regression was for read-only snapshots, this function applies to
12536 * any snapshot/subvolume root.
12537 * This must be run before any other repair code - not doing so makes other
12538 * repair code delete or modify backrefs in the extent tree for example, which
12539 * will result in an inconsistent fs after repairing the root items.
/*
 * Check every root item in the tree root against roots_info_cache and, in
 * repair mode, fix the stale ones.
 *
 * Builds the cache with build_roots_info_cache(), walks ROOT_ITEM keys from
 * BTRFS_FIRST_FREE_OBJECTID on (the TREE_RELOC objectid is tested
 * separately), and calls maybe_repair_root_item() on each.  That helper runs
 * in read-only mode as long as no transaction is open.
 *
 * @info: filesystem to check
 *
 * NOTE(review): labels, gotos and the return statement are not visible in
 * this listing; cmd_check() prints a positive result as a number of roots.
 */
12541 static int repair_root_items(struct btrfs_fs_info *info)
12543 struct btrfs_path path;
12544 struct btrfs_key key;
12545 struct extent_buffer *leaf;
12546 struct btrfs_trans_handle *trans = NULL;
12549 int need_trans = 0;
12551 btrfs_init_path(&path);
12553 ret = build_roots_info_cache(info);
12557 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
12558 key.type = BTRFS_ROOT_ITEM_KEY;
12563 * Avoid opening and committing transactions if a leaf doesn't have
12564 * any root items that need to be fixed, so that we avoid rotating
12565 * backup roots unnecessarily.
12568 trans = btrfs_start_transaction(info->tree_root, 1);
12569 if (IS_ERR(trans)) {
12570 ret = PTR_ERR(trans);
12575 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
12579 leaf = path.nodes[0];
12582 struct btrfs_key found_key;
/* End of this leaf: find the key to continue from and drop the path. */
12584 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
12585 int no_more_keys = find_next_key(&path, &key);
12587 btrfs_release_path(&path);
12589 ret = btrfs_commit_transaction(trans,
12601 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
12603 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
12605 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* read_only_mode is 1 while no transaction is open, 0 otherwise. */
12608 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
/* Repair requested but no transaction yet. */
12612 if (!trans && repair) {
12615 btrfs_release_path(&path);
12625 free_roots_info_cache();
12626 btrfs_release_path(&path);
/*
 * NOTE(review): the result of this commit is discarded, unlike the commit
 * inside the loop which stores it in ret.
 */
12628 btrfs_commit_transaction(trans, info->tree_root);
12635 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
12637 struct btrfs_trans_handle *trans;
12638 struct btrfs_block_group_cache *bg_cache;
12642 /* Clear all free space cache inodes and its extent data */
12644 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
12647 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
12650 current = bg_cache->key.objectid + bg_cache->key.offset;
12653 /* Don't forget to set cache_generation to -1 */
12654 trans = btrfs_start_transaction(fs_info->tree_root, 0);
12655 if (IS_ERR(trans)) {
12656 error("failed to update super block cache generation");
12657 return PTR_ERR(trans);
12659 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
12660 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Help text for "btrfs check", handed to usage() by cmd_check(): synopsis,
 * one-line summary, longer description, then the option descriptions.
 */
12665 const char * const cmd_check_usage[] = {
12666 "btrfs check [options] <device>",
12667 "Check structural integrity of a filesystem (unmounted).",
12668 "Check structural integrity of an unmounted filesystem. Verify internal",
12669 "trees' consistency and item connectivity. In the repair mode try to",
12670 "fix the problems found. ",
12671 "WARNING: the repair mode is considered dangerous",
12673 "-s|--super <superblock> use this superblock copy",
12674 "-b|--backup use the first valid backup root copy",
12675 "--repair try to repair the filesystem",
12676 "--readonly run in read-only mode (default)",
12677 "--init-csum-tree create a new CRC tree",
12678 "--init-extent-tree create a new extent tree",
12679 "--mode <MODE> allows choice of memory/IO trade-offs",
12680 " where MODE is one of:",
12681 " original - read inodes and extents to memory (requires",
12682 " more memory, does less IO)",
12683 " lowmem - try to use less memory but read blocks again",
12685 "--check-data-csum verify checksums of data blocks",
12686 "-Q|--qgroup-report print a report on qgroup consistency",
12687 "-E|--subvol-extents <subvolid>",
12688 " print subvolume extents and sharing state",
12689 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12690 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12691 "-p|--progress indicate progress",
12692 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check".
 *
 * Parses the command line, refuses mounted devices, opens the filesystem and
 * then, depending on the options, clears the space cache, prints a qgroup or
 * subvolume extent report, or runs the check phases in this order: extents,
 * root items, free space cache/tree, fs roots, csums, root refs and quota
 * groups, followed by a summary of the collected counters.
 *
 * @argc, @argv: command line; exactly one non-option argument (the device)
 *               is expected
 *
 * NOTE(review): case labels, gotos, error accumulation and the return
 * statement are not visible in this listing.
 */
12696 int cmd_check(int argc, char **argv)
12698 struct cache_tree root_cache;
12699 struct btrfs_root *root;
12700 struct btrfs_fs_info *info;
12703 u64 tree_root_bytenr = 0;
12704 u64 chunk_root_bytenr = 0;
12705 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12709 int init_csum_tree = 0;
12711 int clear_space_cache = 0;
12712 int qgroup_report = 0;
12713 int qgroups_repaired = 0;
/* Exclusive open is the baseline; options below OR further flags in. */
12714 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Values for long-only options start at 257, above any single character. */
12718 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12719 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12720 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12721 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12722 static const struct option long_options[] = {
12723 { "super", required_argument, NULL, 's' },
12724 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12725 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12726 { "init-csum-tree", no_argument, NULL,
12727 GETOPT_VAL_INIT_CSUM },
12728 { "init-extent-tree", no_argument, NULL,
12729 GETOPT_VAL_INIT_EXTENT },
12730 { "check-data-csum", no_argument, NULL,
12731 GETOPT_VAL_CHECK_CSUM },
12732 { "backup", no_argument, NULL, 'b' },
12733 { "subvol-extents", required_argument, NULL, 'E' },
12734 { "qgroup-report", no_argument, NULL, 'Q' },
12735 { "tree-root", required_argument, NULL, 'r' },
12736 { "chunk-root", required_argument, NULL,
12737 GETOPT_VAL_CHUNK_TREE },
12738 { "progress", no_argument, NULL, 'p' },
12739 { "mode", required_argument, NULL,
12741 { "clear-space-cache", required_argument, NULL,
12742 GETOPT_VAL_CLEAR_SPACE_CACHE},
12743 { NULL, 0, NULL, 0}
/*
 * Short options: -a, -s <arg>, -b, -r <arg>, -p.
 * NOTE(review): the usage text advertises -Q and -E, but neither letter is
 * in this option string; only --qgroup-report and --subvol-extents map to
 * 'Q' and 'E'.
 */
12746 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12750 case 'a': /* ignored */ break;
12752 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* Superblock copy selection: an index that is turned into a byte offset. */
12755 num = arg_strtou64(optarg);
12756 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12758 "super mirror should be less than %d",
12759 BTRFS_SUPER_MIRROR_MAX);
12762 bytenr = btrfs_sb_offset(((int)num));
12763 printf("using SB copy %llu, bytenr %llu\n", num,
12764 (unsigned long long)bytenr);
12770 subvolid = arg_strtou64(optarg);
12773 tree_root_bytenr = arg_strtou64(optarg);
12775 case GETOPT_VAL_CHUNK_TREE:
12776 chunk_root_bytenr = arg_strtou64(optarg);
12779 ctx.progress_enabled = true;
12783 usage(cmd_check_usage);
12784 case GETOPT_VAL_REPAIR:
12785 printf("enabling repair mode\n");
12787 ctree_flags |= OPEN_CTREE_WRITES;
12789 case GETOPT_VAL_READONLY:
12792 case GETOPT_VAL_INIT_CSUM:
12793 printf("Creating a new CRC tree\n");
12794 init_csum_tree = 1;
12796 ctree_flags |= OPEN_CTREE_WRITES;
12798 case GETOPT_VAL_INIT_EXTENT:
12799 init_extent_tree = 1;
12800 ctree_flags |= (OPEN_CTREE_WRITES |
12801 OPEN_CTREE_NO_BLOCK_GROUPS);
12804 case GETOPT_VAL_CHECK_CSUM:
12805 check_data_csum = 1;
12807 case GETOPT_VAL_MODE:
12808 check_mode = parse_check_mode(optarg);
12809 if (check_mode == CHECK_MODE_UNKNOWN) {
12810 error("unknown mode: %s", optarg);
12814 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12815 if (strcmp(optarg, "v1") == 0) {
12816 clear_space_cache = 1;
12817 } else if (strcmp(optarg, "v2") == 0) {
12818 clear_space_cache = 2;
12819 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12822 "invalid argument to --clear-space-cache, must be v1 or v2");
12825 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument must remain after the options. */
12830 if (check_argc_exact(argc - optind, 1))
12831 usage(cmd_check_usage);
/* Progress reporting uses a task set up here and torn down at the end. */
12833 if (ctx.progress_enabled) {
12834 ctx.tp = TASK_NOTHING;
12835 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12838 /* This check is the only reason for --readonly to exist */
12839 if (readonly && repair) {
12840 error("repair options are not compatible with --readonly");
12845 * Not supported yet
12847 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12848 error("low memory mode doesn't support repair yet");
12853 cache_tree_init(&root_cache);
/* A mounted device is rejected; a failing mount check is an error too. */
12855 if((ret = check_mounted(argv[optind])) < 0) {
12856 error("could not check mount status: %s", strerror(-ret));
12860 error("%s is currently mounted, aborting", argv[optind]);
12866 /* only allow partial opening under repair mode */
12868 ctree_flags |= OPEN_CTREE_PARTIAL;
12870 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12871 chunk_root_bytenr, ctree_flags);
12873 error("cannot open file system");
12879 global_info = info;
12880 root = info->fs_root;
/* --clear-space-cache v1: the FREE_SPACE_TREE flag is tested first. */
12881 if (clear_space_cache == 1) {
12882 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12884 "free space cache v2 detected, use --clear-space-cache v2");
12888 printf("Clearing free space cache\n");
12889 ret = clear_free_space_cache(info);
12891 error("failed to clear free space cache");
12894 printf("Free space cache cleared\n");
/* --clear-space-cache v2: the flag is tested the other way round. */
12897 } else if (clear_space_cache == 2) {
12898 if (!btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) {
12899 printf("no free space cache v2 to clear\n");
12903 printf("Clear free space cache v2\n");
12904 ret = btrfs_clear_free_space_tree(info);
12906 error("failed to clear free space cache v2: %d", ret);
12909 printf("free space cache v2 cleared\n");
12915 * repair mode will force us to commit transaction which
12916 * will make us fail to load log tree when mounting.
12918 if (repair && btrfs_super_log_root(info->super_copy)) {
12919 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12925 ret = zero_log_tree(root);
12928 error("failed to zero log tree: %d", ret);
12933 uuid_unparse(info->super_copy->fsid, uuidbuf);
/* Report modes: qgroup report first, then subvolume extent state. */
12934 if (qgroup_report) {
12935 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12937 ret = qgroup_verify_all(info);
12944 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12945 subvolid, argv[optind], uuidbuf);
12946 ret = print_extent_state(info, subvolid);
12950 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* Tree, device and chunk roots must all be up to date to go on. */
12952 if (!extent_buffer_uptodate(info->tree_root->node) ||
12953 !extent_buffer_uptodate(info->dev_root->node) ||
12954 !extent_buffer_uptodate(info->chunk_root->node)) {
12955 error("critical roots corrupted, unable to check the filesystem");
/* Optional re-initialization of extent and/or csum tree in one transaction. */
12961 if (init_extent_tree || init_csum_tree) {
12962 struct btrfs_trans_handle *trans;
12964 trans = btrfs_start_transaction(info->extent_root, 0);
12965 if (IS_ERR(trans)) {
12966 error("error starting transaction");
12967 ret = PTR_ERR(trans);
12972 if (init_extent_tree) {
12973 printf("Creating a new extent tree\n");
12974 ret = reinit_extent_tree(trans, info);
12980 if (init_csum_tree) {
12981 printf("Reinitialize checksum tree\n");
12982 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12984 error("checksum tree initialization failed: %d",
12991 ret = fill_csum_tree(trans, info->csum_root,
12995 error("checksum tree refilling failed: %d", ret);
13000 * Ok now we commit and run the normal fsck, which will add
13001 * extent entries for all of the items it finds.
13003 ret = btrfs_commit_transaction(trans, info->extent_root);
13008 if (!extent_buffer_uptodate(info->extent_root->node)) {
13009 error("critical: extent_root, unable to check the filesystem");
13014 if (!extent_buffer_uptodate(info->csum_root->node)) {
13015 error("critical: csum_root, unable to check the filesystem");
/* Phase: extents and chunks; the checker depends on check_mode. */
13021 if (!ctx.progress_enabled)
13022 fprintf(stderr, "checking extents\n");
13023 if (check_mode == CHECK_MODE_LOWMEM)
13024 ret = check_chunks_and_extents_v2(root);
13026 ret = check_chunks_and_extents(root);
13030 "errors found in extent allocation tree or chunk allocation");
/* Phase: root items; a positive result is printed as a number of roots. */
13032 ret = repair_root_items(info);
13035 error("failed to repair root items: %s", strerror(-ret));
13039 fprintf(stderr, "Fixed %d roots.\n", ret);
13041 } else if (ret > 0) {
13043 "Found %d roots with an outdated root item.\n",
13046 "Please run a filesystem check with the option --repair to fix them.\n");
/* Phase: free space tree or free space cache, whichever is in use. */
13052 if (!ctx.progress_enabled) {
13053 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13054 fprintf(stderr, "checking free space tree\n");
13056 fprintf(stderr, "checking free space cache\n");
13058 ret = check_space_cache(root);
13061 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
13062 error("errors found in free space tree");
13064 error("errors found in free space cache");
13069 * We used to have to have these hole extents in between our real
13070 * extents so if we don't have this flag set we need to make sure there
13071 * are no gaps in the file extents for inodes, otherwise we can just
13072 * ignore it when this happens.
13074 no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
/* Phase: fs roots; the checker depends on check_mode. */
13075 if (!ctx.progress_enabled)
13076 fprintf(stderr, "checking fs roots\n");
13077 if (check_mode == CHECK_MODE_LOWMEM)
13078 ret = check_fs_roots_v2(root->fs_info);
13080 ret = check_fs_roots(root, &root_cache);
13083 error("errors found in fs roots");
/* Phase: checksums. */
13087 fprintf(stderr, "checking csums\n");
13088 ret = check_csums(root);
13091 error("errors found in csum tree");
/* Phase: root refs. */
13095 fprintf(stderr, "checking root refs\n");
13096 /* For low memory mode, check_fs_roots_v2 handles root refs */
13097 if (check_mode != CHECK_MODE_LOWMEM) {
13098 ret = check_root_refs(root, &root_cache);
13101 error("errors found in root refs");
/* Repair mode only: re-COW every extent buffer queued on recow_ebs. */
13106 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
13107 struct extent_buffer *eb;
13109 eb = list_first_entry(&root->fs_info->recow_ebs,
13110 struct extent_buffer, recow);
13111 list_del_init(&eb->recow);
13112 ret = recow_extent_buffer(root, eb);
13115 error("fails to fix transid errors");
/* Drain delete_items, calling delete_bad_item() on each entry. */
13120 while (!list_empty(&delete_items)) {
13121 struct bad_item *bad;
13123 bad = list_first_entry(&delete_items, struct bad_item, list);
13124 list_del_init(&bad->list);
13126 ret = delete_bad_item(root, bad);
/* Phase: quota groups, only when quotas are enabled. */
13132 if (info->quota_enabled) {
13133 fprintf(stderr, "checking quota groups\n");
13134 ret = qgroup_verify_all(info);
13137 error("failed to check quota groups");
13141 ret = repair_qgroups(info, &qgroups_repaired);
13144 error("failed to repair quota groups");
/* Buffers still queued on recow_ebs are reported as transid errors. */
13150 if (!list_empty(&root->fs_info->recow_ebs)) {
13151 error("transid errors in file system");
13156 if (found_old_backref) { /*
13157 * there was a disk format change when mixed
13158 * backref was in testing tree. The old format
13159 * existed about one week.
13161 printf("\n * Found old mixed backref format. "
13162 "The old format is not supported! *"
13163 "\n * Please mount the FS in readonly mode, "
13164 "backup data and re-format the FS. *\n\n");
13167 printf("found %llu bytes used, ",
13168 (unsigned long long)bytes_used);
13170 printf("error(s) found\n");
13172 printf("no error found\n");
13173 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
13174 printf("total tree bytes: %llu\n",
13175 (unsigned long long)total_btree_bytes);
13176 printf("total fs tree bytes: %llu\n",
13177 (unsigned long long)total_fs_tree_bytes);
13178 printf("total extent tree bytes: %llu\n",
13179 (unsigned long long)total_extent_tree_bytes);
13180 printf("btree space waste bytes: %llu\n",
13181 (unsigned long long)btree_space_waste);
13182 printf("file data blocks allocated: %llu\n referenced %llu\n",
13183 (unsigned long long)data_bytes_allocated,
13184 (unsigned long long)data_bytes_referenced);
13186 free_qgroup_counts();
13187 free_root_recs_tree(&root_cache);
13191 if (ctx.progress_enabled)
13192 task_deinit(ctx.info);