2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
50 TASK_NOTHING, /* have to be the last element */
55 enum task_position tp;
57 struct task_info *info;
60 static u64 bytes_used = 0;
61 static u64 total_csum_bytes = 0;
62 static u64 total_btree_bytes = 0;
63 static u64 total_fs_tree_bytes = 0;
64 static u64 total_extent_tree_bytes = 0;
65 static u64 btree_space_waste = 0;
66 static u64 data_bytes_allocated = 0;
67 static u64 data_bytes_referenced = 0;
68 static int found_old_backref = 0;
69 static LIST_HEAD(duplicate_extents);
70 static LIST_HEAD(delete_items);
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 enum btrfs_check_mode {
82 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
85 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87 struct extent_backref {
88 struct list_head list;
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 return list_entry(entry, struct extent_backref, list);
101 struct data_backref {
102 struct extent_backref node;
116 static inline struct data_backref* to_data_backref(struct extent_backref *back)
118 return container_of(back, struct data_backref, node);
122 * Much like data_backref, but with the undetermined members removed
123 * and changed to use list_head.
124 * During extent scan, it is stored in root->orphan_data_extent.
125 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
127 struct orphan_data_extent {
128 struct list_head list;
136 struct tree_backref {
137 struct extent_backref node;
144 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
146 return container_of(back, struct tree_backref, node);
149 /* Explicit initialization for extent_record::flag_block_full_backref */
150 enum { FLAG_UNSET = 2 };
152 struct extent_record {
153 struct list_head backrefs;
154 struct list_head dups;
155 struct list_head list;
156 struct cache_extent cache;
157 struct btrfs_disk_key parent_key;
162 u64 extent_item_refs;
164 u64 parent_generation;
168 unsigned int flag_block_full_backref:2;
169 unsigned int found_rec:1;
170 unsigned int content_checked:1;
171 unsigned int owner_ref_checked:1;
172 unsigned int is_root:1;
173 unsigned int metadata:1;
174 unsigned int bad_full_backref:1;
175 unsigned int crossing_stripes:1;
176 unsigned int wrong_chunk_type:1;
179 static inline struct extent_record* to_extent_record(struct list_head *entry)
181 return container_of(entry, struct extent_record, list);
184 struct inode_backref {
185 struct list_head list;
186 unsigned int found_dir_item:1;
187 unsigned int found_dir_index:1;
188 unsigned int found_inode_ref:1;
198 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
200 return list_entry(entry, struct inode_backref, list);
203 struct root_item_record {
204 struct list_head list;
211 struct btrfs_key drop_key;
/*
 * Per-backref error bits. They are OR-ed into a backref's errors field
 * (see add_inode_backref() and maybe_free_inode_rec()) and converted to
 * text by print_ref_error().
 * NOTE(review): the *_ROOT_* bits are presumably meant for root backrefs;
 * their users are not visible in this part of the file - confirm.
 */
214 #define REF_ERR_NO_DIR_ITEM (1 << 0)
215 #define REF_ERR_NO_DIR_INDEX (1 << 1)
216 #define REF_ERR_NO_INODE_REF (1 << 2)
217 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
218 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
219 #define REF_ERR_DUP_INODE_REF (1 << 5)
220 #define REF_ERR_INDEX_UNMATCH (1 << 6)
221 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
222 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
223 #define REF_ERR_NO_ROOT_REF (1 << 9)
224 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
225 #define REF_ERR_DUP_ROOT_REF (1 << 11)
226 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
228 struct file_extent_hole {
234 struct inode_record {
235 struct list_head backrefs;
236 unsigned int checked:1;
237 unsigned int merging:1;
238 unsigned int found_inode_item:1;
239 unsigned int found_dir_item:1;
240 unsigned int found_file_extent:1;
241 unsigned int found_csum_item:1;
242 unsigned int some_csum_missing:1;
243 unsigned int nodatasum:1;
256 struct rb_root holes;
257 struct list_head orphan_extents;
/*
 * Per-inode error bits accumulated in an inode_record's errors field
 * (set in maybe_free_inode_rec(), process_inode_item() and
 * merge_inode_recs()) and converted to text by print_inode_error().
 */
262 #define I_ERR_NO_INODE_ITEM (1 << 0)
263 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
264 #define I_ERR_DUP_INODE_ITEM (1 << 2)
265 #define I_ERR_DUP_DIR_INDEX (1 << 3)
266 #define I_ERR_ODD_DIR_ITEM (1 << 4)
267 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
268 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
269 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
270 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
271 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
272 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
273 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
274 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
275 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
276 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
278 struct root_backref {
279 struct list_head list;
280 unsigned int found_dir_item:1;
281 unsigned int found_dir_index:1;
282 unsigned int found_back_ref:1;
283 unsigned int found_forward_ref:1;
284 unsigned int reachable:1;
293 static inline struct root_backref* to_root_backref(struct list_head *entry)
295 return list_entry(entry, struct root_backref, list);
299 struct list_head backrefs;
300 struct cache_extent cache;
301 unsigned int found_root_item:1;
307 struct cache_extent cache;
312 struct cache_extent cache;
313 struct cache_tree root_cache;
314 struct cache_tree inode_cache;
315 struct inode_record *current;
324 struct walk_control {
325 struct cache_tree shared;
326 struct shared_node *nodes[BTRFS_MAX_LEVEL];
332 struct btrfs_key key;
334 struct list_head list;
337 struct extent_entry {
342 struct list_head list;
345 struct root_item_info {
346 /* level of the root */
348 /* number of nodes at this level, must be 1 for a root */
352 struct cache_extent cache_extent;
356 * Error bit for low memory mode check.
358 * Currently no caller cares about it yet. Just internal use for error
361 #define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
362 #define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
363 #define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
364 #define REFERENCER_MISSING (1 << 3) /* Referencer not found */
/*
 * NOTE(review): REFERENCER_MISMATCH and CROSSING_STRIPE_BOUNDARY are both
 * defined as (1 << 4), so the two conditions cannot be told apart once they
 * are OR-ed into an error mask. Confirm whether sharing the bit is
 * intentional before testing for either one individually.
 */
365 #define REFERENCER_MISMATCH (1 << 4) /* Referencer found but does not match */
366 #define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
367 #define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */
368 #define UNKNOWN_TYPE (1 << 6) /* Unknown type */
369 #define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */
370 #define CHUNK_TYPE_MISMATCH (1 << 8)
372 static void *print_status_check(void *p)
374 struct task_ctx *priv = p;
375 const char work_indicator[] = { '.', 'o', 'O', 'o' };
377 static char *task_position_string[] = {
379 "checking free space cache",
383 task_period_start(priv->info, 1000 /* 1s */);
385 if (priv->tp == TASK_NOTHING)
389 printf("%s [%c]\r", task_position_string[priv->tp],
390 work_indicator[count % 4]);
393 task_period_wait(priv->info);
398 static int print_status_return(void *p)
/*
 * Translate a mode name string into a btrfs_check_mode value.
 *
 * "lowmem" selects CHECK_MODE_LOWMEM; "orig" and "original" both select
 * CHECK_MODE_ORIGINAL. The comparison is exact and case-sensitive (strcmp).
 * Any other string yields CHECK_MODE_UNKNOWN.
 *
 * @str is passed straight to strcmp(), so it must not be NULL.
 */
406 static enum btrfs_check_mode parse_check_mode(const char *str)
408 if (strcmp(str, "lowmem") == 0)
409 return CHECK_MODE_LOWMEM;
/* two accepted spellings for the original mode */
410 if (strcmp(str, "orig") == 0)
411 return CHECK_MODE_ORIGINAL;
412 if (strcmp(str, "original") == 0)
413 return CHECK_MODE_ORIGINAL;
415 return CHECK_MODE_UNKNOWN;
/*
 * Compatibility helper that allows older code to be reused.
 *
 * Checks whether the hole tree @holes is empty and otherwise fetches its
 * first (leftmost) file_extent_hole through rb_first().
 * NOTE(review): the return statements are not visible here. Callers compare
 * the result against an inode size, so it is presumably the start offset of
 * the first hole, with some sentinel for an empty tree - confirm.
 */
419 static u64 first_extent_gap(struct rb_root *holes)
421 struct file_extent_hole *hole;
423 if (RB_EMPTY_ROOT(holes))
426 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
430 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
432 struct file_extent_hole *hole1;
433 struct file_extent_hole *hole2;
435 hole1 = rb_entry(node1, struct file_extent_hole, node);
436 hole2 = rb_entry(node2, struct file_extent_hole, node);
438 if (hole1->start > hole2->start)
440 if (hole1->start < hole2->start)
442 /* Now hole1->start == hole2->start */
443 if (hole1->len >= hole2->len)
445 * Hole 1 will be merge center
446 * Same hole will be merged later
449 /* Hole 2 will be merge center */
454 * Add a hole to the record
456 * This merges holes on behalf of copy_file_extent_holes(),
457 * which ensures there will be no contiguous holes.
459 static int add_file_extent_hole(struct rb_root *holes,
462 struct file_extent_hole *hole;
463 struct file_extent_hole *prev = NULL;
464 struct file_extent_hole *next = NULL;
466 hole = malloc(sizeof(*hole));
471 /* Since compare will not return 0, no -EEXIST will happen */
472 rb_insert(holes, &hole->node, compare_hole);
474 /* simple merge with previous hole */
475 if (rb_prev(&hole->node))
476 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
478 if (prev && prev->start + prev->len >= hole->start) {
479 hole->len = hole->start + hole->len - prev->start;
480 hole->start = prev->start;
481 rb_erase(&prev->node, holes);
486 /* iterate merge with next holes */
488 if (!rb_next(&hole->node))
490 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
492 if (hole->start + hole->len >= next->start) {
493 if (hole->start + hole->len <= next->start + next->len)
494 hole->len = next->start + next->len -
496 rb_erase(&next->node, holes);
505 static int compare_hole_range(struct rb_node *node, void *data)
507 struct file_extent_hole *hole;
510 hole = (struct file_extent_hole *)data;
513 hole = rb_entry(node, struct file_extent_hole, node);
514 if (start < hole->start)
516 if (start >= hole->start && start < hole->start + hole->len)
522 * Delete a hole in the record
524 * This will split the hole and is much stricter than add.
526 static int del_file_extent_hole(struct rb_root *holes,
529 struct file_extent_hole *hole;
530 struct file_extent_hole tmp;
535 struct rb_node *node;
542 node = rb_search(holes, &tmp, compare_hole_range, NULL);
545 hole = rb_entry(node, struct file_extent_hole, node);
546 if (start + len > hole->start + hole->len)
550 * Now there will be no overlap, delete the hole and re-add the
551 * split(s) if they exist.
553 if (start > hole->start) {
554 prev_start = hole->start;
555 prev_len = start - hole->start;
558 if (hole->start + hole->len > start + len) {
559 next_start = start + len;
560 next_len = hole->start + hole->len - start - len;
563 rb_erase(node, holes);
566 ret = add_file_extent_hole(holes, prev_start, prev_len);
571 ret = add_file_extent_hole(holes, next_start, next_len);
578 static int copy_file_extent_holes(struct rb_root *dst,
581 struct file_extent_hole *hole;
582 struct rb_node *node;
585 node = rb_first(src);
587 hole = rb_entry(node, struct file_extent_hole, node);
588 ret = add_file_extent_hole(dst, hole->start, hole->len);
591 node = rb_next(node);
596 static void free_file_extent_holes(struct rb_root *holes)
598 struct rb_node *node;
599 struct file_extent_hole *hole;
601 node = rb_first(holes);
603 hole = rb_entry(node, struct file_extent_hole, node);
604 rb_erase(node, holes);
606 node = rb_first(holes);
610 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * Tie @root to transaction @trans the first time the root is seen in it.
 *
 * Nothing is done when root->last_trans already equals trans->transid.
 * Otherwise the root is flagged track_dirty, last_trans is updated to the
 * transaction id, and the current root->node becomes the commit_root.
 */
612 static void record_root_in_trans(struct btrfs_trans_handle *trans,
613 struct btrfs_root *root)
615 if (root->last_trans != trans->transid) {
616 root->track_dirty = 1;
617 root->last_trans = trans->transid;
618 root->commit_root = root->node;
/* presumably takes an extra reference because commit_root now also points at the node - confirm */
619 extent_buffer_get(root->node);
/*
 * Convert the file-type bits of an inode mode into a BTRFS_FT_* value.
 *
 * The mode is masked with S_IFMT and shifted right by S_SHIFT to index a
 * static lookup table. Table slots without an initializer are zero, so an
 * in-range type that matches none of the listed S_IF* values yields 0.
 *
 * NOTE(review): the table is declared with S_IFMT >> S_SHIFT entries, yet
 * the index (imode & S_IFMT) >> S_SHIFT can equal S_IFMT >> S_SHIFT when
 * every type bit of @imode is set, which would read one element past the
 * end. Confirm callers never pass such a mode, or size the table with +1.
 */
623 static u8 imode_to_type(u32 imode)
626 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
627 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
628 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
629 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
630 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
631 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
632 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
633 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
636 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
640 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
642 struct device_record *rec1;
643 struct device_record *rec2;
645 rec1 = rb_entry(node1, struct device_record, node);
646 rec2 = rb_entry(node2, struct device_record, node);
647 if (rec1->devid > rec2->devid)
649 else if (rec1->devid < rec2->devid)
/*
 * Make a deep copy of @orig_rec.
 *
 * The record is first copied bytewise. Its two list heads and its hole tree
 * are then re-initialised and refilled with private copies of every backref
 * (including the trailing name bytes), every orphan data extent and every
 * file extent hole of the original.
 *
 * Returns the new record, or ERR_PTR(-ENOMEM) when the record itself cannot
 * be allocated. The tail of the function (from rb_first() onwards) tears
 * down the partially built copy.
 * NOTE(review): the jumps that reach this cleanup and its final return value
 * are not visible here - confirm.
 */
655 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
657 struct inode_record *rec;
658 struct inode_backref *backref;
659 struct inode_backref *orig;
660 struct inode_backref *tmp;
661 struct orphan_data_extent *src_orphan;
662 struct orphan_data_extent *dst_orphan;
667 rec = malloc(sizeof(*rec));
669 return ERR_PTR(-ENOMEM);
670 memcpy(rec, orig_rec, sizeof(*rec));
/* the memcpy above copied the original's list/tree pointers; reset them so the copy owns its own containers */
672 INIT_LIST_HEAD(&rec->backrefs);
673 INIT_LIST_HEAD(&rec->orphan_extents);
674 rec->holes = RB_ROOT;
676 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* a backref is allocated together with its name plus one terminating byte */
677 size = sizeof(*orig) + orig->namelen + 1;
678 backref = malloc(size);
683 memcpy(backref, orig, size);
684 list_add_tail(&backref->list, &rec->backrefs);
686 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
687 dst_orphan = malloc(sizeof(*dst_orphan));
692 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
693 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
695 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* cleanup: walk the holes already copied into the new record */
702 rb = rb_first(&rec->holes);
704 struct file_extent_hole *hole;
706 hole = rb_entry(rb, struct file_extent_hole, node);
712 if (!list_empty(&rec->backrefs))
713 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
714 list_del(&orig->list);
718 if (!list_empty(&rec->orphan_extents))
/*
 * NOTE(review): this loop walks the orphan extent list with cursors of type
 * struct inode_backref (orig/tmp) although the entries are
 * struct orphan_data_extent. It only works because both structures start
 * with their list member; dedicated orphan_data_extent cursors would be
 * safer.
 */
719 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
720 list_del(&orig->list);
729 static void print_orphan_data_extents(struct list_head *orphan_extents,
732 struct orphan_data_extent *orphan;
734 if (list_empty(orphan_extents))
736 printf("The following data extent is lost in tree %llu:\n",
738 list_for_each_entry(orphan, orphan_extents, list) {
739 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
740 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Print a human readable summary of the errors recorded in @rec to stderr.
 *
 * The summary line has the form "root <id> inode <ino> errors <hex>"
 * followed by one ", <description>" fragment for every I_ERR_* bit that is
 * set, and ends with a newline. For a relocation tree the text "reloc" is
 * emitted directly before "root" and the id printed is the root key's offset
 * rather than its objectid.
 *
 * After the summary line, the orphan data extents are listed when
 * I_ERR_FILE_EXTENT_ORPHAN is set, and the file extent holes are listed when
 * I_ERR_FILE_EXTENT_DISCOUNT is set.
 */
745 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
747 u64 root_objectid = root->root_key.objectid;
748 int errors = rec->errors;
/* For a reloc root, report the objectid of its corresponding fs root */
753 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
754 root_objectid = root->root_key.offset;
755 fprintf(stderr, "reloc");
757 fprintf(stderr, "root %llu inode %llu errors %x",
758 (unsigned long long) root_objectid,
759 (unsigned long long) rec->ino, rec->errors);
/* one text fragment per error bit, in bit order */
761 if (errors & I_ERR_NO_INODE_ITEM)
762 fprintf(stderr, ", no inode item");
763 if (errors & I_ERR_NO_ORPHAN_ITEM)
764 fprintf(stderr, ", no orphan item");
765 if (errors & I_ERR_DUP_INODE_ITEM)
766 fprintf(stderr, ", dup inode item");
767 if (errors & I_ERR_DUP_DIR_INDEX)
768 fprintf(stderr, ", dup dir index");
769 if (errors & I_ERR_ODD_DIR_ITEM)
770 fprintf(stderr, ", odd dir item");
771 if (errors & I_ERR_ODD_FILE_EXTENT)
772 fprintf(stderr, ", odd file extent");
773 if (errors & I_ERR_BAD_FILE_EXTENT)
774 fprintf(stderr, ", bad file extent");
775 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
776 fprintf(stderr, ", file extent overlap");
777 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
778 fprintf(stderr, ", file extent discount");
779 if (errors & I_ERR_DIR_ISIZE_WRONG)
780 fprintf(stderr, ", dir isize wrong");
781 if (errors & I_ERR_FILE_NBYTES_WRONG)
782 fprintf(stderr, ", nbytes wrong");
783 if (errors & I_ERR_ODD_CSUM_ITEM)
784 fprintf(stderr, ", odd csum item");
785 if (errors & I_ERR_SOME_CSUM_MISSING)
786 fprintf(stderr, ", some csum missing");
787 if (errors & I_ERR_LINK_COUNT_WRONG)
788 fprintf(stderr, ", link count wrong");
789 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
790 fprintf(stderr, ", orphan file extent");
791 fprintf(stderr, "\n");
792 /* Print the orphan extents if needed */
793 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
/* NOTE(review): passes root->objectid while the summary line uses root->root_key.objectid - confirm both identify the same tree */
794 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
796 /* Print the holes if needed */
797 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
798 struct file_extent_hole *hole;
799 struct rb_node *node;
802 node = rb_first(&rec->holes);
803 fprintf(stderr, "Found file extent holes:\n");
/* walk the hole tree in order, one line per hole */
806 hole = rb_entry(node, struct file_extent_hole, node);
807 fprintf(stderr, "\tstart: %llu, len: %llu\n",
808 hole->start, hole->len);
809 node = rb_next(node);
/* NOTE(review): the condition guarding this fallback line is not visible; presumably it covers the case where no hole was recorded - confirm */
812 fprintf(stderr, "\tstart: 0, len: %llu\n",
813 round_up(rec->isize, root->sectorsize));
/*
 * Print the text for every REF_ERR_* bit set in @errors to stderr.
 *
 * Each set bit contributes one ", <description>" fragment, emitted in bit
 * order, and the output always ends with a newline. The leading comma means
 * the caller is expected to have printed the start of the line already.
 */
817 static void print_ref_error(int errors)
819 if (errors & REF_ERR_NO_DIR_ITEM)
820 fprintf(stderr, ", no dir item");
821 if (errors & REF_ERR_NO_DIR_INDEX)
822 fprintf(stderr, ", no dir index");
823 if (errors & REF_ERR_NO_INODE_REF)
824 fprintf(stderr, ", no inode ref");
825 if (errors & REF_ERR_DUP_DIR_ITEM)
826 fprintf(stderr, ", dup dir item");
827 if (errors & REF_ERR_DUP_DIR_INDEX)
828 fprintf(stderr, ", dup dir index");
829 if (errors & REF_ERR_DUP_INODE_REF)
830 fprintf(stderr, ", dup inode ref");
831 if (errors & REF_ERR_INDEX_UNMATCH)
832 fprintf(stderr, ", index mismatch");
833 if (errors & REF_ERR_FILETYPE_UNMATCH)
834 fprintf(stderr, ", filetype mismatch");
835 if (errors & REF_ERR_NAME_TOO_LONG)
836 fprintf(stderr, ", name too long");
837 if (errors & REF_ERR_NO_ROOT_REF)
838 fprintf(stderr, ", no root ref");
839 if (errors & REF_ERR_NO_ROOT_BACKREF)
840 fprintf(stderr, ", no root backref");
841 if (errors & REF_ERR_DUP_ROOT_REF)
842 fprintf(stderr, ", dup root ref");
843 if (errors & REF_ERR_DUP_ROOT_BACKREF)
844 fprintf(stderr, ", dup root backref");
845 fprintf(stderr, "\n");
848 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
851 struct ptr_node *node;
852 struct cache_extent *cache;
853 struct inode_record *rec = NULL;
856 cache = lookup_cache_extent(inode_cache, ino, 1);
858 node = container_of(cache, struct ptr_node, cache);
860 if (mod && rec->refs > 1) {
861 node->data = clone_inode_rec(rec);
862 if (IS_ERR(node->data))
868 rec = calloc(1, sizeof(*rec));
870 return ERR_PTR(-ENOMEM);
872 rec->extent_start = (u64)-1;
874 INIT_LIST_HEAD(&rec->backrefs);
875 INIT_LIST_HEAD(&rec->orphan_extents);
876 rec->holes = RB_ROOT;
878 node = malloc(sizeof(*node));
881 return ERR_PTR(-ENOMEM);
883 node->cache.start = ino;
884 node->cache.size = 1;
887 if (ino == BTRFS_FREE_INO_OBJECTID)
890 ret = insert_cache_extent(inode_cache, &node->cache);
892 return ERR_PTR(-EEXIST);
897 static void free_orphan_data_extents(struct list_head *orphan_extents)
899 struct orphan_data_extent *orphan;
901 while (!list_empty(orphan_extents)) {
902 orphan = list_entry(orphan_extents->next,
903 struct orphan_data_extent, list);
904 list_del(&orphan->list);
909 static void free_inode_rec(struct inode_record *rec)
911 struct inode_backref *backref;
916 while (!list_empty(&rec->backrefs)) {
917 backref = to_inode_backref(rec->backrefs.next);
918 list_del(&backref->list);
921 free_orphan_data_extents(&rec->orphan_extents);
922 free_file_extent_holes(&rec->holes);
/*
 * Decide whether @rec has been fully verified and can be dropped.
 *
 * The visible condition requires all of: no error bits set, the record
 * marked checked, its inode item seen, nlink equal to the number of links
 * found, and an empty backref list.
 * NOTE(review): the return statements are not visible here; presumably the
 * result is non-zero when the condition holds and zero otherwise - confirm.
 */
926 static int can_free_inode_rec(struct inode_record *rec)
928 if (!rec->errors && rec->checked && rec->found_inode_item &&
929 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Re-evaluate @rec after new information arrived and drop it from
 * @inode_cache once nothing is left to report.
 *
 * Steps visible here:
 *  - backrefs that have both a dir item and a dir index are compared with
 *    the file type derived from the inode mode; consistent, error-free
 *    backrefs with an inode ref are unlinked when the link count matches
 *  - once the record is checked and not being merged, type specific sanity
 *    checks set I_ERR_* bits (directory size, stray file extents or dir
 *    items, nbytes, missing extents, checksum state)
 *  - if can_free_inode_rec() agrees, the record's cache entry is removed
 * NOTE(review): the early returns and the final frees are not visible in
 * this listing - confirm.
 */
934 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
935 struct inode_record *rec)
937 struct cache_extent *cache;
938 struct inode_backref *tmp, *backref;
939 struct ptr_node *node;
/* without the inode item there is no mode or link count to check against */
942 if (!rec->found_inode_item)
945 filetype = imode_to_type(rec->imode);
946 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
947 if (backref->found_dir_item && backref->found_dir_index) {
948 if (backref->filetype != filetype)
949 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
950 if (!backref->errors && backref->found_inode_ref &&
951 rec->nlink == rec->found_link) {
952 list_del(&backref->list);
/* the remaining checks only apply to a finished record that is not mid-merge */
958 if (!rec->checked || rec->merging)
961 if (S_ISDIR(rec->imode)) {
962 if (rec->found_size != rec->isize)
963 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
964 if (rec->found_file_extent)
965 rec->errors |= I_ERR_ODD_FILE_EXTENT;
966 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
967 if (rec->found_dir_item)
968 rec->errors |= I_ERR_ODD_DIR_ITEM;
969 if (rec->found_size != rec->nbytes)
970 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* a linked file must be covered by extents up to isize unless no_holes is set */
971 if (rec->nlink > 0 && !no_holes &&
972 (rec->extent_end < rec->isize ||
973 first_extent_gap(&rec->holes) < rec->isize))
974 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
977 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
978 if (rec->found_csum_item && rec->nodatasum)
979 rec->errors |= I_ERR_ODD_CSUM_ITEM;
980 if (rec->some_csum_missing && !rec->nodatasum)
981 rec->errors |= I_ERR_SOME_CSUM_MISSING;
/* a record reaching this point must not be shared */
984 BUG_ON(rec->refs != 1);
985 if (can_free_inode_rec(rec)) {
986 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
987 node = container_of(cache, struct ptr_node, cache);
988 BUG_ON(node->data != rec);
989 remove_cache_extent(inode_cache, &node->cache);
995 static int check_orphan_item(struct btrfs_root *root, u64 ino)
997 struct btrfs_path path;
998 struct btrfs_key key;
1001 key.objectid = BTRFS_ORPHAN_OBJECTID;
1002 key.type = BTRFS_ORPHAN_ITEM_KEY;
1005 btrfs_init_path(&path);
1006 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1007 btrfs_release_path(&path);
/*
 * Fold the inode item at @slot of leaf @eb into the inode record that
 * @active_node is currently working on.
 *
 * The current record must belong to key->objectid and must not be shared
 * (BUG_ON otherwise). A second inode item for the same record is flagged
 * with I_ERR_DUP_INODE_ITEM. Otherwise nlink, size, nbytes and mode are
 * read from the item, the NODATASUM inode flag is tested, and
 * found_inode_item is set. An inode with nlink == 0 is provisionally flagged
 * I_ERR_NO_ORPHAN_ITEM. Finally the record goes to maybe_free_inode_rec().
 * NOTE(review): the return statements, and what the duplicate branch does
 * after setting the error bit, are not visible here - confirm.
 */
1013 static int process_inode_item(struct extent_buffer *eb,
1014 int slot, struct btrfs_key *key,
1015 struct shared_node *active_node)
1017 struct inode_record *rec;
1018 struct btrfs_inode_item *item;
1020 rec = active_node->current;
1021 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1022 if (rec->found_inode_item) {
1023 rec->errors |= I_ERR_DUP_INODE_ITEM;
1026 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1027 rec->nlink = btrfs_inode_nlink(eb, item);
1028 rec->isize = btrfs_inode_size(eb, item);
1029 rec->nbytes = btrfs_inode_nbytes(eb, item);
1030 rec->imode = btrfs_inode_mode(eb, item);
1031 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1033 rec->found_inode_item = 1;
/* presumably cleared later if a matching orphan item is found - confirm */
1034 if (rec->nlink == 0)
1035 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1036 maybe_free_inode_rec(&active_node->inode_cache, rec);
1040 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1042 int namelen, u64 dir)
1044 struct inode_backref *backref;
1046 list_for_each_entry(backref, &rec->backrefs, list) {
1047 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1049 if (backref->dir != dir || backref->namelen != namelen)
1051 if (memcmp(name, backref->name, namelen))
1056 backref = malloc(sizeof(*backref) + namelen + 1);
1059 memset(backref, 0, sizeof(*backref));
1061 backref->namelen = namelen;
1062 memcpy(backref->name, name, namelen);
1063 backref->name[namelen] = '\0';
1064 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record one name reference to inode @ino found under directory @dir.
 *
 * The inode record for @ino is fetched from @inode_cache through
 * get_inode_rec() (failure is fatal via BUG_ON), and the backref matching
 * (@dir, @name, @namelen) is obtained through get_inode_backref(). @errors
 * is OR-ed into the backref, which is then updated according to @itemtype:
 *  - BTRFS_DIR_INDEX_KEY: flags a duplicate dir index, an index mismatch
 *    against an already seen inode ref and a filetype mismatch against an
 *    already seen dir item; stores @index and @filetype
 *  - BTRFS_DIR_ITEM_KEY: flags a duplicate dir item and a filetype mismatch
 *    against an already seen dir index; stores @filetype
 *  - BTRFS_INODE_REF_KEY / BTRFS_INODE_EXTREF_KEY: flags a duplicate inode
 *    ref and an index mismatch against an already seen dir index; stores
 *    @index and the item type as ref_type
 * Afterwards the record is handed to maybe_free_inode_rec().
 * NOTE(review): several lines (including the return value and any link
 * counting) are not visible in this listing - confirm.
 */
1068 static int add_inode_backref(struct cache_tree *inode_cache,
1069 u64 ino, u64 dir, u64 index,
1070 const char *name, int namelen,
1071 u8 filetype, u8 itemtype, int errors)
1073 struct inode_record *rec;
1074 struct inode_backref *backref;
1076 rec = get_inode_rec(inode_cache, ino, 1);
1077 BUG_ON(IS_ERR(rec));
1078 backref = get_inode_backref(rec, name, namelen, dir);
1081 backref->errors |= errors;
1082 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1083 if (backref->found_dir_index)
1084 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1085 if (backref->found_inode_ref && backref->index != index)
1086 backref->errors |= REF_ERR_INDEX_UNMATCH;
1087 if (backref->found_dir_item && backref->filetype != filetype)
1088 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1090 backref->index = index;
1091 backref->filetype = filetype;
1092 backref->found_dir_index = 1;
1093 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1095 if (backref->found_dir_item)
1096 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1097 if (backref->found_dir_index && backref->filetype != filetype)
1098 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1100 backref->filetype = filetype;
1101 backref->found_dir_item = 1;
1102 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1103 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1104 if (backref->found_inode_ref)
1105 backref->errors |= REF_ERR_DUP_INODE_REF;
1106 if (backref->found_dir_index && backref->index != index)
1107 backref->errors |= REF_ERR_INDEX_UNMATCH;
1109 backref->index = index;
/* remember whether this came from a plain ref or an extended ref */
1111 backref->ref_type = itemtype;
1112 backref->found_inode_ref = 1;
1117 maybe_free_inode_rec(inode_cache, rec);
1121 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1122 struct cache_tree *dst_cache)
1124 struct inode_backref *backref;
1129 list_for_each_entry(backref, &src->backrefs, list) {
1130 if (backref->found_dir_index) {
1131 add_inode_backref(dst_cache, dst->ino, backref->dir,
1132 backref->index, backref->name,
1133 backref->namelen, backref->filetype,
1134 BTRFS_DIR_INDEX_KEY, backref->errors);
1136 if (backref->found_dir_item) {
1138 add_inode_backref(dst_cache, dst->ino,
1139 backref->dir, 0, backref->name,
1140 backref->namelen, backref->filetype,
1141 BTRFS_DIR_ITEM_KEY, backref->errors);
1143 if (backref->found_inode_ref) {
1144 add_inode_backref(dst_cache, dst->ino,
1145 backref->dir, backref->index,
1146 backref->name, backref->namelen, 0,
1147 backref->ref_type, backref->errors);
1151 if (src->found_dir_item)
1152 dst->found_dir_item = 1;
1153 if (src->found_file_extent)
1154 dst->found_file_extent = 1;
1155 if (src->found_csum_item)
1156 dst->found_csum_item = 1;
1157 if (src->some_csum_missing)
1158 dst->some_csum_missing = 1;
1159 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1160 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1165 BUG_ON(src->found_link < dir_count);
1166 dst->found_link += src->found_link - dir_count;
1167 dst->found_size += src->found_size;
1168 if (src->extent_start != (u64)-1) {
1169 if (dst->extent_start == (u64)-1) {
1170 dst->extent_start = src->extent_start;
1171 dst->extent_end = src->extent_end;
1173 if (dst->extent_end > src->extent_start)
1174 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1175 else if (dst->extent_end < src->extent_start) {
1176 ret = add_file_extent_hole(&dst->holes,
1178 src->extent_start - dst->extent_end);
1180 if (dst->extent_end < src->extent_end)
1181 dst->extent_end = src->extent_end;
1185 dst->errors |= src->errors;
1186 if (src->found_inode_item) {
1187 if (!dst->found_inode_item) {
1188 dst->nlink = src->nlink;
1189 dst->isize = src->isize;
1190 dst->nbytes = src->nbytes;
1191 dst->imode = src->imode;
1192 dst->nodatasum = src->nodatasum;
1193 dst->found_inode_item = 1;
1195 dst->errors |= I_ERR_DUP_INODE_ITEM;
1203 static int splice_shared_node(struct shared_node *src_node,
1204 struct shared_node *dst_node)
1206 struct cache_extent *cache;
1207 struct ptr_node *node, *ins;
1208 struct cache_tree *src, *dst;
1209 struct inode_record *rec, *conflict;
1210 u64 current_ino = 0;
1214 if (--src_node->refs == 0)
1216 if (src_node->current)
1217 current_ino = src_node->current->ino;
1219 src = &src_node->root_cache;
1220 dst = &dst_node->root_cache;
1222 cache = search_cache_extent(src, 0);
1224 node = container_of(cache, struct ptr_node, cache);
1226 cache = next_cache_extent(cache);
1229 remove_cache_extent(src, &node->cache);
1232 ins = malloc(sizeof(*ins));
1234 ins->cache.start = node->cache.start;
1235 ins->cache.size = node->cache.size;
1239 ret = insert_cache_extent(dst, &ins->cache);
1240 if (ret == -EEXIST) {
1241 conflict = get_inode_rec(dst, rec->ino, 1);
1242 BUG_ON(IS_ERR(conflict));
1243 merge_inode_recs(rec, conflict, dst);
1245 conflict->checked = 1;
1246 if (dst_node->current == conflict)
1247 dst_node->current = NULL;
1249 maybe_free_inode_rec(dst, conflict);
1250 free_inode_rec(rec);
1257 if (src == &src_node->root_cache) {
1258 src = &src_node->inode_cache;
1259 dst = &dst_node->inode_cache;
1263 if (current_ino > 0 && (!dst_node->current ||
1264 current_ino > dst_node->current->ino)) {
1265 if (dst_node->current) {
1266 dst_node->current->checked = 1;
1267 maybe_free_inode_rec(dst, dst_node->current);
1269 dst_node->current = get_inode_rec(dst, current_ino, 1);
1270 BUG_ON(IS_ERR(dst_node->current));
1275 static void free_inode_ptr(struct cache_extent *cache)
1277 struct ptr_node *node;
1278 struct inode_record *rec;
1280 node = container_of(cache, struct ptr_node, cache);
1282 free_inode_rec(rec);
1286 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1288 static struct shared_node *find_shared_node(struct cache_tree *shared,
1291 struct cache_extent *cache;
1292 struct shared_node *node;
1294 cache = lookup_cache_extent(shared, bytenr, 1);
1296 node = container_of(cache, struct shared_node, cache);
1302 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1305 struct shared_node *node;
1307 node = calloc(1, sizeof(*node));
1310 node->cache.start = bytenr;
1311 node->cache.size = 1;
1312 cache_tree_init(&node->root_cache);
1313 cache_tree_init(&node->inode_cache);
1316 ret = insert_cache_extent(shared, &node->cache);
1321 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1322 struct walk_control *wc, int level)
1324 struct shared_node *node;
1325 struct shared_node *dest;
1328 if (level == wc->active_node)
1331 BUG_ON(wc->active_node <= level);
1332 node = find_shared_node(&wc->shared, bytenr);
1334 ret = add_shared_node(&wc->shared, bytenr, refs);
1336 node = find_shared_node(&wc->shared, bytenr);
1337 wc->nodes[level] = node;
1338 wc->active_node = level;
1342 if (wc->root_level == wc->active_node &&
1343 btrfs_root_refs(&root->root_item) == 0) {
1344 if (--node->refs == 0) {
1345 free_inode_recs_tree(&node->root_cache);
1346 free_inode_recs_tree(&node->inode_cache);
1347 remove_cache_extent(&wc->shared, &node->cache);
1353 dest = wc->nodes[wc->active_node];
1354 splice_shared_node(node, dest);
1355 if (node->refs == 0) {
1356 remove_cache_extent(&wc->shared, &node->cache);
1362 static int leave_shared_node(struct btrfs_root *root,
1363 struct walk_control *wc, int level)
1365 struct shared_node *node;
1366 struct shared_node *dest;
1369 if (level == wc->root_level)
1372 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1376 BUG_ON(i >= BTRFS_MAX_LEVEL);
1378 node = wc->nodes[wc->active_node];
1379 wc->nodes[wc->active_node] = NULL;
1380 wc->active_node = i;
1382 dest = wc->nodes[wc->active_node];
1383 if (wc->active_node < wc->root_level ||
1384 btrfs_root_refs(&root->root_item) > 0) {
1385 BUG_ON(node->refs <= 1);
1386 splice_shared_node(node, dest);
1388 BUG_ON(node->refs < 2);
1397 * 1 - if the root with id child_root_id is a child of root parent_root_id
1398 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1399 * has other root(s) as parent(s)
1400 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Look up the relationship between two roots in the tree root.
 *
 * A first search uses the key (parent_root_id, ROOT_REF, child_root_id).
 * A second pass walks the ROOT_BACKREF items whose objectid is
 * child_root_id and compares each key offset against parent_root_id.
 * When neither lookup reports a direct match, the final return value
 * depends on has_parent: 0 when it is set, 2 otherwise.
 */
1402 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1405 struct btrfs_path path;
1406 struct btrfs_key key;
1407 struct extent_buffer *leaf;
1411 btrfs_init_path(&path);
/* Direct lookup of the ROOT_REF item from parent to child. */
1413 key.objectid = parent_root_id;
1414 key.type = BTRFS_ROOT_REF_KEY;
1415 key.offset = child_root_id;
1416 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1420 btrfs_release_path(&path);
/* Fall back to scanning the child's ROOT_BACKREF items. */
1424 key.objectid = child_root_id;
1425 key.type = BTRFS_ROOT_BACKREF_KEY;
1427 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1433 leaf = path.nodes[0];
/* Move on to the next leaf once the current one is exhausted. */
1434 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1435 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1438 leaf = path.nodes[0];
1441 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Items that are no longer backrefs of child_root_id end the scan. */
1442 if (key.objectid != child_root_id ||
1443 key.type != BTRFS_ROOT_BACKREF_KEY)
1448 if (key.offset == parent_root_id) {
1449 btrfs_release_path(&path);
1456 btrfs_release_path(&path);
1459 return has_parent ? 0 : 2;
/*
 * Record every entry of one DIR_ITEM/DIR_INDEX item as a backref.
 *
 * The current inode record of @active_node is marked as having a dir item
 * and each entry's name length is added to its found_size.  For each entry
 * the name is copied into a local buffer of BTRFS_NAME_LEN bytes and a
 * backref is added to:
 *   - the inode cache when the entry's location is an INODE_ITEM key,
 *   - the root cache when the location is a ROOT_ITEM key,
 *   - the inode cache under BTRFS_MULTIPLE_OBJECTIDS otherwise, after an
 *     "invalid location" message is printed.
 * A DIR_INDEX key with nritems > 1 sets I_ERR_DUP_DIR_INDEX on the record.
 */
1462 static int process_dir_item(struct btrfs_root *root,
1463 struct extent_buffer *eb,
1464 int slot, struct btrfs_key *key,
1465 struct shared_node *active_node)
1475 struct btrfs_dir_item *di;
1476 struct inode_record *rec;
1477 struct cache_tree *root_cache;
1478 struct cache_tree *inode_cache;
1479 struct btrfs_key location;
1480 char namebuf[BTRFS_NAME_LEN];
1482 root_cache = &active_node->root_cache;
1483 inode_cache = &active_node->inode_cache;
1484 rec = active_node->current;
1485 rec->found_dir_item = 1;
1487 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1488 total = btrfs_item_size_nr(eb, slot);
/* Walk the packed entries until the whole item has been consumed. */
1489 while (cur < total) {
1491 btrfs_dir_item_key_to_cpu(eb, di, &location);
1492 name_len = btrfs_dir_name_len(eb, di);
1493 data_len = btrfs_dir_data_len(eb, di);
1494 filetype = btrfs_dir_type(eb, di);
1496 rec->found_size += name_len;
/*
 * Names that do not fit in namebuf are clamped to BTRFS_NAME_LEN and
 * flagged REF_ERR_NAME_TOO_LONG (presumably the else branch of this test).
 */
1497 if (name_len <= BTRFS_NAME_LEN) {
1501 len = BTRFS_NAME_LEN;
1502 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes start right after the fixed-size dir item header. */
1504 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1506 if (location.type == BTRFS_INODE_ITEM_KEY) {
1507 add_inode_backref(inode_cache, location.objectid,
1508 key->objectid, key->offset, namebuf,
1509 len, filetype, key->type, error);
1510 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1511 add_inode_backref(root_cache, location.objectid,
1512 key->objectid, key->offset,
1513 namebuf, len, filetype,
1516 fprintf(stderr, "invalid location in dir item %u\n",
1518 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1519 key->objectid, key->offset, namebuf,
1520 len, filetype, key->type, error);
/* Advance past header, name and data to the next packed entry. */
1523 len = sizeof(*di) + name_len + data_len;
1524 di = (struct btrfs_dir_item *)((char *)di + len);
1527 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1528 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every name stored in one INODE_REF item as a backref.
 *
 * For each packed ref the name is copied into a local buffer and
 * add_inode_backref() is called on the inode cache of @active_node with
 * key->objectid as the inode, key->offset as the directory and the index
 * read from the ref.  The file type argument is passed as 0.
 */
1533 static int process_inode_ref(struct extent_buffer *eb,
1534 int slot, struct btrfs_key *key,
1535 struct shared_node *active_node)
1543 struct cache_tree *inode_cache;
1544 struct btrfs_inode_ref *ref;
1545 char namebuf[BTRFS_NAME_LEN];
1547 inode_cache = &active_node->inode_cache;
1549 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1550 total = btrfs_item_size_nr(eb, slot);
1551 while (cur < total) {
1552 name_len = btrfs_inode_ref_name_len(eb, ref);
1553 index = btrfs_inode_ref_index(eb, ref);
/*
 * Over-long names are clamped to BTRFS_NAME_LEN and flagged
 * REF_ERR_NAME_TOO_LONG (presumably the else branch of this test).
 */
1554 if (name_len <= BTRFS_NAME_LEN) {
1558 len = BTRFS_NAME_LEN;
1559 error = REF_ERR_NAME_TOO_LONG;
/* The name follows the fixed-size ref header. */
1561 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1562 add_inode_backref(inode_cache, key->objectid, key->offset,
1563 index, namebuf, len, 0, key->type, error);
/* Step over this ref and its name to the next packed ref. */
1565 len = sizeof(*ref) + name_len;
1566 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every name stored in one INODE_EXTREF item as a backref.
 *
 * Works like the INODE_REF variant, except that the parent directory is
 * read from each extref itself rather than taken from key->offset.
 */
1572 static int process_inode_extref(struct extent_buffer *eb,
1573 int slot, struct btrfs_key *key,
1574 struct shared_node *active_node)
1583 struct cache_tree *inode_cache;
1584 struct btrfs_inode_extref *extref;
1585 char namebuf[BTRFS_NAME_LEN];
1587 inode_cache = &active_node->inode_cache;
1589 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1590 total = btrfs_item_size_nr(eb, slot);
1591 while (cur < total) {
1592 name_len = btrfs_inode_extref_name_len(eb, extref);
1593 index = btrfs_inode_extref_index(eb, extref);
1594 parent = btrfs_inode_extref_parent(eb, extref);
/*
 * Over-long names are clamped to BTRFS_NAME_LEN and flagged
 * REF_ERR_NAME_TOO_LONG (presumably the else branch of this test).
 */
1595 if (name_len <= BTRFS_NAME_LEN) {
1599 len = BTRFS_NAME_LEN;
1600 error = REF_ERR_NAME_TOO_LONG;
/* The name follows the fixed-size extref header. */
1602 read_extent_buffer(eb, namebuf,
1603 (unsigned long)(extref + 1), len);
1604 add_inode_backref(inode_cache, key->objectid, parent,
1605 index, namebuf, len, 0, key->type, error);
/* Step over this extref and its name to the next packed extref. */
1607 len = sizeof(*extref) + name_len;
1608 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Scan the checksum tree for EXTENT_CSUM items overlapping the byte range
 * that begins at @start and spans @len bytes.
 *
 * Each item covers (item size / csum_size) sectors starting at its key
 * offset.  The amount found is presumably accumulated into *@found; the
 * accumulation itself is not visible here, confirm before relying on it.
 */
1615 static int count_csum_range(struct btrfs_root *root, u64 start,
1616 u64 len, u64 *found)
1618 struct btrfs_key key;
1619 struct btrfs_path path;
1620 struct extent_buffer *leaf;
1625 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1627 btrfs_init_path(&path);
1629 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1631 key.type = BTRFS_EXTENT_CSUM_KEY;
1633 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: the preceding item may still be a csum item, so its
 * key is inspected as well.
 */
1637 if (ret > 0 && path.slots[0] > 0) {
1638 leaf = path.nodes[0];
1639 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1640 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1641 key.type == BTRFS_EXTENT_CSUM_KEY)
1646 leaf = path.nodes[0];
/* Move on to the next leaf once the current one is exhausted. */
1647 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1648 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1653 leaf = path.nodes[0];
1656 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1657 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1658 key.type != BTRFS_EXTENT_CSUM_KEY)
1661 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Items starting at or beyond the end of the range are of no interest. */
1662 if (key.offset >= start + len)
1665 if (key.offset > start)
/* End of the byte range covered by the checksums in this item. */
1668 size = btrfs_item_size_nr(leaf, path.slots[0]);
1669 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1670 if (csum_end > start) {
1671 size = min(csum_end - start, len);
1680 btrfs_release_path(&path);
/*
 * Account one EXTENT_DATA item against the current inode record.
 *
 * The record's extent_start/extent_end are used to detect overlapping
 * extents (I_ERR_FILE_EXTENT_OVERLAP) and to record holes between the
 * previous extent end and this key's offset.  The extent item is then
 * validated by type; inconsistencies set I_ERR_BAD_FILE_EXTENT.  For
 * extents with a non-zero disk_bytenr outside the data reloc tree, the
 * checksum coverage is looked up with count_csum_range() and the result
 * is reflected in found_csum_item / some_csum_missing or, for
 * preallocated extents, I_ERR_ODD_CSUM_ITEM.
 */
1686 static int process_file_extent(struct btrfs_root *root,
1687 struct extent_buffer *eb,
1688 int slot, struct btrfs_key *key,
1689 struct shared_node *active_node)
1691 struct inode_record *rec;
1692 struct btrfs_file_extent_item *fi;
1694 u64 disk_bytenr = 0;
1695 u64 extent_offset = 0;
/* Low-bit mask used for the sector alignment checks and round-up below. */
1696 u64 mask = root->sectorsize - 1;
1700 rec = active_node->current;
1701 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1702 rec->found_file_extent = 1;
/* (u64)-1 marks a record that has not seen any file extent yet. */
1704 if (rec->extent_start == (u64)-1) {
1705 rec->extent_start = key->offset;
1706 rec->extent_end = key->offset;
1709 if (rec->extent_end > key->offset)
1710 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1711 else if (rec->extent_end < key->offset) {
1712 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1713 key->offset - rec->extent_end);
1718 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1719 extent_type = btrfs_file_extent_type(eb, fi);
1721 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1722 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1724 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1725 rec->found_size += num_bytes;
/* Round the inline length up to a sector boundary. */
1726 num_bytes = (num_bytes + mask) & ~mask;
1727 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1728 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1729 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1730 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1731 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Regular/prealloc extents must be non-empty and sector aligned. */
1732 if (num_bytes == 0 || (num_bytes & mask))
1733 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1734 if (num_bytes + extent_offset >
1735 btrfs_file_extent_ram_bytes(eb, fi))
1736 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* A preallocated extent must not carry any encoding. */
1737 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1738 (btrfs_file_extent_compression(eb, fi) ||
1739 btrfs_file_extent_encryption(eb, fi) ||
1740 btrfs_file_extent_other_encoding(eb, fi)))
1741 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents backed by disk space count towards found_size. */
1742 if (disk_bytenr > 0)
1743 rec->found_size += num_bytes;
1745 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1747 rec->extent_end = key->offset + num_bytes;
1750 * The data reloc tree will copy full extents into its inode and then
1751 * copy the corresponding csums. Because the extent it copied could be
1752 * a preallocated extent that hasn't been written to yet there may be no
1753 * csums to copy, ergo we won't have csums for our file extent. This is
1754 * ok so just don't bother checking csums if the inode belongs to the
1757 if (disk_bytenr > 0 &&
1758 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents are checked over their on-disk length. */
1760 if (btrfs_file_extent_compression(eb, fi))
1761 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1763 disk_bytenr += extent_offset;
1765 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1768 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1770 rec->found_csum_item = 1;
1771 if (found < num_bytes)
1772 rec->some_csum_missing = 1;
1773 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1775 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of leaf @eb to the matching per-item handler.
 *
 * Items are attributed to the inode record of the active shared node.
 * Whenever an item's objectid exceeds the inode number of the current
 * record (or there is no current record), the previous record is marked
 * checked and possibly freed, and a record for the new objectid is
 * fetched.  Dispatch is by key type: dir item/index, inode ref, inode
 * extref, inode item and extent data.
 */
1781 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1782 struct walk_control *wc)
1784 struct btrfs_key key;
1788 struct cache_tree *inode_cache;
1789 struct shared_node *active_node;
/* Special case: active node at the root level of a root with zero refs. */
1791 if (wc->root_level == wc->active_node &&
1792 btrfs_root_refs(&root->root_item) == 0)
1795 active_node = wc->nodes[wc->active_node];
1796 inode_cache = &active_node->inode_cache;
1797 nritems = btrfs_header_nritems(eb);
1798 for (i = 0; i < nritems; i++) {
1799 btrfs_item_key_to_cpu(eb, &key, i);
/* Free space objects and orphan items get separate treatment. */
1801 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1803 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Switch to a new inode record when the objectid moves forward. */
1806 if (active_node->current == NULL ||
1807 active_node->current->ino < key.objectid) {
1808 if (active_node->current) {
1809 active_node->current->checked = 1;
1810 maybe_free_inode_rec(inode_cache,
1811 active_node->current);
1813 active_node->current = get_inode_rec(inode_cache,
1815 BUG_ON(IS_ERR(active_node->current));
1818 case BTRFS_DIR_ITEM_KEY:
1819 case BTRFS_DIR_INDEX_KEY:
1820 ret = process_dir_item(root, eb, i, &key, active_node);
1822 case BTRFS_INODE_REF_KEY:
1823 ret = process_inode_ref(eb, i, &key, active_node);
1825 case BTRFS_INODE_EXTREF_KEY:
1826 ret = process_inode_extref(eb, i, &key, active_node);
1828 case BTRFS_INODE_ITEM_KEY:
1829 ret = process_inode_item(eb, i, &key, active_node);
1831 case BTRFS_EXTENT_DATA_KEY:
1832 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for the children of @node, starting at pointer @slot
 * and continuing to the last pointer in the node.  Each request uses the
 * block pointer and generation stored in the node and root->nodesize as
 * the block size.
 */
1842 static void reada_walk_down(struct btrfs_root *root,
1843 struct extent_buffer *node, int slot)
1852 level = btrfs_header_level(node);
1856 nritems = btrfs_header_nritems(node);
1857 blocksize = root->nodesize;
1858 for (i = slot; i < nritems; i++) {
1859 bytenr = btrfs_node_blockptr(node, i);
1860 ptr_gen = btrfs_node_ptr_generation(node, i);
1861 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1866 * Check the child node/leaf by the following condition:
1867 * 1. the first item key of the node/leaf should be the same with the one
1869 * 2. block in parent node should match the child node/leaf.
1870 * 3. generation of parent node and child's header should be consistent.
1872 * Or the child node/leaf pointed by the key in parent is not valid.
1874 * We hope to check leaf owner too, but since subvol may share leaves,
1875 * which makes leaf owner check not so strong, key check should be
1876 * sufficient enough for that case.
/*
 * Cross-check @child against the pointer at @slot in @parent.
 *
 * Three properties are compared and a message is printed to stderr for
 * each mismatch: the first key of the child against the parent's key for
 * that slot, the child's header bytenr against the parent's block
 * pointer, and the child's header generation against the parent's pointer
 * generation.
 */
1878 static int check_child_node(struct btrfs_root *root,
1879 struct extent_buffer *parent, int slot,
1880 struct extent_buffer *child)
1882 struct btrfs_key parent_key;
1883 struct btrfs_key child_key;
1886 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* A level 0 child is a leaf, so its first key is an item key. */
1887 if (btrfs_header_level(child) == 0)
1888 btrfs_item_key_to_cpu(child, &child_key, 0);
1890 btrfs_node_key_to_cpu(child, &child_key, 0);
1892 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1895 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1896 parent_key.objectid, parent_key.type, parent_key.offset,
1897 child_key.objectid, child_key.type, child_key.offset);
1899 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1901 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1902 btrfs_node_blockptr(parent, slot),
1903 btrfs_header_bytenr(child));
1905 if (btrfs_node_ptr_generation(parent, slot) !=
1906 btrfs_header_generation(child)) {
/*
 * NOTE(review): the two messages above print the parent's value as
 * "wanted" and the child's as "have"; here the arguments are in the
 * opposite order (child first, parent second).  Looks swapped - confirm.
 */
1908 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1909 btrfs_header_generation(child),
1910 btrfs_node_ptr_generation(parent, slot));
/*
 * Per-level cache, indexed by tree level: the start of the tree block
 * whose reference count was last looked up, and that count.
 */
1916 u64 bytenr[BTRFS_MAX_LEVEL];
1917 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend from path->nodes[*level] towards the leaves.
 *
 * Reference counts of visited blocks are cached per level in @nrefs so
 * that btrfs_lookup_extent_info() is only called when the block at a
 * level changes.  The counts are handed to enter_shared_node().  Leaves
 * are passed to process_one_leaf().  For interior nodes the child block
 * is read (with readahead when it is not cached and up to date), checked
 * with check_child_node() and btrfs_check_leaf()/btrfs_check_node(), and
 * then installed in @path one level down.  An unreadable child is
 * recorded through btrfs_add_corrupt_extent_record().
 */
1920 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1921 struct walk_control *wc, int *level,
1922 struct node_refs *nrefs)
1924 enum btrfs_tree_block_status status;
1927 struct extent_buffer *next;
1928 struct extent_buffer *cur;
1933 WARN_ON(*level < 0);
1934 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached ref count when the block at this level is unchanged. */
1936 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1937 refs = nrefs->refs[*level];
1940 ret = btrfs_lookup_extent_info(NULL, root,
1941 path->nodes[*level]->start,
1942 *level, 1, &refs, NULL);
1947 nrefs->bytenr[*level] = path->nodes[*level]->start;
1948 nrefs->refs[*level] = refs;
1952 ret = enter_shared_node(root, path->nodes[*level]->start,
1960 while (*level >= 0) {
1961 WARN_ON(*level < 0);
1962 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1963 cur = path->nodes[*level];
/* The header level must agree with the position in the path. */
1965 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
1968 if (path->slots[*level] >= btrfs_header_nritems(cur))
1971 ret = process_one_leaf(root, cur, wc);
1976 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1977 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1978 blocksize = root->nodesize;
/* Same caching scheme as above, applied to the child level. */
1980 if (bytenr == nrefs->bytenr[*level - 1]) {
1981 refs = nrefs->refs[*level - 1];
1983 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1984 *level - 1, 1, &refs, NULL);
1988 nrefs->bytenr[*level - 1] = bytenr;
1989 nrefs->refs[*level - 1] = refs;
1994 ret = enter_shared_node(root, bytenr, refs,
1997 path->slots[*level]++;
/* Use the cached block if present and current, otherwise read it. */
2002 next = btrfs_find_tree_block(root, bytenr, blocksize);
2003 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2004 free_extent_buffer(next);
2005 reada_walk_down(root, cur, path->slots[*level]);
2006 next = read_tree_block(root, bytenr, blocksize,
2008 if (!extent_buffer_uptodate(next)) {
2009 struct btrfs_key node_key;
2011 btrfs_node_key_to_cpu(path->nodes[*level],
2013 path->slots[*level]);
2014 btrfs_add_corrupt_extent_record(root->fs_info,
2016 path->nodes[*level]->start,
2017 root->nodesize, *level);
2023 ret = check_child_node(root, cur, path->slots[*level], next);
2029 if (btrfs_is_leaf(next))
2030 status = btrfs_check_leaf(root, NULL, next);
2032 status = btrfs_check_node(root, NULL, next);
2033 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2034 free_extent_buffer(next);
/* Descend: install the child one level down, starting at slot 0. */
2039 *level = *level - 1;
2040 free_extent_buffer(path->nodes[*level]);
2041 path->nodes[*level] = next;
2042 path->slots[*level] = 0;
/* Mark the current block as fully consumed. */
2045 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Climb from *level towards the root looking for a block that still has
 * an unvisited slot.
 *
 * Exhausted blocks at *level are released and their path entry cleared;
 * when the released level is the active shared node, leave_shared_node()
 * is called for it.
 */
2049 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2050 struct walk_control *wc, int *level)
2053 struct extent_buffer *leaf;
2055 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
/* Despite its name, "leaf" holds the block at level i, not only leaves. */
2056 leaf = path->nodes[i];
2057 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2062 free_extent_buffer(path->nodes[*level]);
2063 path->nodes[*level] = NULL;
2064 BUG_ON(*level > wc->active_node);
2065 if (*level == wc->active_node)
2066 leave_shared_node(root, wc, *level);
/*
 * Sanity-check the inode record of a root directory.
 *
 * The conditions tested, in order: a missing inode item or any recorded
 * error; nlink other than 1 or a non-zero found_link; an empty backref
 * list; a first backref without an inode ref; a first backref that is not
 * index 0 with the two-byte name ".."; a first backref that has a dir
 * index or dir item.
 */
2073 static int check_root_dir(struct inode_record *rec)
2075 struct inode_backref *backref;
2078 if (!rec->found_inode_item || rec->errors)
2080 if (rec->nlink != 1 || rec->found_link != 0)
2082 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2084 backref = to_inode_backref(rec->backrefs.next);
2085 if (!backref->found_inode_ref)
2087 if (backref->index != 0 || backref->namelen != 2 ||
2088 memcmp(backref->name, "..", 2))
2090 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of rec->ino with
 * rec->found_size and clear I_ERR_DIR_ISIZE_WRONG on the record.
 *
 * The inode item is located by searching for the key
 * (rec->ino, INODE_ITEM, (u64)-1) and then inspecting the key at
 * path->slots[0]; its objectid must equal rec->ino.
 */
2097 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2098 struct btrfs_root *root, struct btrfs_path *path,
2099 struct inode_record *rec)
2101 struct btrfs_inode_item *ei;
2102 struct btrfs_key key;
2105 key.objectid = rec->ino;
2106 key.type = BTRFS_INODE_ITEM_KEY;
2107 key.offset = (u64)-1;
2109 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 gets special handling: there is no earlier slot in this leaf. */
2113 if (!path->slots[0]) {
2120 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2121 if (key.objectid != rec->ino) {
2126 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2127 struct btrfs_inode_item);
2128 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2129 btrfs_mark_buffer_dirty(path->nodes[0]);
2130 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2131 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2132 root->root_key.objectid);
2134 btrfs_release_path(path);
/*
 * Add an orphan item for rec->ino via btrfs_add_orphan_item(), release
 * @path and clear I_ERR_NO_ORPHAN_ITEM on the record.
 */
2138 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2139 struct btrfs_root *root,
2140 struct btrfs_path *path,
2141 struct inode_record *rec)
2145 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2146 btrfs_release_path(path);
2148 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of rec->ino with
 * rec->found_size and clear I_ERR_FILE_NBYTES_WRONG on the record.
 */
2152 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2153 struct btrfs_root *root,
2154 struct btrfs_path *path,
2155 struct inode_record *rec)
2157 struct btrfs_inode_item *ei;
2158 struct btrfs_key key;
2161 key.objectid = rec->ino;
2162 key.type = BTRFS_INODE_ITEM_KEY;
2165 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2172 /* Since ret == 0, no need to check anything */
2173 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2174 struct btrfs_inode_item);
2175 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2176 btrfs_mark_buffer_dirty(path->nodes[0]);
2177 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2178 printf("reset nbytes for ino %llu root %llu\n",
2179 rec->ino, root->root_key.objectid);
2181 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that @backref says is missing.
 *
 * Within its own transaction, an empty item keyed
 * (backref->dir, DIR_INDEX, backref->index) is inserted and filled with a
 * dir item pointing at the inode item of rec->ino, carrying the backref's
 * name and no data.  Afterwards the backref is marked as having a dir
 * index and the directory's inode record is updated: the name length is
 * added to found_size and I_ERR_DIR_ISIZE_WRONG is cleared or set
 * according to whether found_size now matches isize.
 */
2185 static int add_missing_dir_index(struct btrfs_root *root,
2186 struct cache_tree *inode_cache,
2187 struct inode_record *rec,
2188 struct inode_backref *backref)
2190 struct btrfs_path path;
2191 struct btrfs_trans_handle *trans;
2192 struct btrfs_dir_item *dir_item;
2193 struct extent_buffer *leaf;
2194 struct btrfs_key key;
2195 struct btrfs_disk_key disk_key;
2196 struct inode_record *dir_rec;
2197 unsigned long name_ptr;
/* Item payload: fixed dir item header followed by the name bytes. */
2198 u32 data_size = sizeof(*dir_item) + backref->namelen;
2201 trans = btrfs_start_transaction(root, 1);
2203 return PTR_ERR(trans);
2205 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2206 (unsigned long long)rec->ino);
2208 btrfs_init_path(&path);
2209 key.objectid = backref->dir;
2210 key.type = BTRFS_DIR_INDEX_KEY;
2211 key.offset = backref->index;
2212 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2215 leaf = path.nodes[0];
2216 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* Location key stored in the dir item: the inode item of rec->ino. */
2218 disk_key.objectid = cpu_to_le64(rec->ino);
2219 disk_key.type = BTRFS_INODE_ITEM_KEY;
2220 disk_key.offset = 0;
2222 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2223 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2224 btrfs_set_dir_data_len(leaf, dir_item, 0);
2225 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is written immediately after the dir item header. */
2226 name_ptr = (unsigned long)(dir_item + 1);
2227 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2228 btrfs_mark_buffer_dirty(leaf);
2229 btrfs_release_path(&path);
2230 btrfs_commit_transaction(trans, root);
2232 backref->found_dir_index = 1;
2233 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2234 BUG_ON(IS_ERR(dir_rec));
/* Keep the directory record's size accounting in step with the new item. */
2237 dir_rec->found_size += backref->namelen;
2238 if (dir_rec->found_size == dir_rec->isize &&
2239 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2240 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2241 if (dir_rec->found_size != dir_rec->isize)
2242 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref.
 *
 * Runs in its own transaction.  The entry is looked up with
 * btrfs_lookup_dir_index(); one visible path releases the path and
 * commits right after the lookup, the other removes the entry through
 * btrfs_del_item() or btrfs_delete_one_dir_name() before releasing and
 * committing.
 */
2247 static int delete_dir_index(struct btrfs_root *root,
2248 struct cache_tree *inode_cache,
2249 struct inode_record *rec,
2250 struct inode_backref *backref)
2252 struct btrfs_trans_handle *trans;
2253 struct btrfs_dir_item *di;
2254 struct btrfs_path path;
2257 trans = btrfs_start_transaction(root, 1);
2259 return PTR_ERR(trans);
2261 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2262 (unsigned long long)backref->dir,
2263 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2264 (unsigned long long)root->objectid);
2266 btrfs_init_path(&path);
2267 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2268 backref->name, backref->namelen,
2269 backref->index, -1);
2272 btrfs_release_path(&path);
2273 btrfs_commit_transaction(trans, root);
/* Two removal forms: the whole item, or one name within the item. */
2280 ret = btrfs_del_item(trans, root, &path);
2282 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2284 btrfs_release_path(&path);
2285 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for rec->ino from what the record knows.
 *
 * A zeroed stack inode item is filled in with the transaction id as
 * generation, a link count, rec->found_size as nbytes, and a mode/size
 * pair chosen by whether a dir item was found: S_IFDIR | 0755 with
 * found_size, otherwise S_IFREG | 0755 with extent_end.  atime, ctime and
 * mtime are set to the current time and otime to 0.  The item is inserted
 * with btrfs_insert_inode() and the transaction is committed.
 */
2289 static int create_inode_item(struct btrfs_root *root,
2290 struct inode_record *rec,
2291 struct inode_backref *backref, int root_dir)
2293 struct btrfs_trans_handle *trans;
2294 struct btrfs_inode_item inode_item;
2295 time_t now = time(NULL);
2298 trans = btrfs_start_transaction(root, 1);
2299 if (IS_ERR(trans)) {
2300 ret = PTR_ERR(trans);
2304 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2305 "be incomplete, please check permissions and content after "
2306 "the fsck completes.\n", (unsigned long long)root->objectid,
2307 (unsigned long long)rec->ino);
2309 memset(&inode_item, 0, sizeof(inode_item));
2310 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* Link count: either fixed at 1 or taken from rec->found_link. */
2312 btrfs_set_stack_inode_nlink(&inode_item, 1);
2314 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2315 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2316 if (rec->found_dir_item) {
2317 if (rec->found_file_extent)
2318 fprintf(stderr, "root %llu inode %llu has both a dir "
2319 "item and extents, unsure if it is a dir or a "
2320 "regular file so setting it as a directory\n",
2321 (unsigned long long)root->objectid,
2322 (unsigned long long)rec->ino);
2323 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2324 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/*
 * NOTE(review): this condition is the exact complement of the "if"
 * above, so a plain "else" would be equivalent.
 */
2325 } else if (!rec->found_dir_item) {
2326 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2327 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2329 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2330 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2331 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2332 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2333 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2334 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2335 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2336 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2338 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2340 btrfs_commit_transaction(trans, root);
/*
 * Walk the backrefs of @rec and repair inconsistent ones.
 *
 * Depending on the delete flag and on which of dir item, dir index and
 * inode ref were found for a backref, the visible cases are:
 *   - recreate a missing inode item for the root directory,
 *   - delete a dir index that has no inode ref or whose index mismatches,
 *   - add a dir index that is missing while dir item and inode ref exist,
 *   - drop a fully consistent, error-free backref from the list,
 *   - insert a dir item/index pair when only the inode ref exists,
 *   - recreate the inode item when all three references exist without
 *     an index mismatch but the inode item is missing.
 * Returns the error code if one is set, otherwise the value of repaired.
 */
2344 static int repair_inode_backrefs(struct btrfs_root *root,
2345 struct inode_record *rec,
2346 struct cache_tree *inode_cache,
2349 struct inode_backref *tmp, *backref;
2350 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* The _safe iterator is needed: entries are removed while iterating. */
2354 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2355 if (!delete && rec->ino == root_dirid) {
2356 if (!rec->found_inode_item) {
2357 ret = create_inode_item(root, rec, backref, 1);
2364 /* Index 0 for root dir's are special, don't mess with it */
2365 if (rec->ino == root_dirid && backref->index == 0)
2369 ((backref->found_dir_index && !backref->found_inode_ref) ||
2370 (backref->found_dir_index && backref->found_inode_ref &&
2371 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2372 ret = delete_dir_index(root, inode_cache, rec, backref);
2376 list_del(&backref->list);
2380 if (!delete && !backref->found_dir_index &&
2381 backref->found_dir_item && backref->found_inode_ref) {
2382 ret = add_missing_dir_index(root, inode_cache, rec,
/*
 * NOTE(review): found_dir_index is tested twice in the condition below;
 * the second test is redundant.  Confirm whether another field was meant.
 */
2387 if (backref->found_dir_item &&
2388 backref->found_dir_index &&
2389 backref->found_dir_index) {
2390 if (!backref->errors &&
2391 backref->found_inode_ref) {
2392 list_del(&backref->list);
2398 if (!delete && (!backref->found_dir_index &&
2399 !backref->found_dir_item &&
2400 backref->found_inode_ref)) {
2401 struct btrfs_trans_handle *trans;
2402 struct btrfs_key location;
2404 ret = check_dir_conflict(root, backref->name,
2410 * let nlink fixing routine to handle it,
2411 * which can do it better.
2416 location.objectid = rec->ino;
2417 location.type = BTRFS_INODE_ITEM_KEY;
2418 location.offset = 0;
2420 trans = btrfs_start_transaction(root, 1);
2421 if (IS_ERR(trans)) {
2422 ret = PTR_ERR(trans);
2425 fprintf(stderr, "adding missing dir index/item pair "
2427 (unsigned long long)rec->ino);
2428 ret = btrfs_insert_dir_item(trans, root, backref->name,
2430 backref->dir, &location,
2431 imode_to_type(rec->imode),
2434 btrfs_commit_transaction(trans, root);
2438 if (!delete && (backref->found_inode_ref &&
2439 backref->found_dir_index &&
2440 backref->found_dir_item &&
2441 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2442 !rec->found_inode_item)) {
2443 ret = create_inode_item(root, rec, backref, 0);
2450 return ret ? ret : repaired;
2454 * To determine the file type for nlink/inode_item repair
2456 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2457 * Return -ENOENT if file type is not found.
/*
 * Store a BTRFS_FT_* value for @rec into *@type.
 *
 * The inode item's mode is preferred when an inode item was found;
 * otherwise the file type of the first backref that has a dir index or
 * dir item is used.
 */
2459 static int find_file_type(struct inode_record *rec, u8 *type)
2461 struct inode_backref *backref;
2463 /* For inode item recovered case */
2464 if (rec->found_inode_item) {
2465 *type = imode_to_type(rec->imode);
2469 list_for_each_entry(backref, &rec->backrefs, list) {
2470 if (backref->found_dir_index || backref->found_dir_item) {
2471 *type = backref->filetype;
2479 * To determine the file name for nlink repair
2481 * Return 0 if file name is found, set name and namelen.
2482 * Return -ENOENT if file name is not found.
/*
 * Copy the name of the first backref of @rec that has a dir index, dir
 * item or inode ref into @name and store its length in *@namelen.
 *
 * The copy is backref->namelen bytes with no terminator added; @name must
 * be large enough for that many bytes.
 */
2484 static int find_file_name(struct inode_record *rec,
2485 char *name, int *namelen)
2487 struct inode_backref *backref;
2489 list_for_each_entry(backref, &rec->backrefs, list) {
2490 if (backref->found_dir_index || backref->found_dir_item ||
2491 backref->found_inode_ref) {
2492 memcpy(name, backref->name, backref->namelen);
2493 *namelen = backref->namelen;
2500 /* Reset the nlink of the inode to the correct one */
/*
 * Rebuild the link count of rec->ino from scratch in three steps:
 *   1. unlink every backref, dropping from the list those that lack any
 *      of dir index, dir item or inode ref;
 *   2. set nlink in the inode item to 0;
 *   3. re-add the remaining backrefs with btrfs_add_link().
 * rec->found_link is reset to 0 first.
 */
2501 static int reset_nlink(struct btrfs_trans_handle *trans,
2502 struct btrfs_root *root,
2503 struct btrfs_path *path,
2504 struct inode_record *rec)
2506 struct inode_backref *backref;
2507 struct inode_backref *tmp;
2508 struct btrfs_key key;
2509 struct btrfs_inode_item *inode_item;
2512 /* We don't believe this either, reset it and iterate backref */
2513 rec->found_link = 0;
2515 /* Remove all backref including the valid ones */
2516 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2517 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2518 backref->index, backref->name,
2519 backref->namelen, 0);
2523 /* remove invalid backref, so it won't be added back */
2524 if (!(backref->found_dir_index &&
2525 backref->found_dir_item &&
2526 backref->found_inode_ref)) {
2527 list_del(&backref->list);
2534 /* Set nlink to 0 */
2535 key.objectid = rec->ino;
2536 key.type = BTRFS_INODE_ITEM_KEY;
2538 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2545 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2546 struct btrfs_inode_item);
2547 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2548 btrfs_mark_buffer_dirty(path->nodes[0]);
2549 btrfs_release_path(path);
2552 * Add back valid inode_ref/dir_item/dir_index,
2553 * add_link() will handle the nlink inc, so new nlink must be correct
2555 list_for_each_entry(backref, &rec->backrefs, list) {
2556 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2557 backref->name, backref->namelen,
2558 backref->filetype, &backref->index, 1);
2563 btrfs_release_path(path);
/*
 * Repair a wrong link count on rec->ino.
 *
 * The file name and type are recovered from the record first, falling
 * back to the decimal inode number as name and BTRFS_FT_REG_FILE as type.
 * The link count is then rebuilt with reset_nlink().  When no link is
 * found afterwards, a "lost+found" directory is created under
 * BTRFS_FIRST_FREE_OBJECTID and the inode is linked into it; on a name
 * collision (-EEXIST) a suffix is appended to the name and the link is
 * retried.  I_ERR_LINK_COUNT_WRONG is cleared before returning.
 */
2567 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2568 struct btrfs_root *root,
2569 struct btrfs_path *path,
2570 struct inode_record *rec)
2572 char *dir_name = "lost+found";
2573 char namebuf[BTRFS_NAME_LEN] = {0};
2578 int name_recovered = 0;
2579 int type_recovered = 0;
2583 * Get file name and type first before these invalid inode ref
2584 * are deleted by remove_all_invalid_backref()
/* Both helpers' results are negated: non-zero here means "recovered". */
2586 name_recovered = !find_file_name(rec, namebuf, &namelen);
2587 type_recovered = !find_file_type(rec, &type);
2589 if (!name_recovered) {
2590 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2591 rec->ino, rec->ino);
2592 namelen = count_digits(rec->ino);
2593 sprintf(namebuf, "%llu", rec->ino);
2596 if (!type_recovered) {
2597 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2599 type = BTRFS_FT_REG_FILE;
2603 ret = reset_nlink(trans, root, path, rec);
2606 "Failed to reset nlink for inode %llu: %s\n",
2607 rec->ino, strerror(-ret));
/* No valid link left after the reset: link the inode into lost+found. */
2611 if (rec->found_link == 0) {
2612 lost_found_ino = root->highest_inode;
2613 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2618 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2619 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2622 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2623 dir_name, strerror(-ret));
2626 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2627 namebuf, namelen, type, NULL, 1);
2629 * Add ".INO" suffix several times to handle case where
2630 * "FILENAME.INO" is already taken by another file.
2632 while (ret == -EEXIST) {
2634 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2636 if (namelen + count_digits(rec->ino) + 1 >
2641 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2643 namelen += count_digits(rec->ino) + 1;
2644 ret = btrfs_add_link(trans, root, rec->ino,
2645 lost_found_ino, namebuf,
2646 namelen, type, NULL, 1);
2650 "Failed to link the inode %llu to %s dir: %s\n",
2651 rec->ino, dir_name, strerror(-ret));
2655 * Just increase the found_link, don't actually add the
2656 * backref. This will make things easier and this inode
2657 * record will be freed after the repair is done.
2658 * So fsck will not report problem about this inode.
2661 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2662 namelen, namebuf, dir_name);
2664 printf("Fixed the nlink of inode %llu\n", rec->ino);
2667 * Clear the flag anyway, or we will loop forever for the same inode
2668 * as it will not be removed from the bad inode list and the dead loop
2671 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2672 btrfs_release_path(path);
2677 * Check if there is any normal(reg or prealloc) file extent for given
2679 * This is used to determine the file type when neither its dir_index/item or
2680 * inode_item exists.
2682 * This will *NOT* report error, if any error happens, just consider it does
2683 * not have any normal file extent.
/*
 * Scan the EXTENT_DATA items of inode @ino in @root looking for a file
 * extent whose type is not BTRFS_FILE_EXTENT_INLINE.
 */
2685 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2687 struct btrfs_path path;
2688 struct btrfs_key key;
2689 struct btrfs_key found_key;
2690 struct btrfs_file_extent_item *fi;
2694 btrfs_init_path(&path);
2696 key.type = BTRFS_EXTENT_DATA_KEY;
2699 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Search landed past the end of the leaf: continue in the next leaf. */
2704 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2705 ret = btrfs_next_leaf(root, &path);
2712 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* Items of another inode or another type end the scan. */
2714 if (found_key.objectid != ino ||
2715 found_key.type != BTRFS_EXTENT_DATA_KEY)
2717 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2718 struct btrfs_file_extent_item);
2719 type = btrfs_file_extent_type(path.nodes[0], fi);
2720 if (type != BTRFS_FILE_EXTENT_INLINE) {
2726 btrfs_release_path(&path);
/*
 * Map a BTRFS_FT_* directory entry type to the matching S_IF* file type
 * bits through a lookup table.
 *
 * NOTE(review): @type indexes the table directly and no range check is
 * visible; a value above BTRFS_FT_SYMLINK would read past the table if
 * that is the largest designated index - confirm callers only pass valid
 * types.
 */
2730 static u32 btrfs_type_to_imode(u8 type)
2732 static u32 imode_by_btrfs_type[] = {
2733 [BTRFS_FT_REG_FILE] = S_IFREG,
2734 [BTRFS_FT_DIR] = S_IFDIR,
2735 [BTRFS_FT_CHRDEV] = S_IFCHR,
2736 [BTRFS_FT_BLKDEV] = S_IFBLK,
2737 [BTRFS_FT_FIFO] = S_IFIFO,
2738 [BTRFS_FT_SOCK] = S_IFSOCK,
2739 [BTRFS_FT_SYMLINK] = S_IFLNK,
2742 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for rec->ino.
 *
 * The file type comes from find_file_type() when available.  Otherwise it
 * is guessed: REG_FILE when a non-inline file extent exists, DIR when a
 * dir item was found, REG_FILE when the record has orphan extents, and
 * REG_FILE as the final fallback.  The inode is created with
 * btrfs_new_inode(); the record is then updated, I_ERR_NO_INODE_ITEM is
 * cleared and I_ERR_LINK_COUNT_WRONG is set so that the nlink repair runs
 * afterwards.
 */
2745 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2746 struct btrfs_root *root,
2747 struct btrfs_path *path,
2748 struct inode_record *rec)
2752 int type_recovered = 0;
2755 printf("Trying to rebuild inode:%llu\n", rec->ino);
2757 type_recovered = !find_file_type(rec, &filetype);
2760 * Try to determine inode type if type not found.
2762 * For found regular file extent, it must be FILE.
2763 * For found dir_item/index, it must be DIR.
2765 * For undetermined one, use FILE as fallback.
2768 * 1. If found backref(inode_index/item is already handled) to it,
2770 * Need new inode-inode ref structure to allow search for that.
2772 if (!type_recovered) {
2773 if (rec->found_file_extent &&
2774 find_normal_file_extent(root, rec->ino)) {
2776 filetype = BTRFS_FT_REG_FILE;
2777 } else if (rec->found_dir_item) {
2779 filetype = BTRFS_FT_DIR;
2780 } else if (!list_empty(&rec->orphan_extents)) {
2782 filetype = BTRFS_FT_REG_FILE;
2784 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2787 filetype = BTRFS_FT_REG_FILE;
2791 ret = btrfs_new_inode(trans, root, rec->ino,
2792 mode | btrfs_type_to_imode(filetype));
2797 * Here inode rebuild is done, we only rebuild the inode item,
2798 * don't repair the nlink(like move to lost+found).
2799 * That is the job of nlink repair.
2801 * We just fill the record and return
/*
 * NOTE(review): the function has just rebuilt the inode item, yet the
 * flag set here is found_dir_item - confirm whether found_inode_item was
 * intended.
 */
2803 rec->found_dir_item = 1;
2804 rec->imode = mode | btrfs_type_to_imode(filetype);
2806 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2807 /* Ensure the inode_nlinks repair function will be called */
2808 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Walk rec->orphan_extents and try to attach each orphan data extent to the
 * inode again.
 *
 * @trans:	transaction handle for the free/insert operations
 * @root:	tree the inode belongs to
 * @path:	scratch path, released after the conflict lookup
 * @rec:	inode record owning the orphan list, hole tree and error bits
 *
 * For each orphan, btrfs_get_extent() is queried with the orphan's objectid,
 * offset and disk_len.  The visible code then contains a "conflicts" branch
 * that frees the data extent and an insert via btrfs_insert_file_extent()
 * that uses disk_len for both length arguments.  The conditions selecting
 * between them are not visible in this excerpt.  The iteration is the _safe
 * variant because entries are unlinked inside the loop.
 */
2813 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2814 struct btrfs_root *root,
2815 struct btrfs_path *path,
2816 struct inode_record *rec)
2818 struct orphan_data_extent *orphan;
2819 struct orphan_data_extent *tmp;
2822 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2824 * Check for conflicting file extents
2826 * Here we don't know whether the extents is compressed or not,
2827 * so we can only assume it not compressed nor data offset,
2828 * and use its disk_len as extent length.
2830 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2831 orphan->offset, orphan->disk_len, 0);
2832 btrfs_release_path(path);
2837 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2838 orphan->disk_bytenr, orphan->disk_len);
2839 ret = btrfs_free_extent(trans,
2840 root->fs_info->extent_root,
2841 orphan->disk_bytenr, orphan->disk_len,
2842 0, root->objectid, orphan->objectid,
2847 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2848 orphan->offset, orphan->disk_bytenr,
2849 orphan->disk_len, orphan->disk_len);
2853 /* Update file size info */
2854 rec->found_size += orphan->disk_len;
/* The nbytes error is dropped only when the running size matches exactly */
2855 if (rec->found_size == rec->nbytes)
2856 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2858 /* Update the file extent hole info too */
2859 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2863 if (RB_EMPTY_ROOT(&rec->holes))
2864 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2866 list_del(&orphan->list);
2869 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in rec->holes by punching explicit holes.
 *
 * @trans:	transaction handle passed to btrfs_punch_hole()
 * @root:	tree the inode belongs to; root->sectorsize is used for rounding
 * @path:	not referenced by the statements visible in this excerpt
 * @rec:	inode record owning the hole tree
 *
 * Every entry taken from the rec->holes rb-tree is passed to
 * btrfs_punch_hole() and then removed with del_file_extent_hole().
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared once the tree is empty.  A separate
 * call punches a single hole from offset 0 up to rec->isize rounded up to the
 * sector size; its guarding condition is not visible in this excerpt.
 */
2874 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2875 struct btrfs_root *root,
2876 struct btrfs_path *path,
2877 struct inode_record *rec)
2879 struct rb_node *node;
2880 struct file_extent_hole *hole;
2884 node = rb_first(&rec->holes);
2888 hole = rb_entry(node, struct file_extent_hole, node);
2889 ret = btrfs_punch_hole(trans, root, rec->ino,
2890 hole->start, hole->len);
2893 ret = del_file_extent_hole(&rec->holes, hole->start,
2897 if (RB_EMPTY_ROOT(&rec->holes))
2898 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Restart from the first remaining node; the entry just handled was deleted */
2899 node = rb_first(&rec->holes);
2901 /* special case for a file losing all its file extent */
2903 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2904 round_up(rec->isize, root->sectorsize));
2908 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2909 rec->ino, root->objectid);
/*
 * Run every applicable inode repair for @rec inside a single transaction.
 *
 * @root:	tree containing the inode
 * @rec:	inode record; the bits in rec->errors select the repairs
 *
 * The repairs are chained in a fixed order and each later step is attempted
 * only while ret is still 0.  Earlier steps may set or clear bits in
 * rec->errors that later steps test (repair_inode_no_item(), for example,
 * sets I_ERR_LINK_COUNT_WRONG).
 *
 * NOTE(review): on the visible line the result of btrfs_commit_transaction()
 * is not stored or checked -- confirm whether a commit failure should be
 * reported to the caller.
 */
2914 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2916 struct btrfs_trans_handle *trans;
2917 struct btrfs_path path;
/* These are the only error bits the repair chain below acts on */
2920 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2921 I_ERR_NO_ORPHAN_ITEM |
2922 I_ERR_LINK_COUNT_WRONG |
2923 I_ERR_NO_INODE_ITEM |
2924 I_ERR_FILE_EXTENT_ORPHAN |
2925 I_ERR_FILE_EXTENT_DISCOUNT|
2926 I_ERR_FILE_NBYTES_WRONG)))
2930 * For nlink repair, it may create a dir and add link, so
2931 * 2 for parent(256)'s dir_index and dir_item
2932 * 2 for lost+found dir's inode_item and inode_ref
2933 * 1 for the new inode_ref of the file
2934 * 2 for lost+found dir's dir_index and dir_item for the file
2936 trans = btrfs_start_transaction(root, 7);
2938 return PTR_ERR(trans);
2940 btrfs_init_path(&path);
/* The inode item is rebuilt first */
2941 if (rec->errors & I_ERR_NO_INODE_ITEM)
2942 ret = repair_inode_no_item(trans, root, &path, rec);
2943 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2944 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2945 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2946 ret = repair_inode_discount_extent(trans, root, &path, rec);
2947 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2948 ret = repair_inode_isize(trans, root, &path, rec);
2949 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2950 ret = repair_inode_orphan_item(trans, root, &path, rec);
2951 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2952 ret = repair_inode_nlinks(trans, root, &path, rec);
2953 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2954 ret = repair_inode_nbytes(trans, root, &path, rec);
/* No condition on ret is visible before the commit */
2955 btrfs_commit_transaction(trans, root);
2956 btrfs_release_path(&path);
/*
 * Post-process the inode records collected for @root: repair them when repair
 * mode is on, then report what is still wrong.
 *
 * @root:		tree the records belong to
 * @inode_cache:	cache tree of ptr_node entries, one per inode record;
 *			entries are removed from it as they are processed
 *
 * Returns -1 when the error counter is positive, 0 otherwise.
 */
2960 static int check_inode_recs(struct btrfs_root *root,
2961 struct cache_tree *inode_cache)
2963 struct cache_extent *cache;
2964 struct ptr_node *node;
2965 struct inode_record *rec;
2966 struct inode_backref *backref;
2971 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* Root item reports zero refs: only warn if inode records were nevertheless collected */
2973 if (btrfs_root_refs(&root->root_item) == 0) {
2974 if (!cache_tree_empty(inode_cache))
2975 fprintf(stderr, "warning line %d\n", __LINE__);
2980 * We need to record the highest inode number for later 'lost+found'
2982 * We must select an ino not used/referred by any existing inode, or
2983 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2984 * this may cause 'lost+found' dir has wrong nlinks.
2986 cache = last_cache_extent(inode_cache);
2988 node = container_of(cache, struct ptr_node, cache);
2990 if (rec->ino > root->highest_inode)
2991 root->highest_inode = rec->ino;
2995 * We need to repair backrefs first because we could change some of the
2996 * errors in the inode recs.
2998 * We also need to go through and delete invalid backrefs first and then
2999 * add the correct ones second. We do this because we may get EEXIST
3000 * when adding back the correct index because we hadn't yet deleted the
3003 * For example, if we were missing a dir index then the directories
3004 * isize would be wrong, so if we fixed the isize to what we thought it
3005 * would be and then fixed the backref we'd still have a invalid fs, so
3006 * we need to add back the dir index and then check to see if the isize
3011 if (stage == 3 && !err)
/* Backref repair pass over all records; skipped entirely unless repair is set */
3014 cache = search_cache_extent(inode_cache, 0);
3015 while (repair && cache) {
3016 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current entry may be removed from the cache */
3018 cache = next_cache_extent(cache);
3020 /* Need to free everything up and rescan */
3022 remove_cache_extent(inode_cache, &node->cache);
3024 free_inode_rec(rec);
3028 if (list_empty(&rec->backrefs))
3031 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Fetch the record of the root directory inode and validate it */
3045 rec = get_inode_rec(inode_cache, root_dirid, 0);
3046 BUG_ON(IS_ERR(rec));
3048 ret = check_root_dir(rec);
3050 fprintf(stderr, "root %llu root dir %llu error\n",
3051 (unsigned long long)root->root_key.objectid,
3052 (unsigned long long)root_dirid);
3053 print_inode_error(root, rec);
3058 struct btrfs_trans_handle *trans;
/* Transaction used to recreate a missing root directory */
3060 trans = btrfs_start_transaction(root, 1);
3061 if (IS_ERR(trans)) {
3062 err = PTR_ERR(trans);
3067 "root %llu missing its root dir, recreating\n",
3068 (unsigned long long)root->objectid);
3070 ret = btrfs_make_root_dir(trans, root, root_dirid);
3073 btrfs_commit_transaction(trans, root);
3077 fprintf(stderr, "root %llu root dir %llu not found\n",
3078 (unsigned long long)root->root_key.objectid,
3079 (unsigned long long)root_dirid);
/* Main pass: take records out of the cache one at a time */
3083 cache = search_cache_extent(inode_cache, 0);
3086 node = container_of(cache, struct ptr_node, cache);
3088 remove_cache_extent(inode_cache, &node->cache);
/* Records for the root directory and for BTRFS_ORPHAN_OBJECTID are simply freed */
3090 if (rec->ino == root_dirid ||
3091 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3092 free_inode_rec(rec);
/* Re-check a pending "no orphan item" error against the tree */
3096 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3097 ret = check_orphan_item(root, rec->ino);
3099 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3100 if (can_free_inode_rec(rec)) {
3101 free_inode_rec(rec);
/* Derive the remaining error bits from what was found for the inode */
3106 if (!rec->found_inode_item)
3107 rec->errors |= I_ERR_NO_INODE_ITEM;
3108 if (rec->found_link != rec->nlink)
3109 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3111 ret = try_repair_inode(root, rec);
3112 if (ret == 0 && can_free_inode_rec(rec)) {
3113 free_inode_rec(rec);
3119 if (!(repair && ret == 0))
/* Report the record and every backref, flagging which items were never seen */
3121 print_inode_error(root, rec);
3122 list_for_each_entry(backref, &rec->backrefs, list) {
3123 if (!backref->found_dir_item)
3124 backref->errors |= REF_ERR_NO_DIR_ITEM;
3125 if (!backref->found_dir_index)
3126 backref->errors |= REF_ERR_NO_DIR_INDEX;
3127 if (!backref->found_inode_ref)
3128 backref->errors |= REF_ERR_NO_INODE_REF;
3129 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3130 " namelen %u name %s filetype %d errors %x",
3131 (unsigned long long)backref->dir,
3132 (unsigned long long)backref->index,
3133 backref->namelen, backref->name,
3134 backref->filetype, backref->errors);
3135 print_ref_error(backref->errors);
3137 free_inode_rec(rec);
3139 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for an objectid in @root_cache; the visible code
 * also allocates a zeroed record, initialises it and inserts it into the
 * cache (presumably when the lookup finds nothing -- the branch conditions
 * are not visible in this excerpt).
 *
 * A new record covers the single-unit range [objectid, objectid + 1) in the
 * cache tree and starts with an empty backref list.
 *
 * Returns ERR_PTR(-ENOMEM) or ERR_PTR(-EEXIST) on the failure paths visible
 * below; callers in this file check the result with IS_ERR().
 */
3142 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3145 struct cache_extent *cache;
3146 struct root_record *rec = NULL;
3149 cache = lookup_cache_extent(root_cache, objectid, 1);
3151 rec = container_of(cache, struct root_record, cache);
3153 rec = calloc(1, sizeof(*rec));
3155 return ERR_PTR(-ENOMEM);
3156 rec->objectid = objectid;
3157 INIT_LIST_HEAD(&rec->backrefs);
3158 rec->cache.start = objectid;
3159 rec->cache.size = 1;
3161 ret = insert_cache_extent(root_cache, &rec->cache);
3163 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec matching (@ref_root, @dir, @name/@namelen); the
 * visible code also allocates a new backref and appends it to rec->backrefs
 * (presumably when no existing entry matches -- the return statements are not
 * visible in this excerpt).
 *
 * @index takes no part in the comparison; it is only stored in a newly
 * allocated backref.  The allocation reserves @namelen + 1 extra bytes so the
 * copied name can be NUL-terminated.
 */
3168 static struct root_backref *get_root_backref(struct root_record *rec,
3169 u64 ref_root, u64 dir, u64 index,
3170 const char *name, int namelen)
3172 struct root_backref *backref;
3174 list_for_each_entry(backref, &rec->backrefs, list) {
3175 if (backref->ref_root != ref_root || backref->dir != dir ||
3176 backref->namelen != namelen)
3178 if (memcmp(name, backref->name, namelen))
/* Room for the struct, the name bytes and a trailing NUL */
3183 backref = calloc(1, sizeof(*backref) + namelen + 1);
3186 backref->ref_root = ref_root;
3188 backref->index = index;
3189 backref->namelen = namelen;
3190 memcpy(backref->name, name, namelen);
3191 backref->name[namelen] = '\0';
3192 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Per-entry destructor for the root record cache: converts @cache back to
 * its enclosing root_record and unlinks every entry of rec->backrefs.  The
 * statements that release the memory are not visible in this excerpt.
 */
3196 static void free_root_record(struct cache_extent *cache)
3198 struct root_record *rec;
3199 struct root_backref *backref;
3201 rec = container_of(cache, struct root_record, cache);
/* Always take the first list entry until the list is empty */
3202 while (!list_empty(&rec->backrefs)) {
3203 backref = to_root_backref(rec->backrefs.next);
3204 list_del(&backref->list);
/* Presumably generates free_root_recs_tree(), which check_fs_roots() calls -- verify against the macro definition */
3211 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of type @item_type referring to root @root_id was seen,
 * merging the information into the matching root_backref.
 *
 * @root_cache:	cache of root_record entries
 * @root_id:	objectid of the root whose record is updated
 * @ref_root:	root the reference comes from
 * @dir:	directory id stored in the reference
 * @index:	index stored in the reference
 * @name:	name bytes (not necessarily NUL-terminated)
 * @namelen:	number of valid bytes in @name
 * @item_type:	BTRFS_DIR_ITEM_KEY, BTRFS_DIR_INDEX_KEY, BTRFS_ROOT_REF_KEY or
 *		BTRFS_ROOT_BACKREF_KEY
 * @errors:	REF_ERR_* bits OR-ed into the backref
 *
 * A backref is marked reachable once both a forward ref and a dir item have
 * been recorded for it.
 */
3213 static int add_root_backref(struct cache_tree *root_cache,
3214 u64 root_id, u64 ref_root, u64 dir, u64 index,
3215 const char *name, int namelen,
3216 int item_type, int errors)
3218 struct root_record *rec;
3219 struct root_backref *backref;
3221 rec = get_root_rec(root_cache, root_id);
3222 BUG_ON(IS_ERR(rec));
3223 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3226 backref->errors |= errors;
/* The index is cross-checked for every item type except DIR_ITEM */
3228 if (item_type != BTRFS_DIR_ITEM_KEY) {
3229 if (backref->found_dir_index || backref->found_back_ref ||
3230 backref->found_forward_ref) {
3231 if (backref->index != index)
3232 backref->errors |= REF_ERR_INDEX_UNMATCH;
3234 backref->index = index;
/* Note which kind of item has now been seen; duplicates of root refs are flagged */
3238 if (item_type == BTRFS_DIR_ITEM_KEY) {
3239 if (backref->found_forward_ref)
3241 backref->found_dir_item = 1;
3242 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3243 backref->found_dir_index = 1;
3244 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3245 if (backref->found_forward_ref)
3246 backref->errors |= REF_ERR_DUP_ROOT_REF;
3247 else if (backref->found_dir_item)
3249 backref->found_forward_ref = 1;
3250 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3251 if (backref->found_back_ref)
3252 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3253 backref->found_back_ref = 1;
3258 if (backref->found_forward_ref && backref->found_dir_item)
3259 backref->reachable = 1;
/*
 * Drain @src_cache (inode-style records gathered while walking @root) and
 * turn the directory references found on them into root backrefs in
 * @dst_cache.
 *
 * @root:	tree that was walked; its objectid becomes the ref_root
 * @src_cache:	per-root cache that is emptied
 * @dst_cache:	root record cache that receives the backrefs
 *
 * For the tree reloc root the source cache is simply freed.  Otherwise each
 * record is removed and checked with is_child_root().  For its backrefs,
 * add_root_backref() is called once per dir item and once per dir index that
 * was found, and the record is then freed.  How the is_child_root() result
 * gates this is not visible in this excerpt.
 */
3263 static int merge_root_recs(struct btrfs_root *root,
3264 struct cache_tree *src_cache,
3265 struct cache_tree *dst_cache)
3267 struct cache_extent *cache;
3268 struct ptr_node *node;
3269 struct inode_record *rec;
3270 struct inode_backref *backref;
3273 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3274 free_inode_recs_tree(src_cache);
3279 cache = search_cache_extent(src_cache, 0);
3282 node = container_of(cache, struct ptr_node, cache);
3284 remove_cache_extent(src_cache, &node->cache);
/* rec->ino is passed as the candidate child root id */
3287 ret = is_child_root(root, root->objectid, rec->ino);
3293 list_for_each_entry(backref, &rec->backrefs, list) {
/* Records handled here must not carry an inode ref */
3294 BUG_ON(backref->found_inode_ref);
3295 if (backref->found_dir_item)
3296 add_root_backref(dst_cache, rec->ino,
3297 root->root_key.objectid, backref->dir,
3298 backref->index, backref->name,
3299 backref->namelen, BTRFS_DIR_ITEM_KEY,
3301 if (backref->found_dir_index)
3302 add_root_backref(dst_cache, rec->ino,
3303 root->root_key.objectid, backref->dir,
3304 backref->index, backref->name,
3305 backref->namelen, BTRFS_DIR_INDEX_KEY,
3309 free_inode_rec(rec);
/*
 * Verify the references between subvolume roots recorded in @root_cache and
 * print every root or backref that is left unresolved.
 *
 * @root:	any root of the filesystem; root->fs_info->tree_root is used
 *		for the orphan item lookup
 * @root_cache:	cache of root_record entries
 *
 * The first walk looks at the reachable backrefs of each record and marks a
 * backref unreachable after examining the found_ref count of the referencing
 * root.  The second walk reports unreferenced fs trees and backrefs with
 * missing items.
 *
 * Returns 1 when the error counter is positive, 0 otherwise.
 */
3316 static int check_root_refs(struct btrfs_root *root,
3317 struct cache_tree *root_cache)
3319 struct root_record *rec;
3320 struct root_record *ref_root;
3321 struct root_backref *backref;
3322 struct cache_extent *cache;
/* Fetch (or create) the record of the top-level fs tree */
3328 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3329 BUG_ON(IS_ERR(rec));
3332 /* fixme: this can not detect circular references */
3335 cache = search_cache_extent(root_cache, 0);
3339 rec = container_of(cache, struct root_record, cache);
3340 cache = next_cache_extent(cache);
3342 if (rec->found_ref == 0)
3345 list_for_each_entry(backref, &rec->backrefs, list) {
3346 if (!backref->reachable)
3349 ref_root = get_root_rec(root_cache,
3351 BUG_ON(IS_ERR(ref_root));
3352 if (ref_root->found_ref > 0)
3355 backref->reachable = 0;
3357 if (rec->found_ref == 0)
/* Reporting walk over all records */
3363 cache = search_cache_extent(root_cache, 0);
3367 rec = container_of(cache, struct root_record, cache);
3368 cache = next_cache_extent(cache);
/* Unreferenced root whose objectid lies in the free objectid range */
3370 if (rec->found_ref == 0 &&
3371 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3372 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3373 ret = check_orphan_item(root->fs_info->tree_root,
3379 * If we don't have a root item then we likely just have
3380 * a dir item in a snapshot for this root but no actual
3381 * ref key or anything so it's meaningless.
3383 if (!rec->found_root_item)
3386 fprintf(stderr, "fs tree %llu not referenced\n",
3387 (unsigned long long)rec->objectid);
3391 if (rec->found_ref > 0 && !rec->found_root_item)
/* Flag every item kind that was never seen for this backref */
3393 list_for_each_entry(backref, &rec->backrefs, list) {
3394 if (!backref->found_dir_item)
3395 backref->errors |= REF_ERR_NO_DIR_ITEM;
3396 if (!backref->found_dir_index)
3397 backref->errors |= REF_ERR_NO_DIR_INDEX;
3398 if (!backref->found_back_ref)
3399 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3400 if (!backref->found_forward_ref)
3401 backref->errors |= REF_ERR_NO_ROOT_REF;
3402 if (backref->reachable && backref->errors)
3409 fprintf(stderr, "fs tree %llu refs %u %s\n",
3410 (unsigned long long)rec->objectid, rec->found_ref,
3411 rec->found_root_item ? "" : "not found");
3413 list_for_each_entry(backref, &rec->backrefs, list) {
3414 if (!backref->reachable)
3416 if (!backref->errors && rec->found_root_item)
3418 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3419 " index %llu namelen %u name %s errors %x\n",
3420 (unsigned long long)backref->ref_root,
3421 (unsigned long long)backref->dir,
3422 (unsigned long long)backref->index,
3423 backref->namelen, backref->name,
3425 print_ref_error(backref->errors);
3428 return errors > 0 ? 1 : 0;
/*
 * Decode the btrfs_root_ref item at @slot of @eb and record it in
 * @root_cache through add_root_backref().
 *
 * @eb:		leaf containing the item
 * @slot:	slot of the item inside @eb
 * @key:	key of the item; its type decides which of key->objectid and
 *		key->offset is the referenced root
 * @root_cache:	cache of root_record entries
 *
 * The name stored after the item header is copied into a local buffer of
 * BTRFS_NAME_LEN bytes.  When the stored length does not fit, the copy is
 * limited to BTRFS_NAME_LEN and REF_ERR_NAME_TOO_LONG is passed along.  The
 * buffer is not NUL-terminated here; its length is passed explicitly.
 */
3431 static int process_root_ref(struct extent_buffer *eb, int slot,
3432 struct btrfs_key *key,
3433 struct cache_tree *root_cache)
3439 struct btrfs_root_ref *ref;
3440 char namebuf[BTRFS_NAME_LEN];
3443 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3445 dirid = btrfs_root_ref_dirid(eb, ref);
3446 index = btrfs_root_ref_sequence(eb, ref);
3447 name_len = btrfs_root_ref_name_len(eb, ref);
3449 if (name_len <= BTRFS_NAME_LEN) {
3453 len = BTRFS_NAME_LEN;
3454 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes start right after the fixed-size item header */
3456 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
/* ROOT_REF: key->offset is passed as root_id and key->objectid as ref_root */
3458 if (key->type == BTRFS_ROOT_REF_KEY) {
3459 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3460 index, namebuf, len, key->type, error);
/* Other call: the two key fields are passed in the opposite roles */
3462 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3463 index, namebuf, len, key->type, error);
/*
 * Per-entry destructor for the corrupt block cache: converts @cache back to
 * its enclosing btrfs_corrupt_block.  The statement that releases it is not
 * visible in this excerpt.
 */
3468 static void free_corrupt_block(struct cache_extent *cache)
3470 struct btrfs_corrupt_block *corrupt;
3472 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* Presumably generates free_corrupt_blocks_tree(), which check_fs_root() calls -- verify against the macro definition */
3476 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3479 * Repair the btree of the given root.
3481 * The fix is to remove the node key in corrupt_blocks cache_tree
3482 * and rebalance the tree.
3483 * After the fix, the btree should be writeable.
/*
 * Remove the tree blocks listed in @corrupt_blocks from @root and rebalance.
 *
 * @root:		tree to repair
 * @corrupt_blocks:	cache tree of btrfs_corrupt_block entries, each holding
 *			the level and node key of a bad block
 *
 * Works in two passes inside one transaction.  The first pass searches for
 * each recorded key at the recorded level (path.lowest_level), deletes the
 * pointer with btrfs_del_ptr() and frees the extent.  The second pass
 * searches for the same keys again with ins_len -1 so that the search itself
 * rebalances the tree.
 */
3485 static int repair_btree(struct btrfs_root *root,
3486 struct cache_tree *corrupt_blocks)
3488 struct btrfs_trans_handle *trans;
3489 struct btrfs_path path;
3490 struct btrfs_corrupt_block *corrupt;
3491 struct cache_extent *cache;
3492 struct btrfs_key key;
3497 if (cache_tree_empty(corrupt_blocks))
3500 trans = btrfs_start_transaction(root, 1);
3501 if (IS_ERR(trans)) {
3502 ret = PTR_ERR(trans);
3503 fprintf(stderr, "Error starting transaction: %s\n",
3507 btrfs_init_path(&path);
/* Pass 1: delete the pointer to every recorded corrupt block */
3508 cache = first_cache_extent(corrupt_blocks);
3510 corrupt = container_of(cache, struct btrfs_corrupt_block,
3512 level = corrupt->level;
/* Stop the search at the level of the corrupt block */
3513 path.lowest_level = level;
3514 key.objectid = corrupt->key.objectid;
3515 key.type = corrupt->key.type;
3516 key.offset = corrupt->key.offset;
3519 * Here we don't want to do any tree balance, since it may
3520 * cause a balance with corrupted brother leaf/node,
3521 * so ins_len set to 0 here.
3522 * Balance will be done after all corrupt node/leaf is deleted.
3524 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* Remember the block address before its pointer is removed */
3527 offset = btrfs_node_blockptr(path.nodes[level],
3530 /* Remove the ptr */
3531 ret = btrfs_del_ptr(trans, root, &path, level,
3536 * Remove the corresponding extent
3537 * return value is not concerned.
3539 btrfs_release_path(&path);
3540 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3541 0, root->root_key.objectid,
3543 cache = next_cache_extent(cache);
3546 /* Balance the btree using btrfs_search_slot() */
3547 cache = first_cache_extent(corrupt_blocks);
3549 corrupt = container_of(cache, struct btrfs_corrupt_block,
3551 memcpy(&key, &corrupt->key, sizeof(key));
3552 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3555 /* return will always >0 since it won't find the item */
3557 btrfs_release_path(&path);
3558 cache = next_cache_extent(cache);
3561 btrfs_commit_transaction(trans, root);
3562 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree: walk it, collect inode and root records,
 * optionally repair corrupt tree blocks, then verify the collected records.
 *
 * @root:	tree to check
 * @root_cache:	global cache of root_record entries that is updated
 * @wc:		walk control state shared between roots; its node array is
 *		reset here
 *
 * While this function runs, root->fs_info->corrupt_blocks points at a local
 * cache tree; it is reset to NULL before returning.
 */
3566 static int check_fs_root(struct btrfs_root *root,
3567 struct cache_tree *root_cache,
3568 struct walk_control *wc)
3574 struct btrfs_path path;
3575 struct shared_node root_node;
3576 struct root_record *rec;
3577 struct btrfs_root_item *root_item = &root->root_item;
3578 struct cache_tree corrupt_blocks;
3579 struct orphan_data_extent *orphan;
3580 struct orphan_data_extent *tmp;
3581 enum btrfs_tree_block_status status;
3582 struct node_refs nrefs;
3585 * Reuse the corrupt_block cache tree to record corrupted tree block
3587 * Unlike the usage in extent tree check, here we do it in a per
3588 * fs/subvol tree base.
3590 cache_tree_init(&corrupt_blocks);
3591 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* Every root except the tree reloc root gets a root record */
3593 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3594 rec = get_root_rec(root_cache, root->root_key.objectid);
3595 BUG_ON(IS_ERR(rec));
3596 if (btrfs_root_refs(root_item) > 0)
3597 rec->found_root_item = 1;
3600 btrfs_init_path(&path);
3601 memset(&root_node, 0, sizeof(root_node));
3602 cache_tree_init(&root_node.root_cache);
3603 cache_tree_init(&root_node.inode_cache);
3604 memset(&nrefs, 0, sizeof(nrefs));
3606 /* Move the orphan extent record to corresponding inode_record */
3607 list_for_each_entry_safe(orphan, tmp,
3608 &root->orphan_data_extents, list) {
3609 struct inode_record *inode;
3611 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3613 BUG_ON(IS_ERR(inode));
3614 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3615 list_move(&orphan->list, &inode->orphan_extents);
/* Start the walk at the level of the root block */
3618 level = btrfs_header_level(root->node);
3619 memset(wc->nodes, 0, sizeof(wc->nodes));
3620 wc->nodes[level] = &root_node;
3621 wc->active_node = level;
3622 wc->root_level = level;
3624 /* We may not have checked the root block, lets do that now */
3625 if (btrfs_is_leaf(root->node))
3626 status = btrfs_check_leaf(root, NULL, root->node);
3628 status = btrfs_check_node(root, NULL, root->node);
3629 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Start at the root block when the root has refs or its drop_progress objectid is 0 */
3632 if (btrfs_root_refs(root_item) > 0 ||
3633 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3634 path.nodes[level] = root->node;
3635 extent_buffer_get(root->node);
3636 path.slots[level] = 0;
3638 struct btrfs_key key;
3639 struct btrfs_disk_key found_key;
/* Otherwise position the path at the recorded drop_progress key and drop_level */
3641 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3642 level = root_item->drop_level;
3643 path.lowest_level = level;
3644 if (level > btrfs_header_level(root->node) ||
3645 level >= BTRFS_MAX_LEVEL) {
3646 error("ignoring invalid drop level: %u", level);
3649 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3652 btrfs_node_key(path.nodes[level], &found_key,
3654 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3655 sizeof(found_key)));
3659 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3665 wret = walk_up_tree(root, &path, wc, &level);
3672 btrfs_release_path(&path);
/* List every corrupt block recorded during the walk, then attempt the btree repair */
3674 if (!cache_tree_empty(&corrupt_blocks)) {
3675 struct cache_extent *cache;
3676 struct btrfs_corrupt_block *corrupt;
3678 printf("The following tree block(s) is corrupted in tree %llu:\n",
3679 root->root_key.objectid);
3680 cache = first_cache_extent(&corrupt_blocks);
3682 corrupt = container_of(cache,
3683 struct btrfs_corrupt_block,
3685 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3686 cache->start, corrupt->level,
3687 corrupt->key.objectid, corrupt->key.type,
3688 corrupt->key.offset);
3689 cache = next_cache_extent(cache);
3692 printf("Try to repair the btree for root %llu\n",
3693 root->root_key.objectid);
3694 ret = repair_btree(root, &corrupt_blocks);
3696 fprintf(stderr, "Failed to repair btree: %s\n",
3699 printf("Btree for root %llu is fixed\n",
3700 root->root_key.objectid);
/* Fold the child-root records of this tree into the global root cache */
3704 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* Mark the record the walk was still working on as checked */
3708 if (root_node.current) {
3709 root_node.current->checked = 1;
3710 maybe_free_inode_rec(&root_node.inode_cache,
3714 err = check_inode_recs(root, &root_node.inode_cache);
/* Tear down the per-root state set up at the top */
3718 free_corrupt_blocks_tree(&corrupt_blocks);
3719 root->fs_info->corrupt_blocks = NULL;
3720 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Decide whether @objectid names a tree that check_fs_roots() should walk.
 *
 * The tree reloc and data reloc tree objectids are special-cased (the
 * statement taken for them is not visible in this excerpt); every other
 * objectid gets the answer of is_fstree().
 */
3724 static int fs_root_objectid(u64 objectid)
3726 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3727 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3729 return is_fstree(objectid);
/*
 * Iterate over the items of the tree root and check every fs/subvolume tree
 * found there.
 *
 * @root:	any root of the filesystem; root->fs_info->tree_root is the
 *		tree that is scanned
 * @root_cache:	cache of root_record entries filled while checking
 *
 * ROOT_ITEM keys accepted by fs_root_objectid() are read and passed to
 * check_fs_root().  ROOT_REF and ROOT_BACKREF keys are passed to
 * process_root_ref().  When check_fs_root() returns -EAGAIN, or the tree
 * root node differs from the one remembered at the start, the root cache is
 * freed and the path released (presumably to restart the scan -- the jump is
 * not visible in this excerpt).
 */
3732 static int check_fs_roots(struct btrfs_root *root,
3733 struct cache_tree *root_cache)
3735 struct btrfs_path path;
3736 struct btrfs_key key;
3737 struct walk_control wc;
3738 struct extent_buffer *leaf, *tree_node;
3739 struct btrfs_root *tmp_root;
3740 struct btrfs_root *tree_root = root->fs_info->tree_root;
/* Progress reporting is optional */
3744 if (ctx.progress_enabled) {
3745 ctx.tp = TASK_FS_ROOTS;
3746 task_start(ctx.info);
3750 * Just in case we made any changes to the extent tree that weren't
3751 * reflected into the free space cache yet.
3754 reset_cached_block_groups(root->fs_info);
3755 memset(&wc, 0, sizeof(wc));
3756 cache_tree_init(&wc.shared);
3757 btrfs_init_path(&path);
3762 key.type = BTRFS_ROOT_ITEM_KEY;
3763 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the current tree root node so a later change can be detected */
3768 tree_node = tree_root->node;
3770 if (tree_node != tree_root->node) {
3771 free_root_recs_tree(root_cache);
3772 btrfs_release_path(&path);
3775 leaf = path.nodes[0];
3776 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3777 ret = btrfs_next_leaf(tree_root, &path);
3783 leaf = path.nodes[0];
3785 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3786 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3787 fs_root_objectid(key.objectid)) {
/* The tree reloc root is read without the root cache and freed again below */
3788 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3789 tmp_root = btrfs_read_fs_root_no_cache(
3790 root->fs_info, &key);
3792 key.offset = (u64)-1;
3793 tmp_root = btrfs_read_fs_root(
3794 root->fs_info, &key);
3796 if (IS_ERR(tmp_root)) {
3800 ret = check_fs_root(tmp_root, root_cache, &wc);
3801 if (ret == -EAGAIN) {
3802 free_root_recs_tree(root_cache);
3803 btrfs_release_path(&path);
3808 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3809 btrfs_free_fs_root(tmp_root);
3810 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3811 key.type == BTRFS_ROOT_BACKREF_KEY) {
3812 process_root_ref(leaf, path.slots[0], &key,
3819 btrfs_release_path(&path);
/* The shared-node cache is expected to be empty after it is freed; warn otherwise */
3821 free_extent_cache_tree(&wc.shared);
3822 if (!cache_tree_empty(&wc.shared))
3823 fprintf(stderr, "warning line %d\n", __LINE__);
3825 task_stop(ctx.info);
/*
 * Error bits, one per kind of problem.  The checking functions that follow
 * OR them into their result (see find_dir_item() and check_inode_ref()).
 * Each value is a distinct single bit so several can be combined.  Bits 0 and
 * 14 are not assigned in this list.
 */
3830 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
3831 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
3832 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
3833 #define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
3834 #define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
3835 #define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
3836 #define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
3837 #define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
3838 #define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
3839 #define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
3840 #define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
3841 #define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
3842 #define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
3843 #define LAST_ITEM (1<<15) /* Complete this tree traversal */
3846 * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
3847 * INODE_REF/INODE_EXTREF match.
3849 * @root: the root of the fs/file tree
3850 * @ref_key: the key of the INODE_REF/INODE_EXTREF
3851 * @key: the key of the DIR_ITEM/DIR_INDEX
3852 * @index: the index in the INODE_REF/INODE_EXTREF, be used to
3853 * distinguish root_dir between normal dir/file
3854 * @name: the name in the INODE_REF/INODE_EXTREF
3855 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
3856 * @mode: the st_mode of INODE_ITEM
3858 * Return 0 if no error occurred.
3859 * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
3860 * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal dir/file.
3862 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
3863 * not match for normal dir/file.
/*
 * Implementation notes for the lookup described above.
 *
 * The item at @key may pack several btrfs_dir_item entries back to back.
 * The loop walks them by advancing sizeof(*di) + name_len + data_len bytes
 * per entry until the item size is consumed.  ret is preset to
 * DIR_ITEM_MISMATCH for each entry and compared against location,
 * file type and name; the statements that record a match are not visible in
 * this excerpt.
 *
 * NOTE(review): namebuf holds exactly BTRFS_NAME_LEN bytes and a name of
 * that length leaves no room for a terminating NUL; the visible comparison
 * uses strncmp() with an explicit length, which is safe, but confirm that
 * namebuf is never printed with "%s".
 */
3865 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
3866 struct btrfs_key *key, u64 index, char *name,
3867 u32 namelen, u32 mode)
3869 struct btrfs_path path;
3870 struct extent_buffer *node;
3871 struct btrfs_dir_item *di;
3872 struct btrfs_key location;
3873 char namebuf[BTRFS_NAME_LEN] = {0};
3883 btrfs_init_path(&path);
/* NULL transaction, ins_len 0, cow 0: a plain lookup of @key */
3884 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
3886 ret = DIR_ITEM_MISSING;
3890 /* Process root dir and goto out*/
3893 ret = ROOT_DIR_ERROR;
3895 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
3897 ref_key->type == BTRFS_INODE_REF_KEY ?
3899 ref_key->objectid, ref_key->offset,
3900 key->type == BTRFS_DIR_ITEM_KEY ?
3901 "DIR_ITEM" : "DIR_INDEX");
3909 /* Process normal file/dir */
3911 ret = DIR_ITEM_MISSING;
3913 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
3915 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3916 ref_key->objectid, ref_key->offset,
3917 key->type == BTRFS_DIR_ITEM_KEY ?
3918 "DIR_ITEM" : "DIR_INDEX",
3919 key->objectid, key->offset, namelen, name,
3920 imode_to_type(mode));
3924 /* Check whether inode_id/filetype/name match */
3925 node = path.nodes[0];
3926 slot = path.slots[0];
3927 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
3928 total = btrfs_item_size_nr(node, slot);
3929 while (cur < total) {
3930 ret = DIR_ITEM_MISMATCH;
3931 name_len = btrfs_dir_name_len(node, di);
3932 data_len = btrfs_dir_data_len(node, di);
/* The entry must point at the INODE_ITEM (offset 0) of the referencing inode */
3934 btrfs_dir_item_key_to_cpu(node, di, &location);
3935 if (location.objectid != ref_key->objectid ||
3936 location.type != BTRFS_INODE_ITEM_KEY ||
3937 location.offset != 0)
/* The stored file type must agree with the type derived from @mode */
3940 filetype = btrfs_dir_type(node, di);
3941 if (imode_to_type(mode) != filetype)
3944 if (name_len <= BTRFS_NAME_LEN) {
3947 len = BTRFS_NAME_LEN;
3948 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
3950 key->type == BTRFS_DIR_ITEM_KEY ?
3951 "DIR_ITEM" : "DIR_INDEX",
3952 key->objectid, key->offset, name_len);
/* The name bytes follow the fixed-size entry header */
3954 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
3955 if (len != namelen || strncmp(namebuf, name, len))
/* Step to the next packed entry using the untrimmed on-disk lengths */
3961 len = sizeof(*di) + name_len + data_len;
3962 di = (struct btrfs_dir_item *)((char *)di + len);
3965 if (ret == DIR_ITEM_MISMATCH)
3967 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
3969 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3970 ref_key->objectid, ref_key->offset,
3971 key->type == BTRFS_DIR_ITEM_KEY ?
3972 "DIR_ITEM" : "DIR_INDEX",
3973 key->objectid, key->offset, namelen, name,
3974 imode_to_type(mode));
3976 btrfs_release_path(&path);
3981 * Traverse the given INODE_REF and call find_dir_item() to find related
3982 * DIR_ITEM/DIR_INDEX.
3984 * @root: the root of the fs/file tree
3985 * @ref_key: the key of the INODE_REF
3986 * @refs: the count of INODE_REF
3987 * @mode: the st_mode of INODE_ITEM
3989 * Return 0 if no error occurred.
/*
 * Implementation notes for the traversal described above.
 *
 * One INODE_REF item may pack several refs.  Each ref is decoded, its name is
 * copied into namebuf (trimmed to BTRFS_NAME_LEN with a warning when the
 * stored length is larger), and find_dir_item() is called twice: once with a
 * DIR_INDEX key and once with a DIR_ITEM key whose offset is the name hash.
 * Both keys use ref_key->offset as objectid.
 *
 * NOTE(review): namebuf holds exactly BTRFS_NAME_LEN bytes, so a name of
 * maximum length is not NUL-terminated, and the buffer is not cleared
 * between refs.  The error() call below formats a "%s" argument that is not
 * visible in this excerpt -- confirm it cannot read past the buffer or print
 * stale bytes from a previous, longer name.
 */
3991 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
3992 struct extent_buffer *node, int slot, u64 *refs,
3995 struct btrfs_key key;
3996 struct btrfs_inode_ref *ref;
3997 char namebuf[BTRFS_NAME_LEN] = {0};
4005 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4006 total = btrfs_item_size_nr(node, slot);
4009 /* Update inode ref count */
4012 index = btrfs_inode_ref_index(node, ref);
4013 name_len = btrfs_inode_ref_name_len(node, ref);
4014 if (name_len <= BTRFS_NAME_LEN) {
4017 len = BTRFS_NAME_LEN;
4018 warning("root %llu INODE_REF[%llu %llu] name too long",
4019 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes follow the fixed-size ref header */
4022 read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4024 /* Check root dir ref name */
4025 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4026 error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't be %s",
4027 root->objectid, ref_key->objectid, ref_key->offset,
4029 err |= ROOT_DIR_ERROR;
4032 /* Find related DIR_INDEX */
4033 key.objectid = ref_key->offset;
4034 key.type = BTRFS_DIR_INDEX_KEY;
4036 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4039 /* Find related dir_item */
4040 key.objectid = ref_key->offset;
4041 key.type = BTRFS_DIR_ITEM_KEY;
4042 key.offset = btrfs_name_hash(namebuf, len);
4043 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step to the next packed ref using the untrimmed on-disk name length */
4046 len = sizeof(*ref) + name_len;
4047 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4056 * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4057 * DIR_ITEM/DIR_INDEX.
4059 * @root: the root of the fs/file tree
4060 * @ref_key: the key of the INODE_EXTREF
4061 * @refs: the count of INODE_EXTREF
4062 * @mode: the st_mode of INODE_ITEM
4064 * Return 0 if no error occurred.
/*
 * check_inode_extref() - examine the INODE_EXTREF item at @node/@slot and
 * look up the DIR_INDEX and DIR_ITEM for its name through find_dir_item().
 *
 * NOTE(review): short source lines (braces, short declarations, loop control,
 * return statements) are missing from this listing; the comments added here
 * cover only what is visible.
 */
4066 static int check_inode_extref(struct btrfs_root *root,
4067 struct btrfs_key *ref_key,
4068 struct extent_buffer *node, int slot, u64 *refs,
4071 struct btrfs_key key;
4072 struct btrfs_inode_extref *extref;
/* Name buffer of exactly BTRFS_NAME_LEN bytes, zero-filled on entry. */
4073 char namebuf[BTRFS_NAME_LEN] = {0};
/* Point at the btrfs_inode_extref stored in the item and fetch the item size. */
4083 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4084 total = btrfs_item_size_nr(node, slot);
4087 /* update inode ref count */
/* Unlike INODE_REF, the parent directory id is read from the entry itself. */
4089 name_len = btrfs_inode_extref_name_len(node, extref);
4090 index = btrfs_inode_extref_index(node, extref);
4091 parent = btrfs_inode_extref_parent(node, extref);
/*
 * A name longer than BTRFS_NAME_LEN is warned about and len is capped to the
 * buffer size (presumably in the else branch; the branch lines are not
 * visible here).
 */
4092 if (name_len <= BTRFS_NAME_LEN) {
4095 len = BTRFS_NAME_LEN;
4096 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4097 root->objectid, ref_key->objectid, ref_key->offset);
/* The name bytes start immediately after the btrfs_inode_extref header. */
4099 read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4101 /* Check root dir ref name */
/* NOTE(review): the comparison length is name_len, not the capped len. */
4102 if (index == 0 && strncmp(namebuf, "..", name_len)) {
4103 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4104 root->objectid, ref_key->objectid, ref_key->offset,
4106 err |= ROOT_DIR_ERROR;
4109 /* find related dir_index */
4110 key.objectid = parent;
4111 key.type = BTRFS_DIR_INDEX_KEY;
4113 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
4116 /* find related dir_item */
/* For the DIR_ITEM lookup the key offset is the hash of the (capped) name. */
4117 key.objectid = parent;
4118 key.type = BTRFS_DIR_ITEM_KEY;
4119 key.offset = btrfs_name_hash(namebuf, len);
4120 ret = find_dir_item(root, ref_key, &key, index, namebuf, len, mode);
/* Step past this entry: header plus the full, uncapped name length. */
4123 len = sizeof(*extref) + name_len;
4124 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4134 * Find the INODE_REF/INODE_EXTREF for the given key and check that it matches
4135 * the specified DIR_ITEM/DIR_INDEX.
4137 * @root: the root of the fs/file tree
4138 * @key: the key of the INODE_REF/INODE_EXTREF
4139 * @name: the name in the INODE_REF/INODE_EXTREF
4140 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
4141 * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
4143 * @ext_ref: the EXTENDED_IREF feature
4145 * Return 0 if no error occurred.
4146 * Return >0 for error bitmap
/*
 * find_inode_ref() - search @root for the INODE_REF named by @key and scan
 * its entries for one matching @name/@namelen (and @index unless it is
 * (u64)-1); then repeat the scan on the INODE_EXTREF item.
 *
 * The function rewrites key->type and key->offset for the INODE_EXTREF
 * lookup, so the caller's key is modified.
 *
 * NOTE(review): short source lines (braces, short declarations, gotos,
 * return statements) are missing from this listing; the comments added here
 * cover only what is visible.
 */
4148 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4149 char *name, int namelen, u64 index,
4150 unsigned int ext_ref)
4152 struct btrfs_path path;
4153 struct btrfs_inode_ref *ref;
4154 struct btrfs_inode_extref *extref;
4155 struct extent_buffer *node;
4156 char ref_namebuf[BTRFS_NAME_LEN] = {0};
/* Read-only lookup of the INODE_REF item (no transaction handle is passed). */
4167 btrfs_init_path(&path);
4168 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4170 ret = INODE_REF_MISSING;
4174 node = path.nodes[0];
4175 slot = path.slots[0];
4177 ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4178 total = btrfs_item_size_nr(node, slot);
4180 /* Iterate over all entries of INODE_REF */
4181 while (cur < total) {
/* Assume missing until an entry matches. */
4182 ret = INODE_REF_MISSING;
4184 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4185 ref_index = btrfs_inode_ref_index(node, ref);
/* An index of (u64)-1 disables the index comparison. */
4186 if (index != (u64)-1 && index != ref_index)
/* Cap the number of name bytes read to the size of ref_namebuf. */
4189 if (ref_namelen <= BTRFS_NAME_LEN) {
4192 len = BTRFS_NAME_LEN;
4193 warning("root %llu INODE %s[%llu %llu] name too long",
4195 key->type == BTRFS_INODE_REF_KEY ?
4197 key->objectid, key->offset);
4199 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
/* Both the length and the bytes must agree with the requested name. */
4202 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step past this entry: header plus the full, uncapped name length. */
4208 len = sizeof(*ref) + ref_namelen;
4209 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4214 /* Skip if the EXTENDED_IREF feature is not supported */
4218 btrfs_release_path(&path);
4219 btrfs_init_path(&path);
/*
 * Retarget the caller's key at the INODE_EXTREF item: the old key offset is
 * kept as dir_id and the new offset is the extref hash of dir_id and name.
 */
4221 dir_id = key->offset;
4222 key->type = BTRFS_INODE_EXTREF_KEY;
4223 key->offset = btrfs_extref_hash(dir_id, name, namelen);
4225 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4227 ret = INODE_REF_MISSING;
4231 node = path.nodes[0];
4232 slot = path.slots[0];
4234 extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4236 total = btrfs_item_size_nr(node, slot);
4238 /* Iterate over all entries of INODE_EXTREF */
4239 while (cur < total) {
4240 ret = INODE_REF_MISSING;
4242 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4243 ref_index = btrfs_inode_extref_index(node, extref);
4244 parent = btrfs_inode_extref_parent(node, extref);
4245 if (index != (u64)-1 && index != ref_index)
/* The entry must also belong to the directory saved in dir_id. */
4248 if (parent != dir_id)
4251 if (ref_namelen <= BTRFS_NAME_LEN) {
4254 len = BTRFS_NAME_LEN;
/*
 * NOTE(review): unlike the INODE_REF warning above, this message carries its
 * own "Warning: " prefix and a trailing newline; key->type was set to
 * BTRFS_INODE_EXTREF_KEY earlier, so the ternary below cannot take its first
 * alternative on this path.
 */
4255 warning("Warning: root %llu INODE %s[%llu %llu] name too long\n",
4257 key->type == BTRFS_INODE_REF_KEY ?
4259 key->objectid, key->offset);
4261 read_extent_buffer(node, ref_namebuf,
4262 (unsigned long)(extref + 1), len);
4264 if (len != namelen || strncmp(ref_namebuf, name, len))
/* Step past this entry: header plus the full, uncapped name length. */
4271 len = sizeof(*extref) + ref_namelen;
4272 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4277 btrfs_release_path(&path);
4282 * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
4283 * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
4285 * @root: the root of the fs/file tree
4286 * @key: the key of the DIR_ITEM/DIR_INDEX
4287 * @size: the st_size of the INODE_ITEM
4288 * @ext_ref: the EXTENDED_IREF feature
4290 * Return 0 if no error occurred.
/*
 * check_dir_item() - walk every entry of the DIR_ITEM/DIR_INDEX item at
 * @node/@slot; for each entry check that the INODE_ITEM it points to exists
 * with a matching file type, and that a matching INODE_REF/INODE_EXTREF can
 * be found by find_inode_ref().  The name length of every entry is added to
 * *size.
 *
 * NOTE(review): short source lines (braces, short declarations, gotos,
 * return statements) are missing from this listing; the comments added here
 * cover only what is visible.
 */
4292 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4293 struct extent_buffer *node, int slot, u64 *size,
4294 unsigned int ext_ref)
4296 struct btrfs_dir_item *di;
4297 struct btrfs_inode_item *ii;
4298 struct btrfs_path path;
4299 struct btrfs_key location;
/*
 * NOTE(review): the buffer has no spare byte for a terminating NUL when a
 * name fills all BTRFS_NAME_LEN bytes, yet it is passed to %s below.
 */
4300 char namebuf[BTRFS_NAME_LEN] = {0};
4313 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
4314 * ignore index check.
4316 index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
4318 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4319 total = btrfs_item_size_nr(node, slot);
4321 while (cur < total) {
/* The condition guarding the data_len report is not visible in this listing. */
4322 data_len = btrfs_dir_data_len(node, di);
4324 error("root %llu %s[%llu %llu] data_len shouldn't be %u",
4325 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4326 "DIR_ITEM" : "DIR_INDEX",
4327 key->objectid, key->offset, data_len);
/* Cap the number of name bytes read to the size of namebuf. */
4329 name_len = btrfs_dir_name_len(node, di);
4330 if (name_len <= BTRFS_NAME_LEN) {
4333 len = BTRFS_NAME_LEN;
4334 warning("root %llu %s[%llu %llu] name too long",
4336 key->type == BTRFS_DIR_ITEM_KEY ?
4337 "DIR_ITEM" : "DIR_INDEX",
4338 key->objectid, key->offset);
/* The full (uncapped) name length is what gets accumulated into *size. */
4340 (*size) += name_len;
/* The name bytes start immediately after the btrfs_dir_item header. */
4342 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
4343 filetype = btrfs_dir_type(node, di);
/* location is the key stored in the dir entry (the item it points to). */
4345 btrfs_init_path(&path);
4346 btrfs_dir_item_key_to_cpu(node, di, &location);
4348 /* Ignore related ROOT_ITEM check */
4349 if (location.type == BTRFS_ROOT_ITEM_KEY)
4352 /* Check relative INODE_ITEM(existence/filetype) */
4353 ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
4355 err |= INODE_ITEM_MISSING;
4356 error("root %llu %s[%llu %llu] couldn't find relative INODE_ITEM[%llu] namelen %u filename %s filetype %x",
4357 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4358 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4359 key->offset, location.objectid, name_len,
4364 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
4365 struct btrfs_inode_item);
4366 mode = btrfs_inode_mode(path.nodes[0], ii);
/* The type derived from the inode mode must equal the type in the dir entry. */
4368 if (imode_to_type(mode) != filetype) {
4369 err |= INODE_ITEM_MISMATCH;
4370 error("root %llu %s[%llu %llu] relative INODE_ITEM filetype mismatch namelen %u filename %s filetype %d",
4371 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4372 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4373 key->offset, name_len, namebuf, filetype);
4376 /* Check relative INODE_REF/INODE_EXTREF */
/* Reuse location as an INODE_REF key whose offset is this directory's id. */
4377 location.type = BTRFS_INODE_REF_KEY;
4378 location.offset = key->objectid;
4379 ret = find_inode_ref(root, &location, namebuf, len,
4382 if (ret & INODE_REF_MISSING)
4383 error("root %llu %s[%llu %llu] relative INODE_REF missing namelen %u filename %s filetype %d",
4384 root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
4385 "DIR_ITEM" : "DIR_INDEX", key->objectid,
4386 key->offset, name_len, namebuf, filetype);
/* Step past this entry: header, name and trailing data. */
4389 btrfs_release_path(&path);
4390 len = sizeof(*di) + name_len + data_len;
4391 di = (struct btrfs_dir_item *)((char *)di + len);
/* Leftover bytes after the first entry of a DIR_INDEX are reported as an error. */
4394 if (key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
4395 error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
4396 root->objectid, key->objectid, key->offset);
4405 * Check file extent datasum/hole, update the size of the file extents,
4406 * check and update the last offset of the file extent.
4408 * @root: the root of fs/file tree.
4409 * @fkey: the key of the file extent.
4410 * @nodatasum: INODE_NODATASUM feature.
4411 * @size: the sum of all EXTENT_DATA items size for this inode.
4412 * @end: the offset of the last extent.
4414 * Return 0 if no error occurred.
/*
 * check_file_extent() - validate one EXTENT_DATA item at @node/@slot:
 * inline length consistency, extent type, checksum coverage against the
 * nodatasum flag, and hole/contiguity rules.  Extent lengths are added to
 * *size and *end as shown below.
 *
 * NOTE(review): short source lines (braces, short declarations, return
 * statements, some conditions) are missing from this listing; the comments
 * added here cover only what is visible.
 */
4416 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
4417 struct extent_buffer *node, int slot,
4418 unsigned int nodatasum, u64 *size, u64 *end)
4420 struct btrfs_file_extent_item *fi;
4423 u64 extent_num_bytes;
4425 unsigned int extent_type;
4426 unsigned int is_hole;
4430 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
4432 extent_type = btrfs_file_extent_type(node, fi);
4433 /* Skip if file extent is inline */
4434 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4435 struct btrfs_item *e = btrfs_item_nr(slot);
4436 u32 item_inline_len;
/*
 * An inline extent is flagged when its inline length is zero or differs from
 * the inline length derived from the item itself.
 */
4438 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
4439 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
4440 if (extent_num_bytes == 0 ||
4441 extent_num_bytes != item_inline_len)
4442 err |= FILE_EXTENT_ERROR;
4443 *size += extent_num_bytes;
4447 /* Check extent type */
4448 if (extent_type != BTRFS_FILE_EXTENT_REG &&
4449 extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
4450 err |= FILE_EXTENT_ERROR;
4451 error("root %llu EXTENT_DATA[%llu %llu] type bad",
4452 root->objectid, fkey->objectid, fkey->offset);
4456 /* Check REG_EXTENT/PREALLOC_EXTENT */
4457 disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
4458 disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
4459 extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
/* A hole is an extent whose disk bytenr and disk length are both zero. */
4460 is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
4462 /* Check EXTENT_DATA datasum */
4463 ret = count_csum_range(root, disk_bytenr, disk_num_bytes, &found);
4464 if (found > 0 && nodatasum) {
4465 err |= ODD_CSUM_ITEM;
4466 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
4467 root->objectid, fkey->objectid, fkey->offset);
/* One line of the following condition is not visible in this listing. */
4468 } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
4470 (ret < 0 || found == 0 || found < disk_num_bytes)) {
4471 err |= CSUM_ITEM_MISSING;
4472 error("root %llu EXTENT_DATA[%llu %llu] datasum missing",
4473 root->objectid, fkey->objectid, fkey->offset);
4474 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && found > 0) {
4475 err |= ODD_CSUM_ITEM;
4476 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have datasum",
4477 root->objectid, fkey->objectid, fkey->offset);
4480 /* Check EXTENT_DATA hole */
/*
 * no_holes is a file-scope flag.  When it is set a hole extent is an error;
 * when it is clear the extent must start exactly at *end.
 */
4481 if (no_holes && is_hole) {
4482 err |= FILE_EXTENT_ERROR;
4483 error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
4484 root->objectid, fkey->objectid, fkey->offset);
4485 } else if (!no_holes && *end != fkey->offset) {
4486 err |= FILE_EXTENT_ERROR;
4487 error("root %llu EXTENT_DATA[%llu %llu] interrupt",
4488 root->objectid, fkey->objectid, fkey->offset);
/* A line between the next two statements is not visible in this listing. */
4491 *end += extent_num_bytes;
4493 *size += extent_num_bytes;
4499 * Check INODE_ITEM and related ITEMs (the same inode number)
4500 * 1. check link count
4501 * 2. check inode ref/extref
4502 * 3. check dir item/index
4504 * @ext_ref: the EXTENDED_IREF feature
4506 * Return 0 if no error occurred.
4507 * Return >0 (an error bitmap) on error or when the traversal is done
/*
 * check_inode_item() - read the INODE_ITEM at the current @path position,
 * then walk the following items that share its objectid, dispatching each
 * one by key type to check_inode_ref(), check_inode_extref(),
 * check_dir_item() or check_file_extent().  Afterwards the nlink, size and
 * nbytes fields of the inode are compared with what the walk collected.
 *
 * NOTE(review): short source lines (braces, short declarations, gotos,
 * break/return statements, some conditions) are missing from this listing;
 * the comments added here cover only what is visible.
 */
4509 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
4510 unsigned int ext_ref)
4512 struct extent_buffer *node;
4513 struct btrfs_inode_item *ii;
4514 struct btrfs_key key;
4523 u64 extent_size = 0;
4525 unsigned int nodatasum;
4530 node = path->nodes[0];
4531 slot = path->slots[0];
4533 btrfs_item_key_to_cpu(node, &key, slot);
4534 inode_id = key.objectid;
/* Items under the orphan objectid are only stepped over. */
4536 if (inode_id == BTRFS_ORPHAN_OBJECTID) {
4537 ret = btrfs_next_item(root, path);
/* Cache the INODE_ITEM fields that are verified after the walk. */
4543 ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
4544 isize = btrfs_inode_size(node, ii);
4545 nbytes = btrfs_inode_nbytes(node, ii);
4546 mode = btrfs_inode_mode(node, ii);
4547 dir = imode_to_type(mode) == BTRFS_FT_DIR;
4548 nlink = btrfs_inode_nlink(node, ii);
4549 nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
4552 ret = btrfs_next_item(root, path);
4554 /* out will fill 'err' using current statistics */
4556 } else if (ret > 0) {
4561 node = path->nodes[0];
4562 slot = path->slots[0];
4563 btrfs_item_key_to_cpu(node, &key, slot);
/* The walk stops at the first item that belongs to a different objectid. */
4564 if (key.objectid != inode_id)
4568 case BTRFS_INODE_REF_KEY:
4569 ret = check_inode_ref(root, &key, node, slot, &refs,
4573 case BTRFS_INODE_EXTREF_KEY:
/* NOTE(review): the key.type test repeats what the case label already says. */
4574 if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
4575 warning("root %llu EXTREF[%llu %llu] isn't supported",
4576 root->objectid, key.objectid,
4578 ret = check_inode_extref(root, &key, node, slot, &refs,
4582 case BTRFS_DIR_ITEM_KEY:
4583 case BTRFS_DIR_INDEX_KEY:
4585 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
4586 root->objectid, inode_id,
4587 imode_to_type(mode), key.objectid,
4590 ret = check_dir_item(root, &key, node, slot, &size,
4594 case BTRFS_EXTENT_DATA_KEY:
4596 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
4597 root->objectid, inode_id, key.objectid,
4600 ret = check_file_extent(root, &key, node, slot,
4601 nodatasum, &extent_size,
4605 case BTRFS_XATTR_ITEM_KEY:
4608 error("ITEM[%llu %u %llu] UNKNOWN TYPE",
4609 key.objectid, key.type, key.offset);
4614 /* verify INODE_ITEM nlink/isize/nbytes */
4617 err |= LINK_COUNT_ERROR;
4618 error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
4619 root->objectid, inode_id, nlink);
4623 * Just a warning, as dir inode nbytes is just an
4624 * instructive value.
4626 if (!IS_ALIGNED(nbytes, root->nodesize)) {
4627 warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
4628 root->objectid, inode_id, root->nodesize);
/* size is the total filled in by check_dir_item(). */
4631 if (isize != size) {
4633 error("root %llu DIR INODE [%llu] size(%llu) not equal to %llu",
4634 root->objectid, inode_id, isize, size);
/* refs is the count filled in by check_inode_ref()/check_inode_extref(). */
4637 if (nlink != refs) {
4638 err |= LINK_COUNT_ERROR;
4639 error("root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
4640 root->objectid, inode_id, nlink, refs);
4641 } else if (!nlink) {
4645 if (!nbytes && !no_holes && extent_end < isize) {
4646 err |= NBYTES_ERROR;
4647 error("root %llu INODE[%llu] size (%llu) should have a file extent hole",
4648 root->objectid, inode_id, isize);
/* extent_size is the total filled in by check_file_extent(). */
4651 if (nbytes != extent_size) {
4652 err |= NBYTES_ERROR;
4653 error("root %llu INODE[%llu] nbytes(%llu) not equal to extent_size(%llu)",
4654 root->objectid, inode_id, nbytes, extent_size);
/*
 * all_backpointers_checked() - walk rec->backrefs and report backrefs that
 * were not found in the extent tree, tree backrefs that were never
 * referenced, and data backrefs whose counts, disk bytenr or byte length
 * disagree with the record.  The per-backref totals are summed in found and
 * compared with rec->refs at the end.
 *
 * NOTE(review): short source lines (braces, else, gotos, return statements
 * and presumably the print_errs tests) are missing from this listing; the
 * comments added here cover only what is visible.
 */
4661 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
4663 struct list_head *cur = rec->backrefs.next;
4664 struct extent_backref *back;
4665 struct tree_backref *tback;
4666 struct data_backref *dback;
4670 while(cur != &rec->backrefs) {
4671 back = to_extent_backref(cur);
/* Case 1: the backref has no counterpart in the extent tree. */
4673 if (!back->found_extent_tree) {
4677 if (back->is_data) {
4678 dback = to_data_backref(back);
4679 fprintf(stderr, "Backref %llu %s %llu"
4680 " owner %llu offset %llu num_refs %lu"
4681 " not found in extent tree\n",
4682 (unsigned long long)rec->start,
4683 back->full_backref ?
4685 back->full_backref ?
4686 (unsigned long long)dback->parent:
4687 (unsigned long long)dback->root,
4688 (unsigned long long)dback->owner,
4689 (unsigned long long)dback->offset,
4690 (unsigned long)dback->num_refs);
4692 tback = to_tree_backref(back);
4693 fprintf(stderr, "Backref %llu parent %llu"
4694 " root %llu not found in extent tree\n",
4695 (unsigned long long)rec->start,
4696 (unsigned long long)tback->parent,
4697 (unsigned long long)tback->root);
/* Case 2: a tree backref exists but nothing was found referencing it. */
4700 if (!back->is_data && !back->found_ref) {
4704 tback = to_tree_backref(back);
4705 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
4706 (unsigned long long)rec->start,
4707 back->full_backref ? "parent" : "root",
4708 back->full_backref ?
4709 (unsigned long long)tback->parent :
4710 (unsigned long long)tback->root, back);
/* Case 3: data backref consistency (count, disk bytenr, byte length). */
4712 if (back->is_data) {
4713 dback = to_data_backref(back);
4714 if (dback->found_ref != dback->num_refs) {
4718 fprintf(stderr, "Incorrect local backref count"
4719 " on %llu %s %llu owner %llu"
4720 " offset %llu found %u wanted %u back %p\n",
4721 (unsigned long long)rec->start,
4722 back->full_backref ?
4724 back->full_backref ?
4725 (unsigned long long)dback->parent:
4726 (unsigned long long)dback->root,
4727 (unsigned long long)dback->owner,
4728 (unsigned long long)dback->offset,
4729 dback->found_ref, dback->num_refs, back);
4731 if (dback->disk_bytenr != rec->start) {
4735 fprintf(stderr, "Backref disk bytenr does not"
4736 " match extent record, bytenr=%llu, "
4737 "ref bytenr=%llu\n",
4738 (unsigned long long)rec->start,
4739 (unsigned long long)dback->disk_bytenr);
4742 if (dback->bytes != rec->nr) {
4746 fprintf(stderr, "Backref bytes do not match "
4747 "extent backref, bytenr=%llu, ref "
4748 "bytes=%llu, backref bytes=%llu\n",
4749 (unsigned long long)rec->start,
4750 (unsigned long long)rec->nr,
4751 (unsigned long long)dback->bytes);
/*
 * Accumulate the global count.  Data backrefs add their found_ref; the
 * tree-backref branch is not visible in this listing.
 */
4754 if (!back->is_data) {
4757 dback = to_data_backref(back);
4758 found += dback->found_ref;
/* The summed count must equal the reference count stored in the record. */
4761 if (found != rec->refs) {
4765 fprintf(stderr, "Incorrect global backref count "
4766 "on %llu found %llu wanted %llu\n",
4767 (unsigned long long)rec->start,
4768 (unsigned long long)found,
4769 (unsigned long long)rec->refs);
/*
 * free_all_extent_backrefs() - repeatedly take the first entry of
 * rec->backrefs until the list is empty.
 *
 * NOTE(review): the lines that unlink and release each entry, and the return
 * statement, are not visible in this listing.
 */
4775 static int free_all_extent_backrefs(struct extent_record *rec)
4777 struct extent_backref *back;
4778 struct list_head *cur;
4779 while (!list_empty(&rec->backrefs)) {
4780 cur = rec->backrefs.next;
4781 back = to_extent_backref(cur);
/*
 * free_extent_record_cache() - take extent records out of @extent_cache
 * starting from its first entry, removing each from the cache and dropping
 * its backrefs.
 *
 * NOTE(review): the loop header and the line releasing the record itself are
 * not visible in this listing.
 */
4788 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4789 struct cache_tree *extent_cache)
4791 struct cache_extent *cache;
4792 struct extent_record *rec;
4795 cache = first_cache_extent(extent_cache);
/* The cache_extent is embedded in the extent_record as its cache member. */
4798 rec = container_of(cache, struct extent_record, cache);
4799 remove_cache_extent(extent_cache, cache);
4800 free_all_extent_backrefs(rec);
/*
 * maybe_free_extent_rec() - drop @rec from @extent_cache once every check on
 * it has passed: content and owner ref checked, extent item refs equal to a
 * non-zero refs, no duplicates, all_backpointers_checked() returning zero,
 * and none of the bad_full_backref / crossing_stripes / wrong_chunk_type
 * flags set.
 *
 * NOTE(review): the lines after list_del_init() (releasing the record and
 * returning) are not visible in this listing.
 */
4805 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4806 struct extent_record *rec)
4808 if (rec->content_checked && rec->owner_ref_checked &&
4809 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
/* print_errs is passed as 0 here, so this probe is meant to be silent. */
4810 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4811 !rec->bad_full_backref && !rec->crossing_stripes &&
4812 !rec->wrong_chunk_type) {
4813 remove_cache_extent(extent_cache, &rec->cache);
4814 free_all_extent_backrefs(rec);
4815 list_del_init(&rec->list);
/*
 * check_owner_ref() - decide whether the owner recorded in the header of
 * @buf is backed by a reference.  First the backrefs of @rec are scanned for
 * a tree backref whose root equals the header owner; failing that, the
 * owner's tree is searched for a parent node whose block pointer is
 * buf->start.
 *
 * Returns 0 when a reference was found and 1 otherwise (final statement).
 *
 * NOTE(review): short source lines (braces, continue/return statements, some
 * conditions) are missing from this listing; the comments added here cover
 * only what is visible.
 */
4821 static int check_owner_ref(struct btrfs_root *root,
4822 struct extent_record *rec,
4823 struct extent_buffer *buf)
4825 struct extent_backref *node;
4826 struct tree_backref *back;
4827 struct btrfs_root *ref_root;
4828 struct btrfs_key key;
4829 struct btrfs_path path;
4830 struct extent_buffer *parent;
4835 list_for_each_entry(node, &rec->backrefs, list) {
4838 if (!node->found_ref)
4840 if (node->full_backref)
4842 back = to_tree_backref(node);
4843 if (btrfs_header_owner(buf) == back->root)
4846 BUG_ON(rec->is_root);
4848 /* try to find the block by search corresponding fs tree */
4849 key.objectid = btrfs_header_owner(buf);
4850 key.type = BTRFS_ROOT_ITEM_KEY;
4851 key.offset = (u64)-1;
4853 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4854 if (IS_ERR(ref_root))
/* The search key is the first key stored in buf (item key or node key). */
4857 level = btrfs_header_level(buf);
4859 btrfs_item_key_to_cpu(buf, &key, 0);
4861 btrfs_node_key_to_cpu(buf, &key, 0);
/* Stop the search one level above buf so path.nodes[level + 1] is its parent. */
4863 btrfs_init_path(&path);
4864 path.lowest_level = level + 1;
4865 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4869 parent = path.nodes[level + 1];
4870 if (parent && buf->start == btrfs_node_blockptr(parent,
4871 path.slots[level + 1]))
4874 btrfs_release_path(&path);
4875 return found ? 0 : 1;
/*
 * is_extent_tree_record() - scan the backrefs of @rec for a tree backref
 * whose root is BTRFS_EXTENT_TREE_OBJECTID.
 *
 * NOTE(review): the loop advance, the is_data test and the return statements
 * are not visible in this listing.
 */
4878 static int is_extent_tree_record(struct extent_record *rec)
4880 struct list_head *cur = rec->backrefs.next;
4881 struct extent_backref *node;
4882 struct tree_backref *back;
4885 while(cur != &rec->backrefs) {
4886 node = to_extent_backref(cur);
4890 back = to_tree_backref(node);
4891 if (node->full_backref)
4893 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * record_bad_block_io() - look up the extent record covering [start, len) in
 * @extent_cache and, when is_extent_tree_record() accepts it, register it
 * through btrfs_add_corrupt_extent_record() keyed by the record's parent
 * key.
 *
 * NOTE(review): the remaining parameter line and the early-return lines are
 * not visible in this listing.
 */
4900 static int record_bad_block_io(struct btrfs_fs_info *info,
4901 struct cache_tree *extent_cache,
4904 struct extent_record *rec;
4905 struct cache_extent *cache;
4906 struct btrfs_key key;
4908 cache = lookup_cache_extent(extent_cache, start, len);
4912 rec = container_of(cache, struct extent_record, cache);
4913 if (!is_extent_tree_record(rec))
4916 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4917 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * swap_values() - exchange the entries at @slot and @slot + 1 of @buf.
 * For a node (non-zero header level) the two btrfs_key_ptr records are
 * swapped; for a leaf the item data, offsets, sizes and keys are swapped.
 *
 * NOTE(review): short source lines (braces, else, allocation-failure
 * handling, free and return statements, some conditions) are missing from
 * this listing; the comments added here cover only what is visible.
 */
4920 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4921 struct extent_buffer *buf, int slot)
4923 if (btrfs_header_level(buf)) {
4924 struct btrfs_key_ptr ptr1, ptr2;
/* Read both key pointers, then write each one back into the other slot. */
4926 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4927 sizeof(struct btrfs_key_ptr));
4928 read_extent_buffer(buf, &ptr2,
4929 btrfs_node_key_ptr_offset(slot + 1),
4930 sizeof(struct btrfs_key_ptr));
4931 write_extent_buffer(buf, &ptr1,
4932 btrfs_node_key_ptr_offset(slot + 1),
4933 sizeof(struct btrfs_key_ptr));
4934 write_extent_buffer(buf, &ptr2,
4935 btrfs_node_key_ptr_offset(slot),
4936 sizeof(struct btrfs_key_ptr));
/* Propagate the node's first key to the levels above (guard not visible). */
4938 struct btrfs_disk_key key;
4939 btrfs_node_key(buf, &key, 0);
4940 btrfs_fixup_low_keys(root, path, &key,
4941 btrfs_header_level(buf) + 1);
4944 struct btrfs_item *item1, *item2;
4945 struct btrfs_key k1, k2;
4946 char *item1_data, *item2_data;
4947 u32 item1_offset, item2_offset, item1_size, item2_size;
4949 item1 = btrfs_item_nr(slot);
4950 item2 = btrfs_item_nr(slot + 1);
4951 btrfs_item_key_to_cpu(buf, &k1, slot);
4952 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4953 item1_offset = btrfs_item_offset(buf, item1);
4954 item2_offset = btrfs_item_offset(buf, item2);
4955 item1_size = btrfs_item_size(buf, item1);
4956 item2_size = btrfs_item_size(buf, item2);
/* Scratch copies of both items' data. */
4958 item1_data = malloc(item1_size);
4961 item2_data = malloc(item2_size);
4967 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4968 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/*
 * NOTE(review): item1_data holds item1_size bytes but is written with
 * item2_size, and item2_data holds item2_size bytes but is written with
 * item1_size; when the two sizes differ one of these reads past its scratch
 * buffer.  Confirm whether the lengths are intended to be crossed.
 */
4970 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4971 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Exchange the offset and size recorded in the two item headers. */
4975 btrfs_set_item_offset(buf, item1, item2_offset);
4976 btrfs_set_item_offset(buf, item2, item1_offset);
4977 btrfs_set_item_size(buf, item1, item2_size);
4978 btrfs_set_item_size(buf, item2, item1_size);
/* Exchange the keys; path->slots[0] selects which slot each call rewrites. */
4980 path->slots[0] = slot;
4981 btrfs_set_item_key_unsafe(root, path, &k2);
4982 path->slots[0] = slot + 1;
4983 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * fix_key_order() - scan adjacent key pairs in the block at
 * path->nodes[path->lowest_level] and call swap_values() on pairs that are
 * not in strictly ascending order, marking the buffer dirty afterwards.
 *
 * NOTE(review): short source lines (braces, the level test, continue/break
 * and return statements, any index adjustment after a swap) are missing from
 * this listing; the comments added here cover only what is visible.
 */
4988 static int fix_key_order(struct btrfs_trans_handle *trans,
4989 struct btrfs_root *root,
4990 struct btrfs_path *path)
4992 struct extent_buffer *buf;
4993 struct btrfs_key k1, k2;
4995 int level = path->lowest_level;
4998 buf = path->nodes[level];
4999 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Node keys and item keys are read with different accessors. */
5001 btrfs_node_key_to_cpu(buf, &k1, i);
5002 btrfs_node_key_to_cpu(buf, &k2, i + 1);
5004 btrfs_item_key_to_cpu(buf, &k1, i);
5005 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* k1 < k2 means this pair is already ordered. */
5007 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
5009 ret = swap_values(root, path, buf, i);
5012 btrfs_mark_buffer_dirty(buf);
/*
 * delete_bogus_item() - remove the item header at @slot from leaf @buf by
 * shifting the following item headers down by one and decrementing the item
 * count.  Only the key types listed below are eligible.
 *
 * NOTE(review): short source lines (braces, the slot test before the low-key
 * fixup, return statements) are missing from this listing; the comments
 * added here cover only what is visible.
 */
5018 static int delete_bogus_item(struct btrfs_trans_handle *trans,
5019 struct btrfs_root *root,
5020 struct btrfs_path *path,
5021 struct extent_buffer *buf, int slot)
5023 struct btrfs_key key;
5024 int nritems = btrfs_header_nritems(buf);
5026 btrfs_item_key_to_cpu(buf, &key, slot);
5028 /* These are all the keys we can deal with missing. */
5029 if (key.type != BTRFS_DIR_INDEX_KEY &&
5030 key.type != BTRFS_EXTENT_ITEM_KEY &&
5031 key.type != BTRFS_METADATA_ITEM_KEY &&
5032 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
5033 key.type != BTRFS_EXTENT_DATA_REF_KEY)
5036 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
5037 (unsigned long long)key.objectid, key.type,
5038 (unsigned long long)key.offset, slot, buf->start);
/* Only the item header array is shifted; no item data is moved here. */
5039 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
5040 btrfs_item_nr_offset(slot + 1),
5041 sizeof(struct btrfs_item) *
5042 (nritems - slot - 1));
5043 btrfs_set_header_nritems(buf, nritems - 1);
/* Push the leaf's (possibly new) first key up to level 1. */
5045 struct btrfs_disk_key disk_key;
5047 btrfs_item_key(buf, &disk_key, 0);
5048 btrfs_fixup_low_keys(root, path, &disk_key, 1);
5050 btrfs_mark_buffer_dirty(buf);
/*
 * fix_item_offset() - walk the items of the leaf at path->nodes[0] and
 * repair data offsets: the first item must end at BTRFS_LEAF_DATA_SIZE(root)
 * and every later item must end where the previous item starts.  An item
 * ending too high is handed to delete_bogus_item(); an item ending too low
 * is shifted up by the gap.
 *
 * NOTE(review): short source lines (braces, argument continuations,
 * continue/break/goto and return statements) are missing from this listing;
 * the comments added here cover only what is visible.
 */
5054 static int fix_item_offset(struct btrfs_trans_handle *trans,
5055 struct btrfs_root *root,
5056 struct btrfs_path *path)
5058 struct extent_buffer *buf;
5062 /* We should only get this for leaves */
5063 BUG_ON(path->lowest_level);
5064 buf = path->nodes[0];
5066 for (i = 0; i < btrfs_header_nritems(buf); i++) {
5067 unsigned int shift = 0, offset;
/* First item: compare its end with the end of the leaf data area. */
5069 if (i == 0 && btrfs_item_end_nr(buf, i) !=
5070 BTRFS_LEAF_DATA_SIZE(root)) {
5071 if (btrfs_item_end_nr(buf, i) >
5072 BTRFS_LEAF_DATA_SIZE(root)) {
5073 ret = delete_bogus_item(trans, root, path,
5077 fprintf(stderr, "item is off the end of the "
5078 "leaf, can't fix\n");
5082 shift = BTRFS_LEAF_DATA_SIZE(root) -
5083 btrfs_item_end_nr(buf, i);
/* Later items: compare the end with the start of the previous item. */
5084 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
5085 btrfs_item_offset_nr(buf, i - 1)) {
5086 if (btrfs_item_end_nr(buf, i) >
5087 btrfs_item_offset_nr(buf, i - 1)) {
5088 ret = delete_bogus_item(trans, root, path,
5092 fprintf(stderr, "items overlap, can't fix\n");
5096 shift = btrfs_item_offset_nr(buf, i - 1) -
5097 btrfs_item_end_nr(buf, i);
/* Move the item data up by shift bytes (offset update line partly missing). */
5102 printf("Shifting item nr %d by %u bytes in block %llu\n",
5103 i, shift, (unsigned long long)buf->start);
5104 offset = btrfs_item_offset_nr(buf, i);
5105 memmove_extent_buffer(buf,
5106 btrfs_leaf_data(buf) + offset + shift,
5107 btrfs_leaf_data(buf) + offset,
5108 btrfs_item_size_nr(buf, i));
5109 btrfs_set_item_offset(buf, btrfs_item_nr(i),
5111 btrfs_mark_buffer_dirty(buf);
5115 * We may have moved things, in which case we want to exit so we don't
5116 * write those changes out. Once we have proper abort functionality in
5117 * progs this can be changed to something nicer.
5124 * Attempt to fix basic block failures. If we can't fix it for whatever reason
5125 * then just return -EIO.
/*
 * try_to_fix_bad_block() - attempt an in-place repair of @buf for the two
 * supported failure kinds, BTRFS_TREE_BLOCK_BAD_KEY_ORDER (fix_key_order())
 * and BTRFS_TREE_BLOCK_INVALID_OFFSETS (fix_item_offset()).  Every root
 * returned by btrfs_find_all_roots() for buf->start is processed inside its
 * own transaction.
 *
 * NOTE(review): short source lines (braces, else, error tests, break and
 * return statements, the ulist release) are missing from this listing; the
 * comments added here cover only what is visible.
 */
5127 static int try_to_fix_bad_block(struct btrfs_root *root,
5128 struct extent_buffer *buf,
5129 enum btrfs_tree_block_status status)
5131 struct btrfs_trans_handle *trans;
5132 struct ulist *roots;
5133 struct ulist_node *node;
5134 struct btrfs_root *search_root;
5135 struct btrfs_path path;
5136 struct ulist_iterator iter;
5137 struct btrfs_key root_key, key;
/* Any other status is outside what this function handles. */
5140 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
5141 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5144 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
5148 btrfs_init_path(&path);
5149 ULIST_ITER_INIT(&iter);
5150 while ((node = ulist_next(roots, &iter))) {
/* node->val is used as the objectid of the root to read. */
5151 root_key.objectid = node->val;
5152 root_key.type = BTRFS_ROOT_ITEM_KEY;
5153 root_key.offset = (u64)-1;
5155 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
5162 trans = btrfs_start_transaction(search_root, 0);
5163 if (IS_ERR(trans)) {
5164 ret = PTR_ERR(trans);
/*
 * Search down to the level of buf using its first key, with block checking
 * disabled on the path.
 */
5168 path.lowest_level = btrfs_header_level(buf);
5169 path.skip_check_block = 1;
5170 if (path.lowest_level)
5171 btrfs_node_key_to_cpu(buf, &key, 0);
5173 btrfs_item_key_to_cpu(buf, &key, 0);
5174 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
5177 btrfs_commit_transaction(trans, search_root);
5180 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
5181 ret = fix_key_order(trans, search_root, &path);
5182 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
5183 ret = fix_item_offset(trans, search_root, &path);
5185 btrfs_commit_transaction(trans, search_root);
5188 btrfs_release_path(&path);
5189 btrfs_commit_transaction(trans, search_root);
5192 btrfs_release_path(&path);
/*
 * check_block() - validate tree block @buf.  The extent record covering the
 * block is looked up in @extent_cache and updated with the block's
 * generation, first-key objectid and level; the block is then checked with
 * btrfs_check_leaf() or btrfs_check_node().  A block that is not clean is
 * passed to try_to_fix_bad_block().  On success the record is marked
 * content-checked, its owner ref is checked, and maybe_free_extent_rec() is
 * given the chance to drop it.
 *
 * NOTE(review): short source lines (braces, else, the repair-mode test,
 * return statements) are missing from this listing; the comments added here
 * cover only what is visible.
 */
5196 static int check_block(struct btrfs_root *root,
5197 struct cache_tree *extent_cache,
5198 struct extent_buffer *buf, u64 flags)
5200 struct extent_record *rec;
5201 struct cache_extent *cache;
5202 struct btrfs_key key;
5203 enum btrfs_tree_block_status status;
5207 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
5210 rec = container_of(cache, struct extent_record, cache);
5211 rec->generation = btrfs_header_generation(buf);
/* Remember the objectid of the first key, when the block has any items. */
5213 level = btrfs_header_level(buf);
5214 if (btrfs_header_nritems(buf) > 0) {
5217 btrfs_item_key_to_cpu(buf, &key, 0);
5219 btrfs_node_key_to_cpu(buf, &key, 0);
5221 rec->info_objectid = key.objectid;
5223 rec->info_level = level;
/* The record's parent_key is handed to the leaf/node checker. */
5225 if (btrfs_is_leaf(buf))
5226 status = btrfs_check_leaf(root, &rec->parent_key, buf);
5228 status = btrfs_check_node(root, &rec->parent_key, buf);
5230 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5232 status = try_to_fix_bad_block(root, buf, status);
5233 if (status != BTRFS_TREE_BLOCK_CLEAN) {
5235 fprintf(stderr, "bad block %llu\n",
5236 (unsigned long long)buf->start);
5239 * Signal to callers we need to start the scan over
5240 * again since we'll have cowed blocks.
5245 rec->content_checked = 1;
5246 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5247 rec->owner_ref_checked = 1;
5249 ret = check_owner_ref(root, rec, buf);
5251 rec->owner_ref_checked = 1;
5255 maybe_free_extent_rec(extent_cache, rec);
/*
 * find_tree_backref() - scan rec->backrefs for a tree backref matching
 * either @parent (compared with back->parent) or @root (compared with
 * back->root).  The full_backref flag decides which comparison applies.
 *
 * NOTE(review): the loop advance, the is_data test, the test selecting the
 * parent or root branch, and the continue/return statements are not visible
 * in this listing.
 */
5259 static struct tree_backref *find_tree_backref(struct extent_record *rec,
5260 u64 parent, u64 root)
5262 struct list_head *cur = rec->backrefs.next;
5263 struct extent_backref *node;
5264 struct tree_backref *back;
5266 while(cur != &rec->backrefs) {
5267 node = to_extent_backref(cur);
5271 back = to_tree_backref(node);
/* Parent comparison: entries that are not full backrefs are filtered out. */
5273 if (!node->full_backref)
5275 if (parent == back->parent)
/* Root comparison: entries that are full backrefs are filtered out. */
5278 if (node->full_backref)
5280 if (back->root == root)
/*
 * alloc_tree_backref() - allocate a tree_backref, zero its embedded node,
 * record either @parent (full backref) or a root-based reference
 * (full_backref cleared), and append it to rec->backrefs.
 *
 * NOTE(review): the allocation-failure check, the test choosing between the
 * two variants, the root assignment and the return statement are not
 * visible in this listing.
 */
5287 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
5288 u64 parent, u64 root)
5290 struct tree_backref *ref = malloc(sizeof(*ref));
/* Only the embedded node is cleared, not the whole structure. */
5294 memset(&ref->node, 0, sizeof(ref->node));
5296 ref->parent = parent;
5297 ref->node.full_backref = 1;
5300 ref->node.full_backref = 0;
5302 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * find_data_backref() - scan rec->backrefs for a data backref matching
 * either @parent (compared with back->parent) or the
 * (@root, @owner, @offset) triple.  In the triple case a candidate is set
 * aside when found_ref is set on both the request and the entry but the
 * byte length or disk bytenr differ.
 *
 * NOTE(review): one parameter line, the loop advance, the is_data test, the
 * test selecting the parent or root branch, and the continue/return
 * statements are not visible in this listing.
 */
5307 static struct data_backref *find_data_backref(struct extent_record *rec,
5308 u64 parent, u64 root,
5309 u64 owner, u64 offset,
5311 u64 disk_bytenr, u64 bytes)
5313 struct list_head *cur = rec->backrefs.next;
5314 struct extent_backref *node;
5315 struct data_backref *back;
5317 while(cur != &rec->backrefs) {
5318 node = to_extent_backref(cur);
5322 back = to_data_backref(node);
/* Parent comparison: entries that are not full backrefs are filtered out. */
5324 if (!node->full_backref)
5326 if (parent == back->parent)
/* Triple comparison: entries that are full backrefs are filtered out. */
5329 if (node->full_backref)
5331 if (back->root == root && back->owner == owner &&
5332 back->offset == offset) {
5333 if (found_ref && node->found_ref &&
5334 (back->bytes != bytes ||
5335 back->disk_bytenr != disk_bytenr))
/*
 * alloc_data_backref() - allocate a data_backref, zero its embedded node and
 * mark it as data, fill in either the parent (full backref) or the
 * root-based fields, set bytes to max_size, append it to rec->backrefs and
 * raise rec->max_size when max_size is larger.
 *
 * NOTE(review): one parameter line, the allocation-failure check, the test
 * choosing between the two variants, several field assignments and the
 * return statement are not visible in this listing.
 */
5344 static struct data_backref *alloc_data_backref(struct extent_record *rec,
5345 u64 parent, u64 root,
5346 u64 owner, u64 offset,
5349 struct data_backref *ref = malloc(sizeof(*ref));
/* Only the embedded node is cleared, not the whole structure. */
5353 memset(&ref->node, 0, sizeof(ref->node));
5354 ref->node.is_data = 1;
5357 ref->parent = parent;
5360 ref->node.full_backref = 1;
5364 ref->offset = offset;
5365 ref->node.full_backref = 0;
5367 ref->bytes = max_size;
5370 list_add_tail(&ref->node.list, &rec->backrefs);
/* The record keeps the largest size seen across its backrefs. */
5371 if (max_size > rec->max_size)
5372 rec->max_size = max_size;
5376 /* Check if the type of extent matches with its chunk */
/*
 * check_extent_type() - set rec->wrong_chunk_type when the block group found
 * for rec->start does not fit the kind of extent: a data extent needs a DATA
 * block group, a metadata extent needs SYSTEM or METADATA, and the first
 * backref's root decides which of those two is expected.
 *
 * NOTE(review): short source lines (braces, else, the NULL test on bg_cache,
 * return statements, the bg_type declaration) are missing from this listing;
 * the comments added here cover only what is visible.
 */
5377 static void check_extent_type(struct extent_record *rec)
5379 struct btrfs_block_group_cache *bg_cache;
/* global_info is the file-scope fs_info pointer. */
5381 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
5385 /* data extent, check chunk directly*/
5386 if (!rec->metadata) {
5387 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
5388 rec->wrong_chunk_type = 1;
5392 /* metadata extent, check the obvious case first */
5393 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
5394 BTRFS_BLOCK_GROUP_METADATA))) {
5395 rec->wrong_chunk_type = 1;
5400 * Check SYSTEM extent, as it's also marked as metadata, we can only
5401 * make sure it's a SYSTEM extent by its backref
5403 if (!list_empty(&rec->backrefs)) {
5404 struct extent_backref *node;
5405 struct tree_backref *tback;
5408 node = to_extent_backref(rec->backrefs.next);
5409 if (node->is_data) {
5410 /* tree block shouldn't have data backref */
5411 rec->wrong_chunk_type = 1;
5414 tback = container_of(node, struct tree_backref, node);
5416 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
5417 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
5419 bg_type = BTRFS_BLOCK_GROUP_METADATA;
5420 if (!(bg_cache->flags & bg_type))
5421 rec->wrong_chunk_type = 1;
5426 * Allocate a new extent record, fill default values from @tmpl and insert into
5427 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
5428 * the cache, otherwise it fails.
/*
 * Build a fresh extent_record from @tmpl and insert it into @extent_cache
 * without looking for an existing entry first.
 *
 * Fields copied from @tmpl: start, max_size, found_rec, content_checked,
 * owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 * parent_generation and parent_key. Fields reset here: num_duplicates,
 * flag_block_full_backref (FLAG_UNSET), bad_full_backref, crossing_stripes
 * and wrong_chunk_type. After insertion the global bytes_used counter is
 * increased by rec->nr, crossing_stripes is recomputed with
 * check_crossing_stripes() and check_extent_type() is run on the record.
 *
 * NOTE(review): the handling of malloc and insert_cache_extent failures,
 * any condition around the crossing-stripes check and the return value are
 * not visible in this view.
 */
5430 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
5431 struct extent_record *tmpl)
5433 struct extent_record *rec;
5436 rec = malloc(sizeof(*rec));
5439 rec->start = tmpl->start;
5440 rec->max_size = tmpl->max_size;
/* nr is never smaller than max_size */
5441 rec->nr = max(tmpl->nr, tmpl->max_size);
5442 rec->found_rec = tmpl->found_rec;
5443 rec->content_checked = tmpl->content_checked;
5444 rec->owner_ref_checked = tmpl->owner_ref_checked;
5445 rec->num_duplicates = 0;
5446 rec->metadata = tmpl->metadata;
5447 rec->flag_block_full_backref = FLAG_UNSET;
5448 rec->bad_full_backref = 0;
5449 rec->crossing_stripes = 0;
5450 rec->wrong_chunk_type = 0;
5451 rec->is_root = tmpl->is_root;
5452 rec->refs = tmpl->refs;
5453 rec->extent_item_refs = tmpl->extent_item_refs;
5454 rec->parent_generation = tmpl->parent_generation;
5455 INIT_LIST_HEAD(&rec->backrefs);
5456 INIT_LIST_HEAD(&rec->dups);
5457 INIT_LIST_HEAD(&rec->list);
5458 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
/* the cache entry is sized by tmpl->nr, while rec->nr above may be larger */
5459 rec->cache.start = tmpl->start;
5460 rec->cache.size = tmpl->nr;
5461 ret = insert_cache_extent(extent_cache, &rec->cache);
5466 bytes_used += rec->nr;
5469 rec->crossing_stripes = check_crossing_stripes(global_info,
5470 rec->start, global_info->tree_root->nodesize);
5471 check_extent_type(rec);
5476 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
5478 * - refs - if found, increase refs
5479 * - is_root - if found, set
5480 * - content_checked - if found, set
5481 * - owner_ref_checked - if found, set
5483 * If not found, create a new one, initialize and insert.
/*
 * Merge @tmpl into the extent record covering [tmpl->start, tmpl->nr) in
 * @extent_cache, or create a new record through add_extent_rec_nolookup().
 *
 * Visible behaviour on an existing record:
 *  - rec->nr is reset to max(tmpl->nr, tmpl->max_size);
 *  - when tmpl->found_rec is set and either the start differs or the
 *    record was already found, the record is put on the global
 *    duplicate_extents list (if not yet on a list) and a new
 *    extent_record holding start, max_size, metadata and extent_item_refs
 *    from @tmpl is appended to rec->dups, bumping num_duplicates;
 *  - a non-zero tmpl->extent_item_refs (outside the duplicate case)
 *    overwrites rec->extent_item_refs, printing a message to stderr if one
 *    was already recorded;
 *  - content_checked and owner_ref_checked are only ever set, never
 *    cleared; parent_key is copied; parent_generation is taken when
 *    non-zero; max_size only grows;
 *  - crossing_stripes is recomputed, check_extent_type() is run, and the
 *    record is handed to maybe_free_extent_rec().
 *
 * NOTE(review): several conditions, the remaining fields of the duplicate
 * entry, error handling and the return statements are not visible in this
 * view.
 */
5485 static int add_extent_rec(struct cache_tree *extent_cache,
5486 struct extent_record *tmpl)
5488 struct extent_record *rec;
5489 struct cache_extent *cache;
5493 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
5495 rec = container_of(cache, struct extent_record, cache);
5499 rec->nr = max(tmpl->nr, tmpl->max_size);
5502 * We need to make sure to reset nr to whatever the extent
5503 * record says was the real size, this way we can compare it to
5506 if (tmpl->found_rec) {
5507 if (tmpl->start != rec->start || rec->found_rec) {
5508 struct extent_record *tmp;
5511 if (list_empty(&rec->list))
5512 list_add_tail(&rec->list,
5513 &duplicate_extents);
5516 * We have to do this song and dance in case we
5517 * find an extent record that falls inside of
5518 * our current extent record but does not have
5519 * the same objectid.
5521 tmp = malloc(sizeof(*tmp));
5524 tmp->start = tmpl->start;
5525 tmp->max_size = tmpl->max_size;
5528 tmp->metadata = tmpl->metadata;
5529 tmp->extent_item_refs = tmpl->extent_item_refs;
5530 INIT_LIST_HEAD(&tmp->list);
5531 list_add_tail(&tmp->list, &rec->dups);
5532 rec->num_duplicates++;
5539 if (tmpl->extent_item_refs && !dup) {
5540 if (rec->extent_item_refs) {
5541 fprintf(stderr, "block %llu rec "
5542 "extent_item_refs %llu, passed %llu\n",
5543 (unsigned long long)tmpl->start,
5544 (unsigned long long)
5545 rec->extent_item_refs,
5546 (unsigned long long)tmpl->extent_item_refs);
5548 rec->extent_item_refs = tmpl->extent_item_refs;
5552 if (tmpl->content_checked)
5553 rec->content_checked = 1;
5554 if (tmpl->owner_ref_checked)
5555 rec->owner_ref_checked = 1;
5556 memcpy(&rec->parent_key, &tmpl->parent_key,
5557 sizeof(tmpl->parent_key));
5558 if (tmpl->parent_generation)
5559 rec->parent_generation = tmpl->parent_generation;
5560 if (rec->max_size < tmpl->max_size)
5561 rec->max_size = tmpl->max_size;
5564 * A metadata extent can't cross stripe_len boundary, otherwise
5565 * kernel scrub won't be able to handle it.
5566 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
5570 rec->crossing_stripes = check_crossing_stripes(
5571 global_info, rec->start,
5572 global_info->tree_root->nodesize);
5573 check_extent_type(rec);
5574 maybe_free_extent_rec(extent_cache, rec);
5578 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree-block backref (@parent or @root) for the extent at
 * @bytenr in @extent_cache.
 *
 * The extent is looked up with a one-byte range. A template record with
 * start = @bytenr is built and inserted through add_extent_rec_nolookup(),
 * followed by a second lookup; the condition guarding that path is not
 * visible (presumably the first lookup returned nothing -- confirm).
 * The matching backref is searched with find_tree_backref() and created
 * with alloc_tree_backref(). Depending on @found_ref, either
 * node.found_ref or node.found_extent_tree is set; if the chosen flag was
 * already set, an "Extent back ref already exists" message is printed to
 * stderr first. Finally check_extent_type() and maybe_free_extent_rec()
 * are run on the record.
 *
 * NOTE(review): the remaining template fields, the handling of
 * rec->start != bytenr, all error paths and the return value are not
 * visible in this view.
 */
5583 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
5584 u64 parent, u64 root, int found_ref)
5586 struct extent_record *rec;
5587 struct tree_backref *back;
5588 struct cache_extent *cache;
5591 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5593 struct extent_record tmpl;
5595 memset(&tmpl, 0, sizeof(tmpl));
5596 tmpl.start = bytenr;
5600 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5604 /* really a bug in cache_extent implement now */
5605 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5610 rec = container_of(cache, struct extent_record, cache);
5611 if (rec->start != bytenr) {
5613 * Several cause, from unaligned bytenr to over lapping extents
5618 back = find_tree_backref(rec, parent, root);
5620 back = alloc_tree_backref(rec, parent, root);
5626 if (back->node.found_ref) {
5627 fprintf(stderr, "Extent back ref already exists "
5628 "for %llu parent %llu root %llu \n",
5629 (unsigned long long)bytenr,
5630 (unsigned long long)parent,
5631 (unsigned long long)root);
5633 back->node.found_ref = 1;
5635 if (back->node.found_extent_tree) {
5636 fprintf(stderr, "Extent back ref already exists "
5637 "for %llu parent %llu root %llu \n",
5638 (unsigned long long)bytenr,
5639 (unsigned long long)parent,
5640 (unsigned long long)root);
5642 back->node.found_extent_tree = 1;
5644 check_extent_type(rec);
5645 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr in @extent_cache.
 *
 * The extent is looked up with a one-byte range; a template record with
 * start = @bytenr and max_size = @max_size is inserted through
 * add_extent_rec_nolookup() and looked up again (the guarding condition is
 * not visible; presumably the first lookup found nothing -- confirm).
 * rec->max_size only grows. The backref is searched with
 * find_data_backref() and created with alloc_data_backref().
 *
 * Two marking paths are visible:
 *  - the found_ref path asserts num_refs == 1 with BUG_ON, asserts that a
 *    previously found ref has bytes == @max_size, then sets
 *    node.found_ref, increments back->found_ref, stores bytes and
 *    disk_bytenr, and marks the record content_checked and
 *    owner_ref_checked;
 *  - the extent-tree path prints "Extent back ref already exists" to
 *    stderr when node.found_extent_tree was already set, then stores
 *    @num_refs and sets node.found_extent_tree.
 * The record is finally handed to maybe_free_extent_rec().
 *
 * NOTE(review): the branch conditions, trailing call arguments, error
 * handling and the return value are not visible in this view.
 */
5649 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
5650 u64 parent, u64 root, u64 owner, u64 offset,
5651 u32 num_refs, int found_ref, u64 max_size)
5653 struct extent_record *rec;
5654 struct data_backref *back;
5655 struct cache_extent *cache;
5658 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5660 struct extent_record tmpl;
5662 memset(&tmpl, 0, sizeof(tmpl));
5663 tmpl.start = bytenr;
5665 tmpl.max_size = max_size;
5667 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
5671 cache = lookup_cache_extent(extent_cache, bytenr, 1);
5676 rec = container_of(cache, struct extent_record, cache);
5677 if (rec->max_size < max_size)
5678 rec->max_size = max_size;
5681 * If found_ref is set then max_size is the real size and must match the
5682 * existing refs. So if we have already found a ref then we need to
5683 * make sure that this ref matches the existing one, otherwise we need
5684 * to add a new backref so we can notice that the backrefs don't match
5685 * and we need to figure out who is telling the truth. This is to
5686 * account for that awful fsync bug I introduced where we'd end up with
5687 * a btrfs_file_extent_item that would have its length include multiple
5688 * prealloc extents or point inside of a prealloc extent.
5690 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
5693 back = alloc_data_backref(rec, parent, root, owner, offset,
5699 BUG_ON(num_refs != 1);
5700 if (back->node.found_ref)
5701 BUG_ON(back->bytes != max_size);
5702 back->node.found_ref = 1;
5703 back->found_ref += 1;
5704 back->bytes = max_size;
5705 back->disk_bytenr = bytenr;
5707 rec->content_checked = 1;
5708 rec->owner_ref_checked = 1;
5710 if (back->node.found_extent_tree) {
5711 fprintf(stderr, "Extent back ref already exists "
5712 "for %llu parent %llu root %llu "
5713 "owner %llu offset %llu num_refs %lu\n",
5714 (unsigned long long)bytenr,
5715 (unsigned long long)parent,
5716 (unsigned long long)root,
5717 (unsigned long long)owner,
5718 (unsigned long long)offset,
5719 (unsigned long)num_refs);
5721 back->num_refs = num_refs;
5722 back->node.found_extent_tree = 1;
5724 maybe_free_extent_rec(extent_cache, rec);
/*
 * Register the range (@bytenr, @size) in the @seen tree and then in the
 * @pending tree.
 *
 * The result of the insertion into @seen is stored in ret; the result of
 * the insertion into @pending is not captured on the visible line.
 *
 * NOTE(review): the check on ret between the two calls and the return
 * statements are not visible in this view; presumably a failed insertion
 * into @seen returns early -- confirm.
 */
5728 static int add_pending(struct cache_tree *pending,
5729 struct cache_tree *seen, u64 bytenr, u32 size)
5732 ret = add_cache_extent(seen, bytenr, size);
5735 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (capacity @bits_nr) with the next block ranges to process,
 * drawing from the @reada, @nodes and @pending cache trees.
 *
 * Visible steps:
 *  - the first entry of @reada, if any, is copied into bits[0];
 *  - otherwise the search in @nodes starts at @last moved back by 32768
 *    when @last is larger than that, with a fallback search from 0, and
 *    @pending is searched from 0;
 *  - two do/while loops copy consecutive cache extents into @bits until
 *    the tree is exhausted or ret reaches @bits_nr;
 *  - when more than 8 slots remain, @pending is scanned from the end of
 *    bits[0] and extents are appended while the gap to the previous end
 *    is compared against 32768.
 *
 * NOTE(review): the last parameter line, the conditions linking these
 * steps, the increments of ret and the return statements are not visible
 * in this view. node_start is an unsigned long initialised from the u64
 * @last -- confirm that narrowing is acceptable on 32-bit builds.
 */
5739 static int pick_next_pending(struct cache_tree *pending,
5740 struct cache_tree *reada,
5741 struct cache_tree *nodes,
5742 u64 last, struct block_info *bits, int bits_nr,
5745 unsigned long node_start = last;
5746 struct cache_extent *cache;
5749 cache = search_cache_extent(reada, 0);
5751 bits[0].start = cache->start;
5752 bits[0].size = cache->size;
5757 if (node_start > 32768)
5758 node_start -= 32768;
5760 cache = search_cache_extent(nodes, node_start);
5762 cache = search_cache_extent(nodes, 0);
5765 cache = search_cache_extent(pending, 0);
5770 bits[ret].start = cache->start;
5771 bits[ret].size = cache->size;
5772 cache = next_cache_extent(cache);
5774 } while (cache && ret < bits_nr);
5780 bits[ret].start = cache->start;
5781 bits[ret].size = cache->size;
5782 cache = next_cache_extent(cache);
5784 } while (cache && ret < bits_nr);
5786 if (bits_nr - ret > 8) {
5787 u64 lookup = bits[0].start + bits[0].size;
5788 struct cache_extent *next;
5789 next = search_cache_extent(pending, lookup);
5791 if (next->start - lookup > 32768)
5793 bits[ret].start = next->start;
5794 bits[ret].size = next->size;
5795 lookup = next->start + next->size;
5799 next = next_cache_extent(next);
/*
 * Per-entry callback for tearing down the chunk cache: recover the
 * chunk_record that embeds @cache and detach it from the lists linked
 * through rec->list and rec->dextents.
 *
 * NOTE(review): the statement releasing the record itself is not visible
 * in this view.
 */
5807 static void free_chunk_record(struct cache_extent *cache)
5809 struct chunk_record *rec;
5811 rec = container_of(cache, struct chunk_record, cache);
5812 list_del_init(&rec->list);
5813 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache by passing free_chunk_record as the
 * per-entry callback to cache_tree_free_extents().
 */
5817 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5819 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node callback for the device rb-tree: recover the device_record
 * that embeds @node.
 *
 * NOTE(review): the statement that releases the record is not visible in
 * this view.
 */
5822 static void free_device_record(struct rb_node *node)
5824 struct device_record *rec;
5826 rec = container_of(node, struct device_record, node);
/*
 * Instantiate the FREE_RB_BASED_TREE macro for device_cache, using
 * free_device_record as the per-node callback.
 *
 * NOTE(review): the macro is defined outside this view; presumably it
 * expands to the function that frees the whole device cache -- confirm.
 */
5830 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the cache tree of @tree and append it to the
 * tree->block_groups list.
 *
 * NOTE(review): the check on the insert_cache_extent() result and the
 * return statements are not visible in this view; presumably the list is
 * only updated when the insertion succeeded -- confirm.
 */
5832 int insert_block_group_record(struct block_group_tree *tree,
5833 struct block_group_record *bg_rec)
5837 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5841 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-entry callback for tearing down the block group tree: recover the
 * block_group_record that embeds @cache and detach it from the list
 * linked through rec->list.
 *
 * NOTE(review): the statement releasing the record itself is not visible
 * in this view.
 */
5845 static void free_block_group_record(struct cache_extent *cache)
5847 struct block_group_record *rec;
5849 rec = container_of(cache, struct block_group_record, cache);
5850 list_del_init(&rec->list);
/*
 * Release every entry of the cache tree embedded in @tree by passing
 * free_block_group_record to cache_tree_free_extents().
 */
5854 void free_block_group_tree(struct block_group_tree *tree)
5856 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the cache tree of @tree with insert_cache_extent2()
 * and queue it on both orphan lists (no_chunk_orphans through chunk_list,
 * no_device_orphans through device_list).
 *
 * NOTE(review): the check on the insertion result and the return
 * statements are not visible in this view.
 */
5859 int insert_device_extent_record(struct device_extent_tree *tree,
5860 struct device_extent_record *de_rec)
5865 * Device extent is a bit different from the other extents, because
5866 * the extents which belong to the different devices may have the
5867 * same start and size, so we need use the special extent cache
5868 * search/insert functions.
5870 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5874 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5875 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-entry callback for tearing down the device extent tree: recover the
 * device_extent_record that embeds @cache and unlink chunk_list and
 * device_list when they are still on a list.
 *
 * NOTE(review): the statement releasing the record itself is not visible
 * in this view.
 */
5879 static void free_device_extent_record(struct cache_extent *cache)
5881 struct device_extent_record *rec;
5883 rec = container_of(cache, struct device_extent_record, cache);
5884 if (!list_empty(&rec->chunk_list))
5885 list_del_init(&rec->chunk_list);
5886 if (!list_empty(&rec->device_list))
5887 list_del_init(&rec->device_list);
/*
 * Release every entry of the cache tree embedded in @tree by passing
 * free_device_extent_record to cache_tree_free_extents().
 */
5891 void free_device_extent_tree(struct device_extent_tree *tree)
5893 cache_tree_free_extents(&tree->tree, free_device_extent_record);
/*
 * Only built when BTRFS_COMPAT_EXTENT_TREE_V0 is defined.
 *
 * Turn the v0 extent ref item at @slot of @leaf into a backref: when the
 * ref objectid is below BTRFS_FIRST_FREE_OBJECTID a tree backref is added
 * for key.objectid with key.offset as parent, otherwise a data backref is
 * added with the ref count read from the item.
 *
 * NOTE(review): the trailing arguments of the add_tree_backref() call, the
 * return statement and the closing preprocessor line are not visible in
 * this view.
 */
5896 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5897 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5898 struct extent_buffer *leaf, int slot)
5900 struct btrfs_extent_ref_v0 *ref0;
5901 struct btrfs_key key;
5904 btrfs_item_key_to_cpu(leaf, &key, slot);
5905 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5906 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5907 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
5910 ret = add_data_backref(extent_cache, key.objectid, key.offset,
5911 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the chunk item at @slot of @leaf.
 *
 * The record is allocated zeroed with room for num_stripes stripes
 * (btrfs_chunk_record_size()). The cache range is (key->offset, chunk
 * length); generation comes from the leaf header; objectid, type and
 * offset come from @key; the remaining fields (owner, stripe_len,
 * type_flags, io_width, io_align, sector_size, sub_stripes) are read from
 * the chunk item. For every stripe, devid, offset and dev_uuid are copied.
 *
 * NOTE(review): on allocation failure "memory allocation failed" is
 * printed to stderr; what follows (exit or return), the last parameter
 * line and the return statement are not visible in this view.
 */
5917 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5918 struct btrfs_key *key,
5921 struct btrfs_chunk *ptr;
5922 struct chunk_record *rec;
5925 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5926 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5928 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5930 fprintf(stderr, "memory allocation failed\n");
5934 INIT_LIST_HEAD(&rec->list);
5935 INIT_LIST_HEAD(&rec->dextents);
5938 rec->cache.start = key->offset;
5939 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5941 rec->generation = btrfs_header_generation(leaf);
5943 rec->objectid = key->objectid;
5944 rec->type = key->type;
5945 rec->offset = key->offset;
5947 rec->length = rec->cache.size;
5948 rec->owner = btrfs_chunk_owner(leaf, ptr);
5949 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5950 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5951 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5952 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5953 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5954 rec->num_stripes = num_stripes;
5955 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5957 for (i = 0; i < rec->num_stripes; ++i) {
5958 rec->stripes[i].devid =
5959 btrfs_stripe_devid_nr(leaf, ptr, i);
5960 rec->stripes[i].offset =
5961 btrfs_stripe_offset_nr(leaf, ptr, i);
5962 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5963 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the chunk item at @slot of @eb and add it to @chunk_cache.
 *
 * The item is first checked with btrfs_check_chunk_valid(); an invalid
 * chunk is reported through error() as "is not valid, ignore it". A valid
 * chunk is converted with btrfs_new_chunk_record() and inserted into the
 * cache; a failed insertion prints "Chunk[...] existed." to stderr.
 *
 * NOTE(review): the last parameter line, the conditions on ret, what
 * happens to the record after a failed insertion and the return value are
 * not visible in this view.
 */
5970 static int process_chunk_item(struct cache_tree *chunk_cache,
5971 struct btrfs_key *key, struct extent_buffer *eb,
5974 struct chunk_record *rec;
5975 struct btrfs_chunk *chunk;
5978 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5980 * Do extra check for this chunk item,
5982 * It's still possible one can craft a leaf with CHUNK_ITEM, with
5983 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5984 * and owner<->key_type check.
5986 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5989 error("chunk(%llu, %llu) is not valid, ignore it",
5990 key->offset, btrfs_chunk_length(eb, chunk));
5993 rec = btrfs_new_chunk_record(eb, key, slot);
5994 ret = insert_cache_extent(chunk_cache, &rec->cache);
5996 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5997 rec->offset, rec->length);
/*
 * Build a device_record from the dev item at @slot of @eb and insert it
 * into the @dev_cache rb-tree, ordered by device_record_compare.
 *
 * generation comes from the leaf header; objectid, type and offset come
 * from @key; total_byte and byte_used are read from the item. A failed
 * rb_insert() prints "Device[...] existed." to stderr.
 *
 * NOTE(review): the record is obtained with malloc(), so only the fields
 * assigned on visible lines are known to be initialised here. The handling
 * after "memory allocation failed", what happens to the record after a
 * failed insertion and the return value are not visible in this view.
 */
6004 static int process_device_item(struct rb_root *dev_cache,
6005 struct btrfs_key *key, struct extent_buffer *eb, int slot)
6007 struct btrfs_dev_item *ptr;
6008 struct device_record *rec;
6011 ptr = btrfs_item_ptr(eb,
6012 slot, struct btrfs_dev_item);
6014 rec = malloc(sizeof(*rec));
6016 fprintf(stderr, "memory allocation failed\n");
6020 rec->devid = key->offset;
6021 rec->generation = btrfs_header_generation(eb);
6023 rec->objectid = key->objectid;
6024 rec->type = key->type;
6025 rec->offset = key->offset;
/* devid is assigned a second time, now from the item body instead of the key */
6027 rec->devid = btrfs_device_id(eb, ptr);
6028 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
6029 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
6031 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
6033 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a zero-initialised block_group_record from the block group item
 * at @slot of @leaf.
 *
 * The cache range is (key->objectid, key->offset); generation comes from
 * the leaf header; objectid, type and offset come from @key; flags are
 * read from the on-disk item; rec->list is initialised as an empty list.
 *
 * NOTE(review): the last parameter line, the handling after "memory
 * allocation failed" and the return statement are not visible in this
 * view.
 */
6040 struct block_group_record *
6041 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
6044 struct btrfs_block_group_item *ptr;
6045 struct block_group_record *rec;
6047 rec = calloc(1, sizeof(*rec));
6049 fprintf(stderr, "memory allocation failed\n");
6053 rec->cache.start = key->objectid;
6054 rec->cache.size = key->offset;
6056 rec->generation = btrfs_header_generation(leaf);
6058 rec->objectid = key->objectid;
6059 rec->type = key->type;
6060 rec->offset = key->offset;
6062 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
6063 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
6065 INIT_LIST_HEAD(&rec->list);
/*
 * Convert the block group item at @slot of @eb into a record and insert
 * it into @block_group_cache; a failed insertion prints
 * "Block Group[...] existed." to stderr.
 *
 * NOTE(review): the condition on ret, what happens to the record after a
 * failed insertion and the return value are not visible in this view.
 */
6070 static int process_block_group_item(struct block_group_tree *block_group_cache,
6071 struct btrfs_key *key,
6072 struct extent_buffer *eb, int slot)
6074 struct block_group_record *rec;
6077 rec = btrfs_new_block_group_record(eb, key, slot);
6078 ret = insert_block_group_record(block_group_cache, rec);
6080 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
6081 rec->objectid, rec->offset);
/*
 * Build a zero-initialised device_extent_record from the dev extent item
 * at @slot of @leaf.
 *
 * The cache entry is keyed by objectid = key->objectid and
 * start = key->offset, with size equal to the extent length read from the
 * item. generation comes from the leaf header; objectid, type and offset
 * come from @key; the chunk objectid and length are read from the item;
 * chunk_list and device_list are initialised as empty lists.
 *
 * NOTE(review): the target of the btrfs_dev_extent_chunk_offset() result,
 * the handling after "memory allocation failed" and the return statement
 * are not visible in this view.
 */
6088 struct device_extent_record *
6089 btrfs_new_device_extent_record(struct extent_buffer *leaf,
6090 struct btrfs_key *key, int slot)
6092 struct device_extent_record *rec;
6093 struct btrfs_dev_extent *ptr;
6095 rec = calloc(1, sizeof(*rec));
6097 fprintf(stderr, "memory allocation failed\n");
6101 rec->cache.objectid = key->objectid;
6102 rec->cache.start = key->offset;
6104 rec->generation = btrfs_header_generation(leaf);
6106 rec->objectid = key->objectid;
6107 rec->type = key->type;
6108 rec->offset = key->offset;
6110 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
6111 rec->chunk_objecteid =
6112 btrfs_dev_extent_chunk_objectid(leaf, ptr);
6114 btrfs_dev_extent_chunk_offset(leaf, ptr);
6115 rec->length = btrfs_dev_extent_length(leaf, ptr);
6116 rec->cache.size = rec->length;
6118 INIT_LIST_HEAD(&rec->chunk_list);
6119 INIT_LIST_HEAD(&rec->device_list);
/*
 * Convert the dev extent item at @slot of @eb into a record and insert it
 * into @dev_extent_cache; a failed insertion prints
 * "Device extent[...] existed." (objectid, offset, length).
 *
 * NOTE(review): the return-type line, the last parameter line, the
 * condition on ret, what happens to the record after a failed insertion
 * and the return value are not visible in this view.
 */
6125 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
6126 struct btrfs_key *key, struct extent_buffer *eb,
6129 struct device_extent_record *rec;
6132 rec = btrfs_new_device_extent_record(eb, key, slot);
6133 ret = insert_device_extent_record(dev_extent_cache, rec);
6136 "Device extent[%llu, %llu, %llu] existed.\n",
6137 rec->objectid, rec->offset, rec->length);
/*
 * Parse the EXTENT_ITEM / METADATA_ITEM at @slot of @eb, register the
 * extent in @extent_cache and add one backref per inline reference.
 *
 * Visible steps:
 *  - num_bytes is root->nodesize for BTRFS_METADATA_ITEM_KEY and
 *    key.offset otherwise;
 *  - an extent whose bytenr is not aligned to root->sectorsize is
 *    reported through error() as ignored;
 *  - an item smaller than struct btrfs_extent_item is handled as a v0
 *    item (only under BTRFS_COMPAT_EXTENT_TREE_V0): refs are read from
 *    the v0 layout and the result of add_extent_rec() is returned;
 *  - otherwise refs and flags are read from the item, a metadata extent
 *    must have length root->nodesize and a data extent must have a
 *    sectorsize-aligned length, each violation being reported through
 *    error();
 *  - a template (start, nr, extent_item_refs, metadata, max_size) is
 *    passed to add_extent_rec(); its result is not captured on the
 *    visible line;
 *  - inline refs are walked from just after the extent item (skipping
 *    struct btrfs_tree_block_info for tree blocks keyed as EXTENT_ITEM)
 *    up to the end of the item, advancing by
 *    btrfs_extent_inline_ref_size(type):
 *      TREE_BLOCK_REF / SHARED_BLOCK_REF -> add_tree_backref(), failure
 *        reported through error();
 *      EXTENT_DATA_REF -> add_data_backref() with parent 0 and root,
 *        objectid, offset and count taken from the data ref;
 *      SHARED_DATA_REF -> add_data_backref() with the inline offset as
 *        parent and the count from the shared data ref;
 *      anything else -> "corrupt extent record" on stderr.
 *
 * NOTE(review): several local declarations, the assignments of metadata,
 * trailing call arguments, loop/switch framing, early returns and the
 * final return value are not visible in this view. The results of the
 * add_data_backref() calls are not captured on the visible lines.
 */
6144 static int process_extent_item(struct btrfs_root *root,
6145 struct cache_tree *extent_cache,
6146 struct extent_buffer *eb, int slot)
6148 struct btrfs_extent_item *ei;
6149 struct btrfs_extent_inline_ref *iref;
6150 struct btrfs_extent_data_ref *dref;
6151 struct btrfs_shared_data_ref *sref;
6152 struct btrfs_key key;
6153 struct extent_record tmpl;
6158 u32 item_size = btrfs_item_size_nr(eb, slot);
6164 btrfs_item_key_to_cpu(eb, &key, slot);
6166 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6168 num_bytes = root->nodesize;
6170 num_bytes = key.offset;
6173 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
6174 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
6175 key.objectid, root->sectorsize);
6178 if (item_size < sizeof(*ei)) {
6179 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6180 struct btrfs_extent_item_v0 *ei0;
6181 BUG_ON(item_size != sizeof(*ei0));
6182 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
6183 refs = btrfs_extent_refs_v0(eb, ei0);
6187 memset(&tmpl, 0, sizeof(tmpl));
6188 tmpl.start = key.objectid;
6189 tmpl.nr = num_bytes;
6190 tmpl.extent_item_refs = refs;
6191 tmpl.metadata = metadata;
6193 tmpl.max_size = num_bytes;
6195 return add_extent_rec(extent_cache, &tmpl);
6198 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
6199 refs = btrfs_extent_refs(eb, ei);
6200 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
6204 if (metadata && num_bytes != root->nodesize) {
6205 error("ignore invalid metadata extent, length %llu does not equal to %u",
6206 num_bytes, root->nodesize);
6209 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
6210 error("ignore invalid data extent, length %llu is not aligned to %u",
6211 num_bytes, root->sectorsize);
6215 memset(&tmpl, 0, sizeof(tmpl));
6216 tmpl.start = key.objectid;
6217 tmpl.nr = num_bytes;
6218 tmpl.extent_item_refs = refs;
6219 tmpl.metadata = metadata;
6221 tmpl.max_size = num_bytes;
6222 add_extent_rec(extent_cache, &tmpl);
6224 ptr = (unsigned long)(ei + 1);
6225 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
6226 key.type == BTRFS_EXTENT_ITEM_KEY)
6227 ptr += sizeof(struct btrfs_tree_block_info);
6229 end = (unsigned long)ei + item_size;
6231 iref = (struct btrfs_extent_inline_ref *)ptr;
6232 type = btrfs_extent_inline_ref_type(eb, iref);
6233 offset = btrfs_extent_inline_ref_offset(eb, iref);
6235 case BTRFS_TREE_BLOCK_REF_KEY:
6236 ret = add_tree_backref(extent_cache, key.objectid,
6239 error("add_tree_backref failed: %s",
6242 case BTRFS_SHARED_BLOCK_REF_KEY:
6243 ret = add_tree_backref(extent_cache, key.objectid,
6246 error("add_tree_backref failed: %s",
6249 case BTRFS_EXTENT_DATA_REF_KEY:
6250 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
6251 add_data_backref(extent_cache, key.objectid, 0,
6252 btrfs_extent_data_ref_root(eb, dref),
6253 btrfs_extent_data_ref_objectid(eb,
6255 btrfs_extent_data_ref_offset(eb, dref),
6256 btrfs_extent_data_ref_count(eb, dref),
6259 case BTRFS_SHARED_DATA_REF_KEY:
6260 sref = (struct btrfs_shared_data_ref *)(iref + 1);
6261 add_data_backref(extent_cache, key.objectid, offset,
6263 btrfs_shared_data_ref_count(eb, sref),
6267 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
6268 key.objectid, key.type, num_bytes);
6271 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the free space control of @cache holds one entry exactly
 * covering (@offset, @bytes), after trimming away superblock stripes, and
 * remove that entry.
 *
 * For every superblock mirror (BTRFS_SUPER_MIRROR_MAX of them) the
 * superblock offset is mapped back into this block group with
 * btrfs_rmap_block(), giving logical addresses and a stripe length. Each
 * returned stripe is compared with the range:
 *  - stripes entirely before or after the range are tested for first;
 *  - a stripe starting at @offset advances the range past it;
 *  - a stripe starting before @offset trims the front of the range;
 *  - a stripe inside the range either trims the tail or, when it sits in
 *    the middle, the left part is checked by a recursive call and the
 *    loop continues with the right part.
 * The remaining range is looked up with btrfs_find_free_space(); a
 * missing entry, a different offset or a different size each print a
 * diagnostic to stderr. A matching entry is removed with
 * unlink_free_space().
 *
 * NOTE(review): local declarations, the inner loop over the returned
 * addresses, the statements taken on each comparison, the release of the
 * returned address array and all return values are not visible in this
 * view.
 */
6278 static int check_cache_range(struct btrfs_root *root,
6279 struct btrfs_block_group_cache *cache,
6280 u64 offset, u64 bytes)
6282 struct btrfs_free_space *entry;
6288 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
6289 bytenr = btrfs_sb_offset(i);
6290 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
6291 cache->key.objectid, bytenr, 0,
6292 &logical, &nr, &stripe_len);
6297 if (logical[nr] + stripe_len <= offset)
6299 if (offset + bytes <= logical[nr])
6301 if (logical[nr] == offset) {
6302 if (stripe_len >= bytes) {
6306 bytes -= stripe_len;
6307 offset += stripe_len;
6308 } else if (logical[nr] < offset) {
6309 if (logical[nr] + stripe_len >=
6314 bytes = (offset + bytes) -
6315 (logical[nr] + stripe_len);
6316 offset = logical[nr] + stripe_len;
6319 * Could be tricky, the super may land in the
6320 * middle of the area we're checking. First
6321 * check the easiest case, it's at the end.
6323 if (logical[nr] + stripe_len >=
6325 bytes = logical[nr] - offset;
6329 /* Check the left side */
6330 ret = check_cache_range(root, cache,
6332 logical[nr] - offset);
6338 /* Now we continue with the right side */
6339 bytes = (offset + bytes) -
6340 (logical[nr] + stripe_len);
6341 offset = logical[nr] + stripe_len;
6348 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
6350 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
6351 offset, offset+bytes);
6355 if (entry->offset != offset) {
6356 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
6361 if (entry->bytes != bytes) {
6362 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
6363 bytes, entry->bytes, offset);
6367 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space of block group @cache against the
 * extent tree.
 *
 * @root is replaced by fs_info->extent_root. Starting at the larger of
 * the block group start and BTRFS_SUPER_INFO_OFFSET, the extent tree is
 * walked over EXTENT_ITEM and METADATA_ITEM keys until the end of the
 * block group. "last" tracks the end of the previous extent (key.offset
 * bytes for EXTENT_ITEM, root->nodesize for METADATA_ITEM). Each gap
 * between "last" and the next extent, and the tail gap up to the end of
 * the block group, is verified with check_cache_range(). After releasing
 * the path, leftover entries in free_space_ctl->free_space_offset are
 * reported on stderr.
 *
 * NOTE(review): local declarations, loop framing, slot advancement, error
 * branches and the return value are not visible in this view.
 */
6372 static int verify_space_cache(struct btrfs_root *root,
6373 struct btrfs_block_group_cache *cache)
6375 struct btrfs_path path;
6376 struct extent_buffer *leaf;
6377 struct btrfs_key key;
6381 root = root->fs_info->extent_root;
6383 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
6385 btrfs_init_path(&path);
6386 key.objectid = last;
6388 key.type = BTRFS_EXTENT_ITEM_KEY;
6389 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6394 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6395 ret = btrfs_next_leaf(root, &path);
6403 leaf = path.nodes[0];
6404 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6405 if (key.objectid >= cache->key.offset + cache->key.objectid)
6407 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
6408 key.type != BTRFS_METADATA_ITEM_KEY) {
6413 if (last == key.objectid) {
6414 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6415 last = key.objectid + key.offset;
6417 last = key.objectid + root->nodesize;
6422 ret = check_cache_range(root, cache, last,
6423 key.objectid - last);
6426 if (key.type == BTRFS_EXTENT_ITEM_KEY)
6427 last = key.objectid + key.offset;
6429 last = key.objectid + root->nodesize;
6433 if (last < cache->key.objectid + cache->key.offset)
6434 ret = check_cache_range(root, cache, last,
6435 cache->key.objectid +
6436 cache->key.offset - last);
6439 btrfs_release_path(&path);
6442 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
6443 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of the block groups, starting the
 * lookup just after the primary superblock.
 *
 * If the superblock cache generation is not -1ULL and differs from the
 * superblock generation, a notice is printed that the space cache will be
 * invalidated. When progress reporting is enabled the task is switched to
 * TASK_FREE_SPACE and started, and it is stopped before returning.
 *
 * For each block group found with btrfs_lookup_first_block_group():
 *  - the free space control is initialised if missing and any cached free
 *    space is dropped with btrfs_remove_free_space_cache();
 *  - with the FREE_SPACE_TREE compat_ro feature, super stripes are
 *    excluded, the free space tree is loaded and the excluded extents are
 *    freed again; failures are printed to stderr;
 *  - otherwise the v1 cache is loaded with load_free_space_cache();
 *  - verify_space_cache() is run and a failure prints
 *    "cache appears valid but isn't" with the block group start.
 *
 * Returns -EINVAL when the local error counter is non-zero, 0 otherwise.
 *
 * NOTE(review): loop framing, the statements that update the error
 * counter and the early exits are not visible in this view.
 */
6451 static int check_space_cache(struct btrfs_root *root)
6453 struct btrfs_block_group_cache *cache;
6454 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
6458 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
6459 btrfs_super_generation(root->fs_info->super_copy) !=
6460 btrfs_super_cache_generation(root->fs_info->super_copy)) {
6461 printf("cache and super generation don't match, space cache "
6462 "will be invalidated\n");
6466 if (ctx.progress_enabled) {
6467 ctx.tp = TASK_FREE_SPACE;
6468 task_start(ctx.info);
6472 cache = btrfs_lookup_first_block_group(root->fs_info, start);
6476 start = cache->key.objectid + cache->key.offset;
6477 if (!cache->free_space_ctl) {
6478 if (btrfs_init_free_space_ctl(cache,
6479 root->sectorsize)) {
6484 btrfs_remove_free_space_cache(cache);
6487 if (btrfs_fs_compat_ro(root->fs_info,
6488 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
6489 ret = exclude_super_stripes(root, cache);
6491 fprintf(stderr, "could not exclude super stripes: %s\n",
6496 ret = load_free_space_tree(root->fs_info, cache);
6497 free_excluded_extents(root, cache);
6499 fprintf(stderr, "could not load free space tree: %s\n",
6506 ret = load_free_space_cache(root->fs_info, cache);
6511 ret = verify_space_cache(root, cache);
6513 fprintf(stderr, "cache appears valid but isn't %Lu\n",
6514 cache->key.objectid);
6519 task_stop(ctx.info);
6521 return error ? -EINVAL : 0;
/*
 * Read the data range (@bytenr, @num_bytes) and compare the checksum of
 * every sectorsize block with the checksums stored in @eb starting at
 * @leaf_offset.
 *
 * A length that is not a multiple of root->sectorsize is tested for up
 * front. A buffer of @num_bytes is allocated and filled through
 * read_extent_data(), which may return less than requested via read_len.
 * Each sector is checksummed with btrfs_csum_data()/btrfs_csum_final()
 * and compared with the csum_size bytes read from @eb at
 * leaf_offset + (sector index * csum_size). On a mismatch a diagnostic
 * with mirror, bytenr and both checksums is printed, and the number of
 * copies is queried so that another mirror can be tried while
 * mirror < num_copies - 1.
 *
 * NOTE(review): several local declarations, the mirror retry statements,
 * the offset update, the release of the buffer and the return values are
 * not visible in this view.
 */
6524 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
6525 u64 num_bytes, unsigned long leaf_offset,
6526 struct extent_buffer *eb) {
6529 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6531 unsigned long csum_offset;
6535 u64 data_checked = 0;
6541 if (num_bytes % root->sectorsize)
6544 data = malloc(num_bytes);
6548 while (offset < num_bytes) {
6551 read_len = num_bytes - offset;
6552 /* read as much space once a time */
6553 ret = read_extent_data(root, data + offset,
6554 bytenr + offset, &read_len, mirror);
6558 /* verify every 4k data's checksum */
6559 while (data_checked < read_len) {
6561 tmp = offset + data_checked;
6563 csum = btrfs_csum_data(NULL, (char *)data + tmp,
6564 csum, root->sectorsize);
6565 btrfs_csum_final(csum, (u8 *)&csum);
6567 csum_offset = leaf_offset +
6568 tmp / root->sectorsize * csum_size;
6569 read_extent_buffer(eb, (char *)&csum_expected,
6570 csum_offset, csum_size);
6571 /* try another mirror */
6572 if (csum != csum_expected) {
6573 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
6574 mirror, bytenr + tmp,
6575 csum, csum_expected);
6576 num_copies = btrfs_num_copies(
6577 &root->fs_info->mapping_tree,
6579 if (mirror < num_copies - 1) {
6584 data_checked += root->sectorsize;
/*
 * Check that the byte range starting at @bytenr (num_bytes long) is
 * covered by EXTENT_ITEM records in the extent tree.
 *
 * The extent tree is searched for (bytenr, EXTENT_ITEM, -1) and the
 * cursor is moved back one position, using btrfs_prev_leaf() when already
 * at slot 0, and further back while the key type is greater than
 * BTRFS_EXTENT_ITEM_KEY. Walking forward, each EXTENT_ITEM is compared
 * with the remaining range:
 *  - items ending before the range or starting after it are tested for;
 *  - an item starting at bytenr consumes key.offset bytes from the front;
 *  - an item starting before bytenr trims the front to its end;
 *  - an item starting inside the range either trims the tail or, when it
 *    ends before the range does, the right-hand part is checked by a
 *    recursive call and num_bytes is cut to the left-hand part.
 * If bytes remain uncovered and no error occurred, "There are no extents
 * for csum range" is printed to stderr. The path is released before
 * returning.
 *
 * NOTE(review): the second parameter line, loop framing, slot
 * advancement, the statements taken on each comparison and the return
 * values are not visible in this view.
 */
6593 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
6596 struct btrfs_path path;
6597 struct extent_buffer *leaf;
6598 struct btrfs_key key;
6601 btrfs_init_path(&path);
6602 key.objectid = bytenr;
6603 key.type = BTRFS_EXTENT_ITEM_KEY;
6604 key.offset = (u64)-1;
6607 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
6610 fprintf(stderr, "Error looking up extent record %d\n", ret);
6611 btrfs_release_path(&path);
6614 if (path.slots[0] > 0) {
6617 ret = btrfs_prev_leaf(root, &path);
6620 } else if (ret > 0) {
6627 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6630 * Block group items come before extent items if they have the same
6631 * bytenr, so walk back one more just in case. Dear future traveller,
6632 * first congrats on mastering time travel. Now if it's not too much
6633 * trouble could you go back to 2006 and tell Chris to make the
6634 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
6635 * EXTENT_ITEM_KEY please?
6637 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
6638 if (path.slots[0] > 0) {
6641 ret = btrfs_prev_leaf(root, &path);
6644 } else if (ret > 0) {
6649 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
6653 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6654 ret = btrfs_next_leaf(root, &path);
6656 fprintf(stderr, "Error going to next leaf "
6658 btrfs_release_path(&path);
6664 leaf = path.nodes[0];
6665 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6666 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
6670 if (key.objectid + key.offset < bytenr) {
6674 if (key.objectid > bytenr + num_bytes)
6677 if (key.objectid == bytenr) {
6678 if (key.offset >= num_bytes) {
6682 num_bytes -= key.offset;
6683 bytenr += key.offset;
6684 } else if (key.objectid < bytenr) {
6685 if (key.objectid + key.offset >= bytenr + num_bytes) {
6689 num_bytes = (bytenr + num_bytes) -
6690 (key.objectid + key.offset);
6691 bytenr = key.objectid + key.offset;
6693 if (key.objectid + key.offset < bytenr + num_bytes) {
6694 u64 new_start = key.objectid + key.offset;
6695 u64 new_bytes = bytenr + num_bytes - new_start;
6698 * Weird case, the extent is in the middle of
6699 * our range, we'll have to search one side
6700 * and then the other. Not sure if this happens
6701 * in real life, but no harm in coding it up
6702 * anyway just in case.
6704 btrfs_release_path(&path);
6705 ret = check_extent_exists(root, new_start,
6708 fprintf(stderr, "Right section didn't "
6712 num_bytes = key.objectid - bytenr;
6715 num_bytes = key.objectid - bytenr;
6722 if (num_bytes && !ret) {
6723 fprintf(stderr, "There are no extents for csum range "
6724 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6728 btrfs_release_path(&path);
/*
 * Walk the checksum tree and verify that every contiguous run of
 * checksummed bytes is backed by extent records; optionally verify the
 * data checksums themselves.
 *
 * @root is replaced by fs_info->csum_root; if its root node is not
 * uptodate, "No valid csum tree found" is printed. The tree is searched
 * from (BTRFS_EXTENT_CSUM_OBJECTID, BTRFS_EXTENT_CSUM_KEY). For each
 * EXTENT_CSUM item the covered data length is
 * (item size / csum_size) * sectorsize. Unless the global check_data_csum
 * is zero, check_extent_csums() is run on the item. Consecutive items are
 * accumulated in (offset, num_bytes); when an item does not start at
 * offset + num_bytes, the accumulated run is passed to
 * check_extent_exists() and a failure prints "Csum exists for ... but
 * there is no extent record". The path is released before returning.
 *
 * NOTE(review): loop framing, slot advancement, the reset of num_bytes,
 * the error counter and the return value are not visible in this view.
 */
6732 static int check_csums(struct btrfs_root *root)
6734 struct btrfs_path path;
6735 struct extent_buffer *leaf;
6736 struct btrfs_key key;
6737 u64 offset = 0, num_bytes = 0;
6738 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6742 unsigned long leaf_offset;
6744 root = root->fs_info->csum_root;
6745 if (!extent_buffer_uptodate(root->node)) {
6746 fprintf(stderr, "No valid csum tree found\n");
6750 btrfs_init_path(&path);
6751 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6752 key.type = BTRFS_EXTENT_CSUM_KEY;
6754 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6756 fprintf(stderr, "Error searching csum tree %d\n", ret);
6757 btrfs_release_path(&path);
6761 if (ret > 0 && path.slots[0])
6766 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6767 ret = btrfs_next_leaf(root, &path);
6769 fprintf(stderr, "Error going to next leaf "
6776 leaf = path.nodes[0];
6778 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6779 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
6784 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
6785 csum_size) * root->sectorsize;
6786 if (!check_data_csum)
6787 goto skip_csum_check;
6788 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6789 ret = check_extent_csums(root, key.offset, data_len,
6795 offset = key.offset;
6796 } else if (key.offset != offset + num_bytes) {
6797 ret = check_extent_exists(root, offset, num_bytes);
6799 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6800 "there is no extent record\n",
6801 offset, offset+num_bytes);
6804 offset = key.offset;
6807 num_bytes += data_len;
6811 btrfs_release_path(&path);
/*
 * Compare @key against @drop_key field by field in the order objectid,
 * type, offset, testing at each level whether the @key field is smaller
 * and descending to the next field only when the current one is equal.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably a key that sorts before @drop_key yields 1 and anything else
 * yields 0 -- confirm.
 */
6815 static int is_dropped_key(struct btrfs_key *key,
6816 struct btrfs_key *drop_key) {
6817 if (key->objectid < drop_key->objectid)
6819 else if (key->objectid == drop_key->objectid) {
6820 if (key->type < drop_key->type)
6822 else if (key->type == drop_key->type) {
6823 if (key->offset < drop_key->offset)
6831 * Here are the rules for FULL_BACKREF.
6833 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6834 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6836 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6837 * if it happened after the relocation occurred since we'll have dropped the
6838 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6839 * have no real way to know for sure.
6841 * We process the blocks one root at a time, and we start from the lowest root
6842 * objectid and go to the highest. So we can just lookup the owner backref for
6843 * the record and if we don't find it then we know it doesn't exist and we have
6846 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6847 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6848 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out whether tree block @buf, reached while scanning the root described
 * by @ri, should carry BTRFS_BLOCK_FLAG_FULL_BACKREF, and report that through
 * *flags.  The extent record for the block is looked up in @extent_cache;
 * rec->bad_full_backref is set when the outcome disagrees with a value
 * already stored in rec->flag_block_full_backref.
 */
6850 static int calc_extent_flag(struct btrfs_root *root,
6851 struct cache_tree *extent_cache,
6852 struct extent_buffer *buf,
6853 struct root_item_record *ri,
6856 struct extent_record *rec;
6857 struct cache_extent *cache;
6858 struct tree_backref *tback;
/* Find the extent record that tracks this block. */
6861 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6862 /* we have added this extent before */
6866 rec = container_of(cache, struct extent_record, cache);
6869 * Except file/reloc tree, we can not have
6872 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* Is this block the root node recorded in the root item? */
6877 if (buf->start == ri->bytenr)
/* Does the block header carry the RELOC flag? */
6880 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
/* Is the block still owned by the root being scanned? */
6883 owner = btrfs_header_owner(buf);
6884 if (owner == ri->objectid)
/* Look for a tree backref with parent 0 that names the header owner as root. */
6887 tback = find_tree_backref(rec, 0, owner);
/* rec already holds a decision that is neither unset nor 0: mark the mismatch. */
6892 if (rec->flag_block_full_backref != FLAG_UNSET &&
6893 rec->flag_block_full_backref != 0)
6894 rec->bad_full_backref = 1;
/* FULL_BACKREF applies; a stored decision other than 1 is a mismatch. */
6897 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6898 if (rec->flag_block_full_backref != FLAG_UNSET &&
6899 rec->flag_block_full_backref != 1)
6900 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr of the form
 * "Invalid key type(<type>) found in root(<root>)", where the two names are
 * produced by print_key_type() and print_objectid().
 *
 * @key_type: the offending key type
 * @rootid:   objectid of the tree the key was found in
 */
6904 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6906 fprintf(stderr, "Invalid key type(");
6907 print_key_type(stderr, 0, key_type);
6908 fprintf(stderr, ") found in root(");
6909 print_objectid(stderr, rootid, 0);
6910 fprintf(stderr, ")\n");
6914 * Check if the key is valid with its extent buffer.
6916 * This is an early check in case an invalid key exists in an extent buffer
6917 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Check that an item of type @key_type may legally live in the tree whose
 * objectid is @rootid.  Rules tested here:
 *   DEV_ITEM, CHUNK_ITEM                        - chunk tree
 *   EXTENT_ITEM, METADATA_ITEM, BLOCK_GROUP_ITEM - extent tree
 *   ROOT_ITEM                                   - root tree
 *   DEV_EXTENT                                  - dev tree
 * A violation is reported through report_mismatch_key_root().  The caller in
 * run_next_block() treats a non-zero result as "invalid key".
 */
6920 static int check_type_with_root(u64 rootid, u8 key_type)
6923 /* Only valid in chunk tree */
6924 case BTRFS_DEV_ITEM_KEY:
6925 case BTRFS_CHUNK_ITEM_KEY:
6926 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6929 /* valid in csum and log tree */
/*
 * NOTE(review): BTRFS_CSUM_TREE_OBJECTID is a tree objectid, not a key type,
 * yet it is used as a case label for key_type.  BTRFS_EXTENT_CSUM_KEY looks
 * like what was intended - confirm before changing.
 */
6930 case BTRFS_CSUM_TREE_OBJECTID:
6931 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
/* Extent tree only. */
6935 case BTRFS_EXTENT_ITEM_KEY:
6936 case BTRFS_METADATA_ITEM_KEY:
6937 case BTRFS_BLOCK_GROUP_ITEM_KEY:
6938 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
/* Root tree only. */
6941 case BTRFS_ROOT_ITEM_KEY:
6942 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
/* Device tree only. */
6945 case BTRFS_DEV_EXTENT_KEY:
6946 if (rootid != BTRFS_DEV_TREE_OBJECTID)
/* Mismatch: tell the user which key type was found in which root. */
6952 report_mismatch_key_root(key_type, rootid);
/*
 * Process one tree block of the scan.
 *
 * A batch of candidate blocks is obtained from pick_next_pending(); the first
 * one is removed from the pending/reada/nodes caches, read from disk and has
 * its FULL_BACKREF state determined.  For a leaf every item is dispatched on
 * its key type to the matching process_*()/add_*_backref() helper; for a node
 * an extent record and a tree backref are added for each child pointer and
 * the child is queued for a later call.  The global byte counters
 * (total_btree_bytes, btree_space_waste, data_bytes_*, ...) are updated along
 * the way.
 *
 * @bits:         scratch array receiving the batch of block ranges
 * @pending, @seen, @reada, @nodes: work-queue caches of block ranges
 * @extent_cache: extent records collected so far
 * @chunk_cache, @dev_cache, @block_group_cache, @dev_extent_cache:
 *                destinations for chunk / device / block group / device
 *                extent items found in leaves
 * @ri:           record of the root currently being scanned
 */
6956 static int run_next_block(struct btrfs_root *root,
6957 struct block_info *bits,
6960 struct cache_tree *pending,
6961 struct cache_tree *seen,
6962 struct cache_tree *reada,
6963 struct cache_tree *nodes,
6964 struct cache_tree *extent_cache,
6965 struct cache_tree *chunk_cache,
6966 struct rb_root *dev_cache,
6967 struct block_group_tree *block_group_cache,
6968 struct device_extent_tree *dev_extent_cache,
6969 struct root_item_record *ri)
6971 struct extent_buffer *buf;
6972 struct extent_record *rec = NULL;
6983 struct btrfs_key key;
6984 struct cache_extent *cache;
/* Ask pick_next_pending() for the next batch of block ranges to work on. */
6987 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6988 bits_nr, &reada_bits);
/* Remember each block of the batch in the reada cache and start readahead. */
6993 for(i = 0; i < nritems; i++) {
6994 ret = add_cache_extent(reada, bits[i].start,
6999 /* fixme, get the parent transid */
7000 readahead_tree_block(root, bits[i].start,
/* Only the first block of the batch is processed by this call. */
7004 *last = bits[0].start;
7005 bytenr = bits[0].start;
7006 size = bits[0].size;
/* The block is being handled now: take it off the three work queues. */
7008 cache = lookup_cache_extent(pending, bytenr, size);
7010 remove_cache_extent(pending, cache);
7013 cache = lookup_cache_extent(reada, bytenr, size);
7015 remove_cache_extent(reada, cache);
7018 cache = lookup_cache_extent(nodes, bytenr, size);
7020 remove_cache_extent(nodes, cache);
/* If an extent record exists, read with the generation stored in it. */
7023 cache = lookup_cache_extent(extent_cache, bytenr, size);
7025 rec = container_of(cache, struct extent_record, cache);
7026 gen = rec->parent_generation;
7029 /* fixme, get the real parent transid */
7030 buf = read_tree_block(root, bytenr, size, gen);
/* Unreadable block: note the I/O failure against this range. */
7031 if (!extent_buffer_uptodate(buf)) {
7032 record_bad_block_io(root->fs_info,
7033 extent_cache, bytenr, size);
7037 nritems = btrfs_header_nritems(buf);
/*
 * Unless init_extent_tree is set, the extent tree is asked for the block's
 * flags first; calc_extent_flag() derives them from the collected records.
 * If calc_extent_flag() fails, FULL_BACKREF is assumed.
 */
7040 if (!init_extent_tree) {
7041 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
7042 btrfs_header_level(buf), 1, NULL,
7045 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7047 fprintf(stderr, "Couldn't calc extent flags\n");
7048 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7053 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
7055 fprintf(stderr, "Couldn't calc extent flags\n");
7056 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Sanity-check the FULL_BACKREF decision against the block header. */
7060 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7062 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
7063 ri->objectid == btrfs_header_owner(buf)) {
7065 * Ok we got to this block from it's original owner and
7066 * we have FULL_BACKREF set. Relocation can leave
7067 * converted blocks over so this is altogether possible,
7068 * however it's not possible if the generation > the
7069 * last snapshot, so check for this case.
7071 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
7072 btrfs_header_generation(buf) > ri->last_snapshot) {
/* Clear the flag and remember that the stored one was wrong. */
7073 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7074 rec->bad_full_backref = 1;
7079 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
7080 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
/* Reloc root or RELOC header flag: force FULL_BACKREF on and mark it bad. */
7081 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7082 rec->bad_full_backref = 1;
/* Store the final decision on the extent record. */
7086 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
7087 rec->flag_block_full_backref = 1;
7091 rec->flag_block_full_backref = 0;
7093 owner = btrfs_header_owner(buf);
7096 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: account its free space and dispatch on each item's key type. */
7100 if (btrfs_is_leaf(buf)) {
7101 btree_space_waste += btrfs_leaf_free_space(root, buf);
7102 for (i = 0; i < nritems; i++) {
7103 struct btrfs_file_extent_item *fi;
7104 btrfs_item_key_to_cpu(buf, &key, i);
7106 * Check key type against the leaf owner.
7107 * Could filter quite a lot of early error if
7110 if (check_type_with_root(btrfs_header_owner(buf),
7112 fprintf(stderr, "ignoring invalid key\n");
/* Extent tree items: extent records and their inline references. */
7115 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
7116 process_extent_item(root, extent_cache, buf,
7120 if (key.type == BTRFS_METADATA_ITEM_KEY) {
7121 process_extent_item(root, extent_cache, buf,
7125 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
7127 btrfs_item_size_nr(buf, i);
/* Chunk / device / block group / device extent items feed their own caches. */
7130 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
7131 process_chunk_item(chunk_cache, &key, buf, i);
7134 if (key.type == BTRFS_DEV_ITEM_KEY) {
7135 process_device_item(dev_cache, &key, buf, i);
7138 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
7139 process_block_group_item(block_group_cache,
7143 if (key.type == BTRFS_DEV_EXTENT_KEY) {
7144 process_device_extent_item(dev_extent_cache,
7149 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
7150 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7151 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed tree backrefs: key.offset is the root for TREE_BLOCK_REF ... */
7158 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
7159 ret = add_tree_backref(extent_cache,
7160 key.objectid, 0, key.offset, 0);
7162 error("add_tree_backref failed: %s",
/* ... and the parent block for SHARED_BLOCK_REF. */
7166 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
7167 ret = add_tree_backref(extent_cache,
7168 key.objectid, key.offset, 0, 0);
7170 error("add_tree_backref failed: %s",
/* Keyed data backrefs: root/objectid/offset/count come from the item body. */
7174 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
7175 struct btrfs_extent_data_ref *ref;
7176 ref = btrfs_item_ptr(buf, i,
7177 struct btrfs_extent_data_ref);
7178 add_data_backref(extent_cache,
7180 btrfs_extent_data_ref_root(buf, ref),
7181 btrfs_extent_data_ref_objectid(buf,
7183 btrfs_extent_data_ref_offset(buf, ref),
7184 btrfs_extent_data_ref_count(buf, ref),
7185 0, root->sectorsize);
/* Shared data backrefs: key.offset is passed as the parent, only a count is stored. */
7188 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
7189 struct btrfs_shared_data_ref *ref;
7190 ref = btrfs_item_ptr(buf, i,
7191 struct btrfs_shared_data_ref);
7192 add_data_backref(extent_cache,
7193 key.objectid, key.offset, 0, 0, 0,
7194 btrfs_shared_data_ref_count(buf, ref),
7195 0, root->sectorsize);
/*
 * Orphan item keys: key.objectid is tested against BTRFS_ORPHAN_OBJECTID
 * first; a bad_item holding the key and the owning root is queued on the
 * global delete_items list.
 */
7198 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
7199 struct bad_item *bad;
7201 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
7205 bad = malloc(sizeof(struct bad_item));
7208 INIT_LIST_HEAD(&bad->list);
7209 memcpy(&bad->key, &key,
7210 sizeof(struct btrfs_key));
7211 bad->root_id = owner;
7212 list_add_tail(&bad->list, &delete_items);
/* The remainder of the loop body deals with file extent items. */
7215 if (key.type != BTRFS_EXTENT_DATA_KEY)
7217 fi = btrfs_item_ptr(buf, i,
7218 struct btrfs_file_extent_item);
/* Inline extents and disk_bytenr == 0 are tested for before any accounting. */
7219 if (btrfs_file_extent_type(buf, fi) ==
7220 BTRFS_FILE_EXTENT_INLINE)
7222 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
7225 data_bytes_allocated +=
7226 btrfs_file_extent_disk_num_bytes(buf, fi);
/*
 * NOTE(review): this compares the running total data_bytes_allocated with
 * the sector size, not this item's disk_num_bytes - confirm that is intended.
 */
7227 if (data_bytes_allocated < root->sectorsize) {
7230 data_bytes_referenced +=
7231 btrfs_file_extent_num_bytes(buf, fi);
/*
 * Record the data backref seen from this file extent item; the offset
 * argument is key.offset minus the item's offset into the extent.
 */
7232 add_data_backref(extent_cache,
7233 btrfs_file_extent_disk_bytenr(buf, fi),
7234 parent, owner, key.objectid, key.offset -
7235 btrfs_file_extent_offset(buf, fi), 1, 1,
7236 btrfs_file_extent_disk_num_bytes(buf, fi));
7240 struct btrfs_key first_key;
7242 first_key.objectid = 0;
7245 btrfs_item_key_to_cpu(buf, &first_key, 0);
7246 level = btrfs_header_level(buf);
/* Node: walk the child pointers. */
7247 for (i = 0; i < nritems; i++) {
7248 struct extent_record tmpl;
7250 ptr = btrfs_node_blockptr(buf, i);
7251 size = root->nodesize;
7252 btrfs_node_key_to_cpu(buf, &key, i);
/* Compare against the root item's drop progress (drop_level / drop_key). */
7254 if ((level == ri->drop_level)
7255 && is_dropped_key(&key, &ri->drop_key)) {
/* Template extent record for the child, carrying the parent key and generation. */
7260 memset(&tmpl, 0, sizeof(tmpl));
7261 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
7262 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
7267 tmpl.max_size = size;
7268 ret = add_extent_rec(extent_cache, &tmpl);
7272 ret = add_tree_backref(extent_cache, ptr, parent,
7275 error("add_tree_backref failed: %s",
/* Queue the child on either the nodes or the pending cache. */
7281 add_pending(nodes, seen, ptr, size);
7283 add_pending(pending, seen, ptr, size);
/* Unused key-pointer slots of the node count as wasted btree space. */
7286 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
7287 nritems) * sizeof(struct btrfs_key_ptr);
/* Global size accounting, split by the tree that owns the block. */
7289 total_btree_bytes += buf->len;
7290 if (fs_root_objectid(btrfs_header_owner(buf)))
7291 total_fs_tree_bytes += buf->len;
7292 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
7293 total_extent_tree_bytes += buf->len;
/*
 * A reloc-tree block at mixed backref revision that lacks the RELOC header
 * flag sets the global found_old_backref.
 */
7294 if (!found_old_backref &&
7295 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
7296 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
7297 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
7298 found_old_backref = 1;
7300 free_extent_buffer(buf);
/*
 * Seed the scan with the root node @buf of a tree: queue the block for
 * run_next_block(), create an extent record for it and add the tree backref
 * that ties it to its root.
 *
 * @buf:          root node of the tree
 * @extent_cache: extent records collected so far
 * @pending, @seen, @nodes: work-queue caches
 */
7304 static int add_root_to_pending(struct extent_buffer *buf,
7305 struct cache_tree *extent_cache,
7306 struct cache_tree *pending,
7307 struct cache_tree *seen,
7308 struct cache_tree *nodes,
7311 struct extent_record tmpl;
/* Interior nodes go on the "nodes" queue, leaves on "pending". */
7314 if (btrfs_header_level(buf) > 0)
7315 add_pending(nodes, seen, buf->start, buf->len);
7317 add_pending(pending, seen, buf->start, buf->len);
/* Template extent record covering exactly this block. */
7319 memset(&tmpl, 0, sizeof(tmpl));
7320 tmpl.start = buf->start;
7325 tmpl.max_size = buf->len;
7326 add_extent_rec(extent_cache, &tmpl);
/*
 * Reloc tree roots and blocks older than the mixed backref revision are
 * referenced with the block itself as parent; all others are referenced by
 * root objectid with parent 0.
 */
7328 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
7329 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
7330 ret = add_tree_backref(extent_cache, buf->start, buf->start,
7333 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
7338 /* as we fix the tree, we might be deleting blocks that
7339 * we're tracking for repair. This hook makes sure we
7340 * remove any backrefs for blocks as we are fixing them.
/*
 * Mirror the drop of an extent reference in the in-memory extent record cache
 * (root->fs_info->fsck_extent_cache): the matching data or tree backref has
 * its counters reduced, and the record is handed to maybe_free_extent_rec().
 *
 * @bytenr, @num_bytes: extent whose reference is being dropped
 * @parent, @root_objectid, @owner, @offset: identify the backref
 */
7342 static int free_extent_hook(struct btrfs_trans_handle *trans,
7343 struct btrfs_root *root,
7344 u64 bytenr, u64 num_bytes, u64 parent,
7345 u64 root_objectid, u64 owner, u64 offset,
7348 struct extent_record *rec;
7349 struct cache_extent *cache;
7351 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners at or above BTRFS_FIRST_FREE_OBJECTID are treated as data. */
7353 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
7354 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
7358 rec = container_of(cache, struct extent_record, cache);
/* Data extent: backrefs carry counts, so subtract refs_to_drop. */
7360 struct data_backref *back;
7361 back = find_data_backref(rec, parent, root_objectid, owner,
7362 offset, 1, bytenr, num_bytes);
7365 if (back->node.found_ref) {
7366 back->found_ref -= refs_to_drop;
7368 rec->refs -= refs_to_drop;
7370 if (back->node.found_extent_tree) {
7371 back->num_refs -= refs_to_drop;
7372 if (rec->extent_item_refs)
7373 rec->extent_item_refs -= refs_to_drop;
/* Clear the "found" bits once the matching counter reaches zero. */
7375 if (back->found_ref == 0)
7376 back->node.found_ref = 0;
7377 if (back->num_refs == 0)
7378 back->node.found_extent_tree = 0;
/*
 * NOTE(review): the backref is unlinked while found_ref is still set; a
 * test for both bits being clear may have been intended - confirm.
 */
7380 if (!back->node.found_extent_tree && back->node.found_ref) {
7381 list_del(&back->node.list);
/* Tree block: the backref is either present or not, no counts involved. */
7385 struct tree_backref *back;
7386 back = find_tree_backref(rec, parent, root_objectid);
7389 if (back->node.found_ref) {
7392 back->node.found_ref = 0;
7394 if (back->node.found_extent_tree) {
7395 if (rec->extent_item_refs)
7396 rec->extent_item_refs--;
7397 back->node.found_extent_tree = 0;
/* NOTE(review): same condition as in the data branch above - confirm. */
7399 if (!back->node.found_extent_tree && back->node.found_ref) {
7400 list_del(&back->node.list);
7404 maybe_free_extent_rec(extent_cache, rec);
/*
 * Remove from the extent tree the items keyed on @bytenr whose type is an
 * extent item or one of the backref item types.  The tree is searched with a
 * key of (bytenr, ..., (u64)-1); items of other types are stepped over by
 * lowering the search key below the found key.  When an EXTENT_ITEM or
 * METADATA_ITEM is deleted, btrfs_update_block_group() is called for its
 * byte range.
 *
 * @trans: running transaction
 * @root:  used to reach root->fs_info->extent_root and root->nodesize
 * @path:  scratch path, released before each new search
 */
7409 static int delete_extent_records(struct btrfs_trans_handle *trans,
7410 struct btrfs_root *root,
7411 struct btrfs_path *path,
7412 u64 bytenr, u64 new_len)
7414 struct btrfs_key key;
7415 struct btrfs_key found_key;
7416 struct extent_buffer *leaf;
7421 key.objectid = bytenr;
7423 key.offset = (u64)-1;
7426 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
7433 if (path->slots[0] == 0)
7439 leaf = path->nodes[0];
7440 slot = path->slots[0];
7442 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7443 if (found_key.objectid != bytenr)
/* Not an extent item or backref item: move the search key below it. */
7446 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
7447 found_key.type != BTRFS_METADATA_ITEM_KEY &&
7448 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
7449 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
7450 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
7451 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
7452 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
7453 btrfs_release_path(path);
/* Type 0 cannot be decremented, so step the offset down instead. */
7454 if (found_key.type == 0) {
7455 if (found_key.offset == 0)
7457 key.offset = found_key.offset - 1;
7458 key.type = found_key.type;
/* Otherwise continue from the largest offset of the next lower type. */
7460 key.type = found_key.type - 1;
7461 key.offset = (u64)-1;
7465 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
7466 found_key.objectid, found_key.type, found_key.offset);
7468 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
7471 btrfs_release_path(path);
/*
 * For extent items the length is key.offset; for metadata items the length
 * used is the node size.
 */
7473 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
7474 found_key.type == BTRFS_METADATA_ITEM_KEY) {
7475 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
7476 found_key.offset : root->nodesize;
7478 ret = btrfs_update_block_group(trans, root, bytenr,
7485 btrfs_release_path(path);
7490 * for a single backref, this will allocate a new extent
7491 * and add the backref to it.
/*
 * Write the reference described by @back for extent @rec into the extent
 * tree.  The visible code first inserts an EXTENT_ITEM keyed on
 * (rec->start, EXTENT_ITEM, rec->max_size) with a reference count of 0, then
 * adds the reference(s) through btrfs_inc_extent_ref().
 *
 * @trans:     running transaction
 * @info:      filesystem being repaired
 * @path:      scratch path, released before returning
 * @rec:       extent record being written
 * @back:      backref to add; back->is_data selects data or tree handling
 * @allocated: NOTE(review): presumably tells whether the extent item already
 *             exists so that the insert can be skipped - confirm
 * @flags:     extra extent flags OR-ed in for tree blocks
 */
7493 static int record_extent(struct btrfs_trans_handle *trans,
7494 struct btrfs_fs_info *info,
7495 struct btrfs_path *path,
7496 struct extent_record *rec,
7497 struct extent_backref *back,
7498 int allocated, u64 flags)
7501 struct btrfs_root *extent_root = info->extent_root;
7502 struct extent_buffer *leaf;
7503 struct btrfs_key ins_key;
7504 struct btrfs_extent_item *ei;
7505 struct data_backref *dback;
7506 struct btrfs_tree_block_info *bi;
/* Raise max_size to at least one node size. */
7509 rec->max_size = max_t(u64, rec->max_size,
7510 info->extent_root->nodesize);
/* The item holds the extent item, plus a tree_block_info where added below. */
7513 u32 item_size = sizeof(*ei);
7516 item_size += sizeof(*bi);
7518 ins_key.objectid = rec->start;
7519 ins_key.offset = rec->max_size;
7520 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
7522 ret = btrfs_insert_empty_item(trans, extent_root, path,
7523 &ins_key, item_size);
7527 leaf = path->nodes[0];
7528 ei = btrfs_item_ptr(leaf, path->slots[0],
7529 struct btrfs_extent_item);
/* Start at zero references; btrfs_inc_extent_ref() below adds the real ones. */
7531 btrfs_set_extent_refs(leaf, ei, 0);
7532 btrfs_set_extent_generation(leaf, ei, rec->generation);
7534 if (back->is_data) {
7535 btrfs_set_extent_flags(leaf, ei,
7536 BTRFS_EXTENT_FLAG_DATA);
/* NOTE(review): stray second ';' on the declaration below. */
7538 struct btrfs_disk_key copy_key;;
/* The tree_block_info sits directly behind the extent item. */
7540 bi = (struct btrfs_tree_block_info *)(ei + 1);
7541 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/*
 * NOTE(review): the "©_key" arguments below look like "&copy_key" damaged by
 * an HTML-entity conversion (the local declared above is copy_key) - confirm
 * and restore.
 */
7544 btrfs_set_disk_key_objectid(©_key,
7545 rec->info_objectid);
7546 btrfs_set_disk_key_type(©_key, 0);
7547 btrfs_set_disk_key_offset(©_key, 0);
7549 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
7550 btrfs_set_tree_block_key(leaf, bi, ©_key);
7552 btrfs_set_extent_flags(leaf, ei,
7553 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
/* Account the new extent in its block group. */
7556 btrfs_mark_buffer_dirty(leaf);
7557 ret = btrfs_update_block_group(trans, extent_root, rec->start,
7558 rec->max_size, 1, 0);
7561 btrfs_release_path(path);
/* Data backref: add one extent reference per reference that was found. */
7564 if (back->is_data) {
7568 dback = to_data_backref(back);
7569 if (back->full_backref)
7570 parent = dback->parent;
7574 for (i = 0; i < dback->found_ref; i++) {
7575 /* if parent != 0, we're doing a full backref
7576 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
7577 * just makes the backref allocator create a data
7580 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7581 rec->start, rec->max_size,
7585 BTRFS_FIRST_FREE_OBJECTID :
7591 fprintf(stderr, "adding new data backref"
7592 " on %llu %s %llu owner %llu"
7593 " offset %llu found %d\n",
7594 (unsigned long long)rec->start,
7595 back->full_backref ?
7597 back->full_backref ?
7598 (unsigned long long)parent :
7599 (unsigned long long)dback->root,
7600 (unsigned long long)dback->owner,
7601 (unsigned long long)dback->offset,
/* Tree backref: a single reference, with the parent set for full backrefs. */
7605 struct tree_backref *tback;
7607 tback = to_tree_backref(back);
7608 if (back->full_backref)
7609 parent = tback->parent;
7613 ret = btrfs_inc_extent_ref(trans, info->extent_root,
7614 rec->start, rec->max_size,
7615 parent, tback->root, 0, 0);
7616 fprintf(stderr, "adding new tree backref on "
7617 "start %llu len %llu parent %llu root %llu\n",
7618 rec->start, rec->max_size, parent, tback->root);
7621 btrfs_release_path(path);
/*
 * Search the list @entries for an extent_entry whose bytenr and bytes both
 * equal the given @bytenr and @bytes.
 */
7625 static struct extent_entry *find_entry(struct list_head *entries,
7626 u64 bytenr, u64 bytes)
7628 struct extent_entry *entry = NULL;
7630 list_for_each_entry(entry, entries, list) {
/* Both the start and the length have to match. */
7631 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick from @entries the extent_entry most likely to be correct, judged by
 * its count field.  Entries whose broken counter equals their count are
 * distrusted, and an entry that merely ties with the current best does not
 * settle the choice.
 *
 * NOTE(review): the result for an unresolved tie is presumably NULL -
 * callers test the returned pointer; confirm.
 */
7638 static struct extent_entry *find_most_right_entry(struct list_head *entries)
7640 struct extent_entry *entry, *best = NULL, *prev = NULL;
7642 list_for_each_entry(entry, entries, list) {
7644 * If there are as many broken entries as entries then we know
7645 * not to trust this particular entry.
7647 if (entry->broken == entry->count)
7651 * Special case, when there are only two entries and 'best' is
7661 * If our current entry == best then we can't be sure our best
7662 * is really the best, so we need to keep searching.
7664 if (best && best->count == entry->count) {
7670 /* Prev == entry, not good enough, have to keep searching */
7671 if (!prev->broken && prev->count == entry->count)
/* The larger count of the previous and the current entry becomes best. */
7675 best = (prev->count > entry->count) ? prev : entry;
7676 else if (best->count < entry->count)
/*
 * Rewrite the file extent item behind data backref @dback so that it refers
 * to the extent described by @entry.
 *
 * The fs root named by dback->root is read, the file extent item whose
 * disk_bytenr/disk_num_bytes equal dback->disk_bytenr/dback->bytes is located
 * read-only, and then, inside a transaction, the item is COWed and its
 * disk_bytenr, offset, disk_num_bytes and (for uncompressed extents)
 * ram_bytes are updated.  Compressed extents whose start differs from the
 * entry, and refs that would fall outside the entry, are refused with a
 * message.
 *
 * Returns ret if it is non-zero, otherwise the result of the commit.
 */
7684 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
7685 struct data_backref *dback, struct extent_entry *entry)
7687 struct btrfs_trans_handle *trans;
7688 struct btrfs_root *root;
7689 struct btrfs_file_extent_item *fi;
7690 struct extent_buffer *leaf;
7691 struct btrfs_key key;
/* Look up the fs root that owns the reference. */
7695 key.objectid = dback->root;
7696 key.type = BTRFS_ROOT_ITEM_KEY;
7697 key.offset = (u64)-1;
7698 root = btrfs_read_fs_root(info, &key);
7700 fprintf(stderr, "Couldn't find root for our ref\n");
7705 * The backref points to the original offset of the extent if it was
7706 * split, so we need to search down to the offset we have and then walk
7707 * forward until we find the backref we're looking for.
7709 key.objectid = dback->owner;
7710 key.type = BTRFS_EXTENT_DATA_KEY;
7711 key.offset = dback->offset;
7712 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7714 fprintf(stderr, "Error looking up ref %d\n", ret);
/* Walk forward, crossing leaf boundaries as needed. */
7719 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7720 ret = btrfs_next_leaf(root, path);
7722 fprintf(stderr, "Couldn't find our ref, next\n");
7726 leaf = path->nodes[0];
7727 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Leaving the inode's EXTENT_DATA items means the ref does not exist. */
7728 if (key.objectid != dback->owner ||
7729 key.type != BTRFS_EXTENT_DATA_KEY) {
7730 fprintf(stderr, "Couldn't find our ref, search\n");
7733 fi = btrfs_item_ptr(leaf, path->slots[0],
7734 struct btrfs_file_extent_item);
7735 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7736 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* This is the item the backref was built from. */
7738 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7743 btrfs_release_path(path);
7745 trans = btrfs_start_transaction(root, 1);
7747 return PTR_ERR(trans);
7750 * Ok we have the key of the file extent we want to fix, now we can cow
7751 * down to the thing and fix it.
7753 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7755 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7756 key.objectid, key.type, key.offset, ret);
7760 fprintf(stderr, "Well that's odd, we just found this key "
7761 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7766 leaf = path->nodes[0];
7767 fi = btrfs_item_ptr(leaf, path->slots[0],
7768 struct btrfs_file_extent_item);
/* Moving the start of a compressed extent is not supported. */
7770 if (btrfs_file_extent_compression(leaf, fi) &&
7771 dback->disk_bytenr != entry->bytenr) {
7772 fprintf(stderr, "Ref doesn't match the record start and is "
7773 "compressed, please take a btrfs-image of this file "
7774 "system and send it to a btrfs developer so they can "
7775 "complete this functionality for bytenr %Lu\n",
7776 dback->disk_bytenr);
/* Backref flagged broken: only the disk_bytenr is replaced here. */
7781 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7782 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
/* Ref starts after the entry start: its data must still end inside the entry. */
7783 } else if (dback->disk_bytenr > entry->bytenr) {
7784 u64 off_diff, offset;
7786 off_diff = dback->disk_bytenr - entry->bytenr;
7787 offset = btrfs_file_extent_offset(leaf, fi);
7788 if (dback->disk_bytenr + offset +
7789 btrfs_file_extent_num_bytes(leaf, fi) >
7790 entry->bytenr + entry->bytes) {
7791 fprintf(stderr, "Ref is past the entry end, please "
7792 "take a btrfs-image of this file system and "
7793 "send it to a btrfs developer, ref %Lu\n",
7794 dback->disk_bytenr);
7799 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7800 btrfs_set_file_extent_offset(leaf, fi, offset);
/* Ref starts before the entry start: its data must begin inside the entry. */
7801 } else if (dback->disk_bytenr < entry->bytenr) {
7804 offset = btrfs_file_extent_offset(leaf, fi);
7805 if (dback->disk_bytenr + offset < entry->bytenr) {
7806 fprintf(stderr, "Ref is before the entry start, please"
7807 " take a btrfs-image of this file system and "
7808 "send it to a btrfs developer, ref %Lu\n",
7809 dback->disk_bytenr);
/* Re-express the offset relative to the entry's start. */
7814 offset += dback->disk_bytenr;
7815 offset -= entry->bytenr;
7816 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7817 btrfs_set_file_extent_offset(leaf, fi, offset);
7820 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7823 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7824 * only do this if we aren't using compression, otherwise it's a
7827 if (!btrfs_file_extent_compression(leaf, fi))
7828 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7830 printf("ram bytes may be wrong?\n");
7831 btrfs_mark_buffer_dirty(leaf);
/* A failure recorded in ret takes precedence over the commit result. */
7833 err = btrfs_commit_transaction(trans, root);
7834 btrfs_release_path(path);
7835 return ret ? ret : err;
/*
 * Check that the data backrefs of @rec agree with each other and with the
 * extent record on (bytenr, bytes), and repair the file extent items that
 * disagree.
 *
 * Every non-full data backref with found_ref set is grouped into an
 * extent_entry per distinct (disk_bytenr, bytes) pair.  If the groups differ,
 * find_most_right_entry() picks a winner; failing that, the extent record's
 * own (start, nr) is looked up or added as a further entry and the choice is
 * retried.  Backrefs that differ from the winner are passed to repair_ref().
 * The temporary entry list is emptied before returning.
 */
7838 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7839 struct extent_record *rec)
7841 struct extent_backref *back;
7842 struct data_backref *dback;
7843 struct extent_entry *entry, *best = NULL;
7846 int broken_entries = 0;
7851 * Metadata is easy and the backrefs should always agree on bytenr and
7852 * size, if not we've got bigger issues.
/* Pass 1: group the usable data backrefs by (disk_bytenr, bytes). */
7857 list_for_each_entry(back, &rec->backrefs, list) {
7858 if (back->full_backref || !back->is_data)
7861 dback = to_data_backref(back);
7864 * We only pay attention to backrefs that we found a real
7867 if (dback->found_ref == 0)
7871 * For now we only catch when the bytes don't match, not the
7872 * bytenr. We can easily do this at the same time, but I want
7873 * to have a fs image to test on before we just add repair
7874 * functionality willy-nilly so we know we won't screw up the
7878 entry = find_entry(&entries, dback->disk_bytenr,
/* First backref with this (bytenr, bytes) pair: start a new entry. */
7881 entry = malloc(sizeof(struct extent_entry));
7886 memset(entry, 0, sizeof(*entry));
7887 entry->bytenr = dback->disk_bytenr;
7888 entry->bytes = dback->bytes;
7889 list_add_tail(&entry->list, &entries);
7894 * If we only have on entry we may think the entries agree when
7895 * in reality they don't so we have to do some extra checking.
7897 if (dback->disk_bytenr != rec->start ||
7898 dback->bytes != rec->nr || back->broken)
7909 /* Yay all the backrefs agree, carry on good sir */
7910 if (nr_entries <= 1 && !mismatch)
7913 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7914 "%Lu\n", rec->start);
7917 * First we want to see if the backrefs can agree amongst themselves who
7918 * is right, so figure out which one of the entries has the highest
7921 best = find_most_right_entry(&entries);
7924 * Ok so we may have an even split between what the backrefs think, so
7925 * this is where we use the extent ref to see what it thinks.
7928 entry = find_entry(&entries, rec->start, rec->nr);
7929 if (!entry && (!broken_entries || !rec->found_rec)) {
7930 fprintf(stderr, "Backrefs don't agree with each other "
7931 "and extent record doesn't agree with anybody,"
7932 " so we can't fix bytenr %Lu bytes %Lu\n",
7933 rec->start, rec->nr);
7936 } else if (!entry) {
7938 * Ok our backrefs were broken, we'll assume this is the
7939 * correct value and add an entry for this range.
7941 entry = malloc(sizeof(struct extent_entry));
7946 memset(entry, 0, sizeof(*entry));
7947 entry->bytenr = rec->start;
7948 entry->bytes = rec->nr;
7949 list_add_tail(&entry->list, &entries);
/* Retry the choice now that the extent record's range is among the entries. */
7953 best = find_most_right_entry(&entries);
7955 fprintf(stderr, "Backrefs and extent record evenly "
7956 "split on who is right, this is going to "
7957 "require user input to fix bytenr %Lu bytes "
7958 "%Lu\n", rec->start, rec->nr);
7965 * I don't think this can happen currently as we'll abort() if we catch
7966 * this case higher up, but in case somebody removes that we still can't
7967 * deal with it properly here yet, so just bail out of that's the case.
7969 if (best->bytenr != rec->start) {
7970 fprintf(stderr, "Extent start and backref starts don't match, "
7971 "please use btrfs-image on this file system and send "
7972 "it to a btrfs developer so they can make fsck fix "
7973 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7974 rec->start, rec->nr);
7980 * Ok great we all agreed on an extent record, let's go find the real
7981 * references and fix up the ones that don't match.
/* Pass 2: repair every usable data backref that differs from the winner. */
7983 list_for_each_entry(back, &rec->backrefs, list) {
7984 if (back->full_backref || !back->is_data)
7987 dback = to_data_backref(back);
7990 * Still ignoring backrefs that don't have a real ref attached
7993 if (dback->found_ref == 0)
/* Already matches the chosen entry. */
7996 if (dback->bytes == best->bytes &&
7997 dback->disk_bytenr == best->bytenr)
8000 ret = repair_ref(info, path, dback, best);
8006 * Ok we messed with the actual refs, which means we need to drop our
8007 * entire cache and go back and rescan. I know this is a huge pain and
8008 * adds a lot of extra work, but it's the only way to be safe. Once all
8009 * the backrefs agree we may not need to do anything to the extent
/* Empty the temporary entry list. */
8014 while (!list_empty(&entries)) {
8015 entry = list_entry(entries.next, struct extent_entry, list);
8016 list_del_init(&entry->list);
/*
 * Replace an extent record that has no extent item of its own (found_rec
 * clear) but exactly one duplicate by that duplicate.  The duplicate takes
 * over the record's refs and backrefs and is inserted into @extent_cache in
 * its place; other cached records overlapping the duplicate's range are
 * either chained onto it as duplicates or merged into it.
 *
 * Returns 0 if the resulting record still has duplicates, 1 otherwise (final
 * return statement).
 */
8022 static int process_duplicates(struct btrfs_root *root,
8023 struct cache_tree *extent_cache,
8024 struct extent_record *rec)
8026 struct extent_record *good, *tmp;
8027 struct cache_extent *cache;
8031 * If we found a extent record for this extent then return, or if we
8032 * have more than one duplicate we are likely going to need to delete
8035 if (rec->found_rec || rec->num_duplicates > 1)
8038 /* Shouldn't happen but just in case */
8039 BUG_ON(!rec->num_duplicates);
8042 * So this happens if we end up with a backref that doesn't match the
8043 * actual extent entry. So either the backref is bad or the extent
8044 * entry is bad. Either way we want to have the extent_record actually
8045 * reflect what we found in the extent_tree, so we need to take the
8046 * duplicate out and use that as the extent_record since the only way we
8047 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
8049 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the first duplicate and reset its bookkeeping. */
8051 good = to_extent_record(rec->dups.next);
8052 list_del_init(&good->list);
8053 INIT_LIST_HEAD(&good->backrefs);
8054 INIT_LIST_HEAD(&good->dups);
8055 good->cache.start = good->start;
8056 good->cache.size = good->nr;
8057 good->content_checked = 0;
8058 good->owner_ref_checked = 0;
8059 good->num_duplicates = 0;
/* The promoted record inherits the old record's refs and backrefs. */
8060 good->refs = rec->refs;
8061 list_splice_init(&rec->backrefs, &good->backrefs);
/* Look for other cached records overlapping the promoted record's range. */
8063 cache = lookup_cache_extent(extent_cache, good->start,
8067 tmp = container_of(cache, struct extent_record, cache);
8070 * If we find another overlapping extent and it's found_rec is
8071 * set then it's a duplicate and we need to try and delete
8074 if (tmp->found_rec || tmp->num_duplicates > 0) {
/* Put "good" on the global duplicate_extents list once. */
8075 if (list_empty(&good->list))
8076 list_add_tail(&good->list,
8077 &duplicate_extents);
/* tmp and all of its own duplicates become duplicates of "good". */
8078 good->num_duplicates += tmp->num_duplicates + 1;
8079 list_splice_init(&tmp->dups, &good->dups);
8080 list_del_init(&tmp->list);
8081 list_add_tail(&tmp->list, &good->dups);
8082 remove_cache_extent(extent_cache, &tmp->cache);
8087 * Ok we have another non extent item backed extent rec, so lets
8088 * just add it to this extent and carry on like we did above.
8090 good->refs += tmp->refs;
8091 list_splice_init(&tmp->backrefs, &good->backrefs);
8092 remove_cache_extent(extent_cache, &tmp->cache);
8095 ret = insert_cache_extent(extent_cache, &good->cache);
8098 return good->num_duplicates ? 0 : 1;
/*
 * Delete duplicate EXTENT_ITEMs for @rec from the extent tree.
 *
 * One record among @rec and its duplicates is chosen as the one to keep
 * ("good"); the others are moved to a local delete list, and inside a
 * transaction on the extent root the EXTENT_ITEM of each listed record that
 * has found_rec set is searched for and deleted.  Both the delete list and
 * rec->dups are emptied afterwards.
 *
 * Returns ret if it is non-zero, otherwise nr_del (final return statement).
 */
8101 static int delete_duplicate_records(struct btrfs_root *root,
8102 struct extent_record *rec)
8104 struct btrfs_trans_handle *trans;
8105 LIST_HEAD(delete_list);
8106 struct btrfs_path path;
8107 struct extent_record *tmp, *good, *n;
8110 struct btrfs_key key;
8112 btrfs_init_path(&path);
8115 /* Find the record that covers all of the duplicates. */
8116 list_for_each_entry(tmp, &rec->dups, list) {
8117 if (good->start < tmp->start)
8119 if (good->nr > tmp->nr)
/* tmp ends before "good" does: neither record contains the other. */
8122 if (tmp->start + tmp->nr < good->start + good->nr) {
/*
 * NOTE(review): the message is laid out like start-end ranges, but the
 * second value of each pair is the length (nr) - confirm intent.
 */
8123 fprintf(stderr, "Ok we have overlapping extents that "
8124 "aren't completely covered by each other, this "
8125 "is going to require more careful thought. "
8126 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
8127 tmp->start, tmp->nr, good->start, good->nr);
8134 list_add_tail(&rec->list, &delete_list);
8136 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
8139 list_move_tail(&tmp->list, &delete_list);
/* All deletions happen in the extent tree, within one transaction. */
8142 root = root->fs_info->extent_root;
8143 trans = btrfs_start_transaction(root, 1);
8144 if (IS_ERR(trans)) {
8145 ret = PTR_ERR(trans);
8149 list_for_each_entry(tmp, &delete_list, list) {
8150 if (tmp->found_rec == 0)
8152 key.objectid = tmp->start;
8153 key.type = BTRFS_EXTENT_ITEM_KEY;
8154 key.offset = tmp->nr;
8156 /* Shouldn't happen but just in case */
8157 if (tmp->metadata) {
8158 fprintf(stderr, "Well this shouldn't happen, extent "
8159 "record overlaps but is metadata? "
8160 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
/* Search with ins_len -1 and cow 1, then delete the item found. */
8164 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
8170 ret = btrfs_del_item(trans, root, &path);
8173 btrfs_release_path(&path);
8176 err = btrfs_commit_transaction(trans, root);
/* Empty the local delete list ... */
8180 while (!list_empty(&delete_list)) {
8181 tmp = to_extent_record(delete_list.next);
8182 list_del_init(&tmp->list);
/* ... and whatever is left on rec->dups. */
8188 while (!list_empty(&rec->dups)) {
8189 tmp = to_extent_record(rec->dups.next);
8190 list_del_init(&tmp->list);
8194 btrfs_release_path(&path);
/* No error and nothing deleted: rec no longer counts any duplicates. */
8196 if (!ret && !nr_del)
8197 rec->num_duplicates = 0;
8199 return ret ? ret : nr_del;
/*
 * Look up the file extents behind the data backrefs of @rec.
 *
 * For each entry on @rec->backrefs the function tests full_backref/is_data
 * and dback->found_ref, reads the fs root named by dback->root, and searches
 * it for the EXTENT_DATA item (dback->owner, dback->offset).  The disk
 * bytenr and disk num bytes of that file extent item are read and
 * @extent_cache is probed at the bytenr; dback->found_ref is incremented
 * and dback->disk_bytenr / dback->bytes are set from the file extent.
 *
 * A root lookup error is returned as PTR_ERR(root); per the comments below,
 * -ENOENT is treated as a bad ref to skip rather than as a failure.
 * NOTE(review): the statements that skip a backref are not visible in this
 * excerpt; confirm which backrefs are actually skipped.
 */
8202 static int find_possible_backrefs(struct btrfs_fs_info *info,
8203 struct btrfs_path *path,
8204 struct cache_tree *extent_cache,
8205 struct extent_record *rec)
8207 struct btrfs_root *root;
8208 struct extent_backref *back;
8209 struct data_backref *dback;
8210 struct cache_extent *cache;
8211 struct btrfs_file_extent_item *fi;
8212 struct btrfs_key key;
8216 list_for_each_entry(back, &rec->backrefs, list) {
8217 /* Don't care about full backrefs (poor unloved backrefs) */
8218 if (back->full_backref || !back->is_data)
8221 dback = to_data_backref(back);
8223 /* We found this one, we don't need to do a lookup */
8224 if (dback->found_ref)
8227 key.objectid = dback->root;
8228 key.type = BTRFS_ROOT_ITEM_KEY;
8229 key.offset = (u64)-1;
8231 root = btrfs_read_fs_root(info, &key);
8233 /* No root, definitely a bad ref, skip */
8234 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
8236 /* Other err, exit */
8238 return PTR_ERR(root);
8240 key.objectid = dback->owner;
8241 key.type = BTRFS_EXTENT_DATA_KEY;
8242 key.offset = dback->offset;
8243 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8245 btrfs_release_path(path);
8248 /* Didn't find it, we can carry on */
8253 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
8254 struct btrfs_file_extent_item);
8255 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
8256 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
8257 btrfs_release_path(path);
8258 cache = lookup_cache_extent(extent_cache, bytenr, 1);
8260 struct extent_record *tmp;
8261 tmp = container_of(cache, struct extent_record, cache);
8264 * If we found an extent record for the bytenr for this
8265 * particular backref then we can't add it to our
8266 * current extent record. We only want to add backrefs
8267 * that don't have a corresponding extent item in the
8268 * extent tree since they likely belong to this record
8269 * and we need to fix it if it doesn't match bytenrs.
8275 dback->found_ref += 1;
8276 dback->disk_bytenr = bytenr;
8277 dback->bytes = bytes;
8280 * Set this so the verify backref code knows not to trust the
8281 * values in this backref.
/*
 * Queue an orphan_data_extent on the owning fs root for the data backrefs
 * of @rec that the loop below selects.
 *
 * Each recorded entry is malloc()ed, filled with the backref's
 * (root, owner, offset) and the extent's disk range (rec->cache.start,
 * rec->cache.size), and linked onto dest_root->orphan_data_extents.
 * The final return yields 0 when at least one entry was recorded and 1
 * otherwise (!recorded_data_ref).
 */
8290 * Record orphan data ref into corresponding root.
8292 * Return 0 if the extent item contains data ref and recorded.
8293 * Return 1 if the extent item contains no useful data ref
8294 * On that case, it may contains only shared_dataref or metadata backref
8295 * or the file extent exists(this should be handled by the extent bytenr
8297 * Return <0 if something goes wrong.
8299 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
8300 struct extent_record *rec)
8302 struct btrfs_key key;
8303 struct btrfs_root *dest_root;
8304 struct extent_backref *back;
8305 struct data_backref *dback;
8306 struct orphan_data_extent *orphan;
8307 struct btrfs_path path;
8308 int recorded_data_ref = 0;
8313 btrfs_init_path(&path);
8314 list_for_each_entry(back, &rec->backrefs, list) {
8315 if (back->full_backref || !back->is_data ||
8316 !back->found_extent_tree)
8318 dback = to_data_backref(back);
8319 if (dback->found_ref)
8321 key.objectid = dback->root;
8322 key.type = BTRFS_ROOT_ITEM_KEY;
8323 key.offset = (u64)-1;
8325 dest_root = btrfs_read_fs_root(fs_info, &key);
8327 /* For non-exist root we just skip it */
8328 if (IS_ERR(dest_root) || !dest_root)
8331 key.objectid = dback->owner;
8332 key.type = BTRFS_EXTENT_DATA_KEY;
8333 key.offset = dback->offset;
8335 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
8336 btrfs_release_path(&path);
8338 * For ret < 0, it's OK since the fs-tree may be corrupted,
8339 * we need to record it for inode/file extent rebuild.
8340 * For ret > 0, we record it only for file extent rebuild.
8341 * For ret == 0, the file extent exists but only bytenr
8342 * mismatch, let the original bytenr fix routine to handle,
8348 orphan = malloc(sizeof(*orphan));
8353 INIT_LIST_HEAD(&orphan->list);
8354 orphan->root = dback->root;
8355 orphan->objectid = dback->owner;
8356 orphan->offset = dback->offset;
8357 orphan->disk_bytenr = rec->cache.start;
8358 orphan->disk_len = rec->cache.size;
/*
 * list_add(new, head): the new entry comes first, the list head second.
 * With the arguments swapped the root's list head was re-linked behind the
 * freshly initialised orphan, which dropped (and leaked) every orphan
 * queued on this root by an earlier iteration or call.
 */
8359 list_add(&orphan->list, &dest_root->orphan_data_extents);
8360 recorded_data_ref = 1;
8363 btrfs_release_path(&path);
8365 return !recorded_data_ref;
/*
 * Rebuild the extent tree entries for @rec from the backrefs found by the
 * tree scan.
 *
 * Visible steps: for a data record whose refs differ from extent_item_refs,
 * call find_possible_backrefs(); call verify_backrefs(); start a
 * transaction on the extent root; call delete_extent_records() with
 * rec->start and rec->max_size; look the range up in info->corrupt_blocks;
 * walk @rec->backrefs and call record_extent() (the found_ref test guards
 * this); finally commit the transaction and release the path.
 * BTRFS_BLOCK_FLAG_FULL_BACKREF is added to the flags passed to
 * record_extent() when rec->flag_block_full_backref is set.
 */
8371 * when an incorrect extent item is found, this will delete
8372 * all of the existing entries for it and recreate them
8373 * based on what the tree scan found.
8375 static int fixup_extent_refs(struct btrfs_fs_info *info,
8376 struct cache_tree *extent_cache,
8377 struct extent_record *rec)
8379 struct btrfs_trans_handle *trans = NULL;
8381 struct btrfs_path path;
8382 struct list_head *cur = rec->backrefs.next;
8383 struct cache_extent *cache;
8384 struct extent_backref *back;
8388 if (rec->flag_block_full_backref)
8389 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8391 btrfs_init_path(&path);
8392 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
8394 * Sometimes the backrefs themselves are so broken they don't
8395 * get attached to any meaningful rec, so first go back and
8396 * check any of our backrefs that we couldn't find and throw
8397 * them into the list if we find the backref so that
8398 * verify_backrefs can figure out what to do.
8400 ret = find_possible_backrefs(info, &path, extent_cache, rec);
8405 /* step one, make sure all of the backrefs agree */
8406 ret = verify_backrefs(info, &path, rec);
8410 trans = btrfs_start_transaction(info->extent_root, 1);
8411 if (IS_ERR(trans)) {
8412 ret = PTR_ERR(trans);
8416 /* step two, delete all the existing records */
8417 ret = delete_extent_records(trans, info->extent_root, &path,
8418 rec->start, rec->max_size);
8423 /* was this block corrupt? If so, don't add references to it */
8424 cache = lookup_cache_extent(info->corrupt_blocks,
8425 rec->start, rec->max_size);
8431 /* step three, recreate all the refs we did find */
8432 while(cur != &rec->backrefs) {
8433 back = to_extent_backref(cur);
8437 * if we didn't find any references, don't create a
8440 if (!back->found_ref)
8443 rec->bad_full_backref = 0;
8444 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
8452 int err = btrfs_commit_transaction(trans, info->extent_root);
8457 btrfs_release_path(&path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item of @rec so
 * that it follows rec->flag_block_full_backref.
 *
 * The item is searched for in the extent root by rec->start, keyed as
 * BTRFS_METADATA_ITEM_KEY (offset = rec->info_level) for metadata records
 * or as BTRFS_EXTENT_ITEM_KEY (offset = rec->max_size).  The updated flags
 * are written back with btrfs_set_extent_flags(), the leaf is marked dirty
 * and the transaction is committed.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started; the
 * success path returns the result of btrfs_commit_transaction().
 * NOTE(review): the two early-exit paths call btrfs_commit_transaction()
 * without using its return value.
 */
8461 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
8462 struct extent_record *rec)
8464 struct btrfs_trans_handle *trans;
8465 struct btrfs_root *root = fs_info->extent_root;
8466 struct btrfs_path path;
8467 struct btrfs_extent_item *ei;
8468 struct btrfs_key key;
8472 key.objectid = rec->start;
8473 if (rec->metadata) {
8474 key.type = BTRFS_METADATA_ITEM_KEY;
8475 key.offset = rec->info_level;
8477 key.type = BTRFS_EXTENT_ITEM_KEY;
8478 key.offset = rec->max_size;
8481 trans = btrfs_start_transaction(root, 0);
8483 return PTR_ERR(trans);
8485 btrfs_init_path(&path);
8486 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
8488 btrfs_release_path(&path);
8489 btrfs_commit_transaction(trans, root);
8492 fprintf(stderr, "Didn't find extent for %llu\n",
8493 (unsigned long long)rec->start);
8494 btrfs_release_path(&path);
8495 btrfs_commit_transaction(trans, root);
8499 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8500 struct btrfs_extent_item);
8501 flags = btrfs_extent_flags(path.nodes[0], ei);
8502 if (rec->flag_block_full_backref) {
8503 fprintf(stderr, "setting full backref on %llu\n",
8504 (unsigned long long)key.objectid);
8505 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8507 fprintf(stderr, "clearing full backref on %llu\n",
8508 (unsigned long long)key.objectid);
8509 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8511 btrfs_set_extent_flags(path.nodes[0], ei, flags);
8512 btrfs_mark_buffer_dirty(path.nodes[0]);
8513 btrfs_release_path(&path);
8514 return btrfs_commit_transaction(trans, root);
/*
 * Delete the pointer to the corrupt block @corrupt from a node of the
 * extent tree.
 *
 * The search for corrupt->key is stopped at level corrupt->level + 1 by
 * setting path.lowest_level.  The slot returned by the search is checked
 * first; if its block pointer is not corrupt->cache.start, every slot of
 * that node is scanned for it.  The pointer is then removed with
 * btrfs_del_ptr() and the path released.
 */
8517 /* right now we only prune from the extent allocation tree */
8518 static int prune_one_block(struct btrfs_trans_handle *trans,
8519 struct btrfs_fs_info *info,
8520 struct btrfs_corrupt_block *corrupt)
8523 struct btrfs_path path;
8524 struct extent_buffer *eb;
8528 int level = corrupt->level + 1;
8530 btrfs_init_path(&path);
8532 /* we want to stop at the parent to our busted block */
8533 path.lowest_level = level;
8535 ret = btrfs_search_slot(trans, info->extent_root,
8536 &corrupt->key, &path, -1, 1);
8541 eb = path.nodes[level];
8548 * hopefully the search gave us the block we want to prune,
8549 * lets try that first
8551 slot = path.slots[level];
8552 found = btrfs_node_blockptr(eb, slot);
8553 if (found == corrupt->cache.start)
8556 nritems = btrfs_header_nritems(eb);
8558 /* the search failed, lets scan this node and hope we find it */
8559 for (slot = 0; slot < nritems; slot++) {
8560 found = btrfs_node_blockptr(eb, slot);
8561 if (found == corrupt->cache.start)
8565 * we couldn't find the bad block. TODO, search all the nodes for pointers
8568 if (eb == info->extent_root->node) {
8573 btrfs_release_path(&path);
8578 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
8579 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
8582 btrfs_release_path(&path);
/*
 * Prune the blocks recorded in info->corrupt_blocks.
 *
 * An entry found by search_cache_extent() is handed to prune_one_block()
 * and then removed from the cache; the work runs in a transaction on the
 * extent root whose commit result is returned.
 *
 * Returns PTR_ERR(trans) if the transaction cannot be started.
 * NOTE(review): the return value of prune_one_block() is not used, so a
 * failed prune is not reported to the caller.
 */
8586 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
8588 struct btrfs_trans_handle *trans = NULL;
8589 struct cache_extent *cache;
8590 struct btrfs_corrupt_block *corrupt;
8593 cache = search_cache_extent(info->corrupt_blocks, 0);
8597 trans = btrfs_start_transaction(info->extent_root, 1);
8599 return PTR_ERR(trans);
8601 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
8602 prune_one_block(trans, info, corrupt);
8603 remove_cache_extent(info->corrupt_blocks, cache);
8606 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear the EXTENT_DIRTY ranges found in fs_info->free_space_cache, then
 * step through block groups with btrfs_lookup_first_block_group(),
 * advancing `start` to the end of each block group found.
 * NOTE(review): what is reset on each block group is not visible in this
 * excerpt; confirm against the full source.
 */
8610 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
8612 struct btrfs_block_group_cache *cache;
8617 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
8618 &start, &end, EXTENT_DIRTY);
8621 clear_extent_dirty(&fs_info->free_space_cache, start, end,
8627 cache = btrfs_lookup_first_block_group(fs_info, start);
8632 start = cache->key.objectid + cache->key.offset;
/*
 * Report, and in repair mode try to fix, inconsistencies in the extent
 * records held in @extent_cache.
 *
 * Visible flow:
 *  - for a repair, mark every record range and every corrupt block dirty in
 *    fs_info->excluded_extents so they are not allocated from, then call
 *    prune_corrupt_blocks() and reset_cached_block_groups();
 *  - while repairing, drain duplicate_extents through process_duplicates()
 *    and delete_duplicate_records();
 *  - for each record, print a message for: multiple extent items, ref count
 *    mismatch, backpointer mismatch, failed owner ref check, bad full
 *    backref, metadata crossing a stripe boundary, and chunk type mismatch;
 *    record_orphan_data_extents(), fixup_extent_refs() and
 *    fixup_extent_flags() are the repair helpers invoked from these checks;
 *  - remove the record from the cache, free its backrefs, and clear its
 *    range from excluded_extents when
 *    !init_extent_tree && repair && (!cur_err || fixed);
 *  - at the end, either report a failed repair or run
 *    btrfs_fix_block_accounting() in a new transaction on the extent root
 *    and commit it.
 *
 * NOTE(review): -EAGAIN is treated differently from other errors in the
 * final test; the exact return paths are not visible in this excerpt.
 */
8636 static int check_extent_refs(struct btrfs_root *root,
8637 struct cache_tree *extent_cache)
8639 struct extent_record *rec;
8640 struct cache_extent *cache;
8649 * if we're doing a repair, we have to make sure
8650 * we don't allocate from the problem extents.
8651 * In the worst case, this will be all the
8654 cache = search_cache_extent(extent_cache, 0);
8656 rec = container_of(cache, struct extent_record, cache);
8657 set_extent_dirty(root->fs_info->excluded_extents,
8659 rec->start + rec->max_size - 1,
8661 cache = next_cache_extent(cache);
8664 /* pin down all the corrupted blocks too */
8665 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
8667 set_extent_dirty(root->fs_info->excluded_extents,
8669 cache->start + cache->size - 1,
8671 cache = next_cache_extent(cache);
8673 prune_corrupt_blocks(root->fs_info);
8674 reset_cached_block_groups(root->fs_info);
8677 reset_cached_block_groups(root->fs_info);
8680 * We need to delete any duplicate entries we find first otherwise we
8681 * could mess up the extent tree when we have backrefs that actually
8682 * belong to a different extent item and not the weird duplicate one.
8684 while (repair && !list_empty(&duplicate_extents)) {
8685 rec = to_extent_record(duplicate_extents.next);
8686 list_del_init(&rec->list);
8688 /* Sometimes we can find a backref before we find an actual
8689 * extent, so we need to process it a little bit to see if there
8690 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
8691 * if this is a backref screwup. If we need to delete stuff
8692 * process_duplicates() will return 0, otherwise it will return
8695 if (process_duplicates(root, extent_cache, rec))
8697 ret = delete_duplicate_records(root, rec);
8701 * delete_duplicate_records will return the number of entries
8702 * deleted, so if it's greater than 0 then we know we actually
8703 * did something and we need to remove.
8717 cache = search_cache_extent(extent_cache, 0);
8720 rec = container_of(cache, struct extent_record, cache);
8721 if (rec->num_duplicates) {
8722 fprintf(stderr, "extent item %llu has multiple extent "
8723 "items\n", (unsigned long long)rec->start);
8728 if (rec->refs != rec->extent_item_refs) {
8729 fprintf(stderr, "ref mismatch on [%llu %llu] ",
8730 (unsigned long long)rec->start,
8731 (unsigned long long)rec->nr);
8732 fprintf(stderr, "extent item %llu, found %llu\n",
8733 (unsigned long long)rec->extent_item_refs,
8734 (unsigned long long)rec->refs);
8735 ret = record_orphan_data_extents(root->fs_info, rec);
8742 * we can't use the extent to repair file
8743 * extent, let the fallback method handle it.
8745 if (!fixed && repair) {
8746 ret = fixup_extent_refs(
8757 if (all_backpointers_checked(rec, 1)) {
8758 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8759 (unsigned long long)rec->start,
8760 (unsigned long long)rec->nr);
8762 if (!fixed && !recorded && repair) {
8763 ret = fixup_extent_refs(root->fs_info,
8772 if (!rec->owner_ref_checked) {
8773 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8774 (unsigned long long)rec->start,
8775 (unsigned long long)rec->nr);
8776 if (!fixed && !recorded && repair) {
8777 ret = fixup_extent_refs(root->fs_info,
8786 if (rec->bad_full_backref) {
8787 fprintf(stderr, "bad full backref, on [%llu]\n",
8788 (unsigned long long)rec->start);
8790 ret = fixup_extent_flags(root->fs_info, rec);
8799 * Although it's not a extent ref's problem, we reuse this
8800 * routine for error reporting.
8801 * No repair function yet.
8803 if (rec->crossing_stripes) {
8805 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8806 rec->start, rec->start + rec->max_size);
8811 if (rec->wrong_chunk_type) {
8813 "bad extent [%llu, %llu), type mismatch with chunk\n",
8814 rec->start, rec->start + rec->max_size);
8819 remove_cache_extent(extent_cache, cache);
8820 free_all_extent_backrefs(rec);
8821 if (!init_extent_tree && repair && (!cur_err || fixed))
8822 clear_extent_dirty(root->fs_info->excluded_extents,
8824 rec->start + rec->max_size - 1,
8830 if (ret && ret != -EAGAIN) {
8831 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8834 struct btrfs_trans_handle *trans;
8836 root = root->fs_info->extent_root;
8837 trans = btrfs_start_transaction(root, 1);
8838 if (IS_ERR(trans)) {
8839 ret = PTR_ERR(trans);
8843 btrfs_fix_block_accounting(trans, root);
8844 ret = btrfs_commit_transaction(trans, root);
8849 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Derive a stripe size from the total chunk @length and the RAID profile.
 *
 * @type:        block group flags; the RAID0, RAID10, RAID5 and RAID6 bits
 *               are tested in that order
 * @length:      chunk length
 * @num_stripes: number of stripes of the chunk
 *
 *   RAID0:   length / num_stripes
 *   RAID10:  length * 2 / num_stripes
 *   RAID5:   length / (num_stripes - 1)
 *   RAID6:   length / (num_stripes - 2)
 *   other:   length
 *
 * NOTE(review): no guard against a zero divisor is visible here
 * (num_stripes of 0, 1 with RAID5, or 2 with RAID6); presumably the chunk
 * item is validated before this is called -- confirm.
 */
8855 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8859 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8860 stripe_size = length;
8861 stripe_size /= num_stripes;
8862 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8863 stripe_size = length * 2;
8864 stripe_size /= num_stripes;
8865 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8866 stripe_size = length;
8867 stripe_size /= (num_stripes - 1);
8868 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8869 stripe_size = length;
8870 stripe_size /= (num_stripes - 2);
8872 stripe_size = length;
/*
 * Cross-check one chunk record against the block group cache and the
 * device extent cache.
 *
 * The block group is looked up in @block_group_cache and compared with the
 * chunk's length, offset and type flags; the block group record is unlinked
 * from its list and stored in chunk_rec->bg_rec.  Then, for each stripe, a
 * device extent of calc_stripe_length() bytes is looked up by
 * (devid, offset) and compared with the stripe's devid, offset, the chunk
 * offset and that length; device extents are moved onto
 * chunk_rec->dextents.  Mismatching and missing items have a message
 * prepared for them.
 * NOTE(review): the branch structure deciding when records are linked, and
 * how the extra parameter(s) not shown in this excerpt gate the messages,
 * cannot be seen here; confirm against the full source.
 */
8878 * Check the chunk with its block group/dev list ref:
8879 * Return 0 if all refs seems valid.
8880 * Return 1 if part of refs seems valid, need later check for rebuild ref
8881 * like missing block group and needs to search extent tree to rebuild them.
8882 * Return -1 if essential refs are missing and unable to rebuild.
8884 static int check_chunk_refs(struct chunk_record *chunk_rec,
8885 struct block_group_tree *block_group_cache,
8886 struct device_extent_tree *dev_extent_cache,
8889 struct cache_extent *block_group_item;
8890 struct block_group_record *block_group_rec;
8891 struct cache_extent *dev_extent_item;
8892 struct device_extent_record *dev_extent_rec;
8896 int metadump_v2 = 0;
8900 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8903 if (block_group_item) {
8904 block_group_rec = container_of(block_group_item,
8905 struct block_group_record,
8907 if (chunk_rec->length != block_group_rec->offset ||
8908 chunk_rec->offset != block_group_rec->objectid ||
8910 chunk_rec->type_flags != block_group_rec->flags)) {
8913 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8914 chunk_rec->objectid,
8919 chunk_rec->type_flags,
8920 block_group_rec->objectid,
8921 block_group_rec->type,
8922 block_group_rec->offset,
8923 block_group_rec->offset,
8924 block_group_rec->objectid,
8925 block_group_rec->flags);
8928 list_del_init(&block_group_rec->list);
8929 chunk_rec->bg_rec = block_group_rec;
8934 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8935 chunk_rec->objectid,
8940 chunk_rec->type_flags);
8947 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8948 chunk_rec->num_stripes);
8949 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8950 devid = chunk_rec->stripes[i].devid;
8951 offset = chunk_rec->stripes[i].offset;
8952 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8953 devid, offset, length);
8954 if (dev_extent_item) {
8955 dev_extent_rec = container_of(dev_extent_item,
8956 struct device_extent_record,
8958 if (dev_extent_rec->objectid != devid ||
8959 dev_extent_rec->offset != offset ||
8960 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8961 dev_extent_rec->length != length) {
8964 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8965 chunk_rec->objectid,
8968 chunk_rec->stripes[i].devid,
8969 chunk_rec->stripes[i].offset,
8970 dev_extent_rec->objectid,
8971 dev_extent_rec->offset,
8972 dev_extent_rec->length);
8975 list_move(&dev_extent_rec->chunk_list,
8976 &chunk_rec->dextents);
8981 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8982 chunk_rec->objectid,
8985 chunk_rec->stripes[i].devid,
8986 chunk_rec->stripes[i].offset);
/*
 * Run check_chunk_refs() on every chunk in @chunk_cache and sort the chunk
 * records by its result: a result of 0 appends the record to @good and a
 * positive result appends it to @rebuild, each only when that list pointer
 * is non-NULL; @bad is the third destination list.
 *
 * Afterwards the block groups still linked on
 * block_group_cache->block_groups and the device extents still on
 * dev_extent_cache->no_chunk_orphans are walked, with a message for each
 * entry that did not find its chunk.
 *
 * @silent is forwarded to check_chunk_refs().
 */
8993 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8994 int check_chunks(struct cache_tree *chunk_cache,
8995 struct block_group_tree *block_group_cache,
8996 struct device_extent_tree *dev_extent_cache,
8997 struct list_head *good, struct list_head *bad,
8998 struct list_head *rebuild, int silent)
9000 struct cache_extent *chunk_item;
9001 struct chunk_record *chunk_rec;
9002 struct block_group_record *bg_rec;
9003 struct device_extent_record *dext_rec;
9007 chunk_item = first_cache_extent(chunk_cache);
9008 while (chunk_item) {
9009 chunk_rec = container_of(chunk_item, struct chunk_record,
9011 err = check_chunk_refs(chunk_rec, block_group_cache,
9012 dev_extent_cache, silent);
9015 if (err == 0 && good)
9016 list_add_tail(&chunk_rec->list, good);
9017 if (err > 0 && rebuild)
9018 list_add_tail(&chunk_rec->list, rebuild);
9020 list_add_tail(&chunk_rec->list, bad);
9021 chunk_item = next_cache_extent(chunk_item);
9024 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
9027 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
9035 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
9039 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Compare the space covered by a device's extents with the device item.
 *
 * Starting at (dev_rec->devid, 0) in @dext_cache, the lengths of the
 * device extents are summed into total_byte, unlinking each counted extent
 * from its device_list; the objectid != devid test marks where another
 * device's extents begin.  A total that differs from dev_rec->byte_used is
 * reported.
 */
9050 static int check_device_used(struct device_record *dev_rec,
9051 struct device_extent_tree *dext_cache)
9053 struct cache_extent *cache;
9054 struct device_extent_record *dev_extent_rec;
9057 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
9059 dev_extent_rec = container_of(cache,
9060 struct device_extent_record,
9062 if (dev_extent_rec->objectid != dev_rec->devid)
9065 list_del_init(&dev_extent_rec->device_list);
9066 total_byte += dev_extent_rec->length;
9067 cache = next_cache_extent(cache);
9070 if (total_byte != dev_rec->byte_used) {
9072 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
9073 total_byte, dev_rec->byte_used, dev_rec->objectid,
9074 dev_rec->type, dev_rec->offset);
/*
 * Run check_device_used() for each device record reached by walking
 * @dev_cache with rb_first()/rb_next(), then report every device extent
 * still linked on dev_extent_cache->no_device_orphans as having no device.
 */
9081 /* check btrfs_dev_item -> btrfs_dev_extent */
9082 static int check_devices(struct rb_root *dev_cache,
9083 struct device_extent_tree *dev_extent_cache)
9085 struct rb_node *dev_node;
9086 struct device_record *dev_rec;
9087 struct device_extent_record *dext_rec;
9091 dev_node = rb_first(dev_cache);
9093 dev_rec = container_of(dev_node, struct device_record, node);
9094 err = check_device_used(dev_rec, dev_extent_cache);
9098 dev_node = rb_next(dev_node);
9100 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
9103 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
9104 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments (bytenr,
 * objectid, level, level_size, drop_level, last_snapshot and a copy of
 * @drop_key) and append it to the tail of @head.
 *
 * NOTE(review): callers in this file pass NULL for @drop_key; the memcpy()
 * is presumably guarded by a NULL check that is not visible in this
 * excerpt -- confirm.
 */
9111 static int add_root_item_to_list(struct list_head *head,
9112 u64 objectid, u64 bytenr, u64 last_snapshot,
9113 u8 level, u8 drop_level,
9114 int level_size, struct btrfs_key *drop_key)
9117 struct root_item_record *ri_rec;
9118 ri_rec = malloc(sizeof(*ri_rec));
9121 ri_rec->bytenr = bytenr;
9122 ri_rec->objectid = objectid;
9123 ri_rec->level = level;
9124 ri_rec->level_size = level_size;
9125 ri_rec->drop_level = drop_level;
9126 ri_rec->last_snapshot = last_snapshot;
9128 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
9129 list_add_tail(&ri_rec->list, head);
/*
 * Unlink root_item_records from the front of @list until it is empty.
 * NOTE(review): presumably each record is also freed; that call is not
 * visible in this excerpt.
 */
9134 static void free_root_item_list(struct list_head *list)
9136 struct root_item_record *ri_rec;
9138 while (!list_empty(list)) {
9139 ri_rec = list_first_entry(list, struct root_item_record,
9141 list_del_init(&ri_rec->list);
/*
 * Process the tree roots queued on @list.
 *
 * For each root_item_record the root block is read with read_tree_block()
 * (a buffer that is not uptodate is released again), registered through
 * add_root_to_pending(), and run_next_block() is called with that record;
 * the buffer is then released and the record unlinked from the list.
 * A further run_next_block() call with a NULL record follows the list
 * walk.  All cache and tree arguments are passed through unchanged to
 * those helpers.
 */
9146 static int deal_root_from_list(struct list_head *list,
9147 struct btrfs_root *root,
9148 struct block_info *bits,
9150 struct cache_tree *pending,
9151 struct cache_tree *seen,
9152 struct cache_tree *reada,
9153 struct cache_tree *nodes,
9154 struct cache_tree *extent_cache,
9155 struct cache_tree *chunk_cache,
9156 struct rb_root *dev_cache,
9157 struct block_group_tree *block_group_cache,
9158 struct device_extent_tree *dev_extent_cache)
9163 while (!list_empty(list)) {
9164 struct root_item_record *rec;
9165 struct extent_buffer *buf;
9166 rec = list_entry(list->next,
9167 struct root_item_record, list);
9169 buf = read_tree_block(root->fs_info->tree_root,
9170 rec->bytenr, rec->level_size, 0);
9171 if (!extent_buffer_uptodate(buf)) {
9172 free_extent_buffer(buf);
9176 ret = add_root_to_pending(buf, extent_cache, pending,
9177 seen, nodes, rec->objectid);
9181 * To rebuild extent tree, we need deal with snapshot
9182 * one by one, otherwise we deal with node firstly which
9183 * can maximize readahead.
9186 ret = run_next_block(root, bits, bits_nr, &last,
9187 pending, seen, reada, nodes,
9188 extent_cache, chunk_cache,
9189 dev_cache, block_group_cache,
9190 dev_extent_cache, rec);
9194 free_extent_buffer(buf);
9195 list_del(&rec->list);
9201 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
9202 reada, nodes, extent_cache, chunk_cache,
9203 dev_cache, block_group_cache,
9204 dev_extent_cache, NULL);
/*
 * Top-level chunk and extent check.
 *
 * Visible flow:
 *  - initialize the local caches and lists and hook excluded_extents,
 *    fsck_extent_cache, free_extent_hook and corrupt_blocks into
 *    root->fs_info;
 *  - allocate the block_info array `bits` and, when progress reporting is
 *    enabled, start the TASK_EXTENTS task;
 *  - queue the tree root and the chunk root on normal_trees, then walk the
 *    ROOT_ITEM keys of the tree root: a root whose drop_progress objectid
 *    is 0 is queued on normal_trees, the other branch queues onto
 *    dropping_trees together with its drop key;
 *  - process both lists with deal_root_from_list(), then run
 *    check_chunks(), check_extent_refs() and check_devices();
 *  - stop the task, reset the fs_info hooks to NULL and free the caches.
 *
 * The second group of free_* calls near the end belongs to a separate exit
 * path that also frees the extent record cache and both root item lists.
 * NOTE(review): per the comment ahead of the first deal_root_from_list()
 * call, -EAGAIN from the helpers is meant to restart the loop; the restart
 * logic itself is not visible in this excerpt.
 */
9214 static int check_chunks_and_extents(struct btrfs_root *root)
9216 struct rb_root dev_cache;
9217 struct cache_tree chunk_cache;
9218 struct block_group_tree block_group_cache;
9219 struct device_extent_tree dev_extent_cache;
9220 struct cache_tree extent_cache;
9221 struct cache_tree seen;
9222 struct cache_tree pending;
9223 struct cache_tree reada;
9224 struct cache_tree nodes;
9225 struct extent_io_tree excluded_extents;
9226 struct cache_tree corrupt_blocks;
9227 struct btrfs_path path;
9228 struct btrfs_key key;
9229 struct btrfs_key found_key;
9231 struct block_info *bits;
9233 struct extent_buffer *leaf;
9235 struct btrfs_root_item ri;
9236 struct list_head dropping_trees;
9237 struct list_head normal_trees;
9238 struct btrfs_root *root1;
9243 dev_cache = RB_ROOT;
9244 cache_tree_init(&chunk_cache);
9245 block_group_tree_init(&block_group_cache);
9246 device_extent_tree_init(&dev_extent_cache);
9248 cache_tree_init(&extent_cache);
9249 cache_tree_init(&seen);
9250 cache_tree_init(&pending);
9251 cache_tree_init(&nodes);
9252 cache_tree_init(&reada);
9253 cache_tree_init(&corrupt_blocks);
9254 extent_io_tree_init(&excluded_extents);
9255 INIT_LIST_HEAD(&dropping_trees);
9256 INIT_LIST_HEAD(&normal_trees);
9259 root->fs_info->excluded_extents = &excluded_extents;
9260 root->fs_info->fsck_extent_cache = &extent_cache;
9261 root->fs_info->free_extent_hook = free_extent_hook;
9262 root->fs_info->corrupt_blocks = &corrupt_blocks;
9266 bits = malloc(bits_nr * sizeof(struct block_info));
9272 if (ctx.progress_enabled) {
9273 ctx.tp = TASK_EXTENTS;
9274 task_start(ctx.info);
9278 root1 = root->fs_info->tree_root;
9279 level = btrfs_header_level(root1->node);
9280 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9281 root1->node->start, 0, level, 0,
9282 root1->nodesize, NULL);
9285 root1 = root->fs_info->chunk_root;
9286 level = btrfs_header_level(root1->node);
9287 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
9288 root1->node->start, 0, level, 0,
9289 root1->nodesize, NULL);
9292 btrfs_init_path(&path);
9295 key.type = BTRFS_ROOT_ITEM_KEY;
9296 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
9301 leaf = path.nodes[0];
9302 slot = path.slots[0];
9303 if (slot >= btrfs_header_nritems(path.nodes[0])) {
9304 ret = btrfs_next_leaf(root, &path);
9307 leaf = path.nodes[0];
9308 slot = path.slots[0];
9310 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
9311 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
9312 unsigned long offset;
9315 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
9316 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
9317 last_snapshot = btrfs_root_last_snapshot(&ri);
9318 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
9319 level = btrfs_root_level(&ri);
9320 level_size = root->nodesize;
9321 ret = add_root_item_to_list(&normal_trees,
9323 btrfs_root_bytenr(&ri),
9324 last_snapshot, level,
9325 0, level_size, NULL);
9329 level = btrfs_root_level(&ri);
9330 level_size = root->nodesize;
9331 objectid = found_key.objectid;
9332 btrfs_disk_key_to_cpu(&found_key,
9334 ret = add_root_item_to_list(&dropping_trees,
9336 btrfs_root_bytenr(&ri),
9337 last_snapshot, level,
9339 level_size, &found_key);
9346 btrfs_release_path(&path);
9349 * check_block can return -EAGAIN if it fixes something, please keep
9350 * this in mind when dealing with return values from these functions, if
9351 * we get -EAGAIN we want to fall through and restart the loop.
9353 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
9354 &seen, &reada, &nodes, &extent_cache,
9355 &chunk_cache, &dev_cache, &block_group_cache,
9362 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
9363 &pending, &seen, &reada, &nodes,
9364 &extent_cache, &chunk_cache, &dev_cache,
9365 &block_group_cache, &dev_extent_cache);
9372 ret = check_chunks(&chunk_cache, &block_group_cache,
9373 &dev_extent_cache, NULL, NULL, NULL, 0);
9380 ret = check_extent_refs(root, &extent_cache);
9387 ret = check_devices(&dev_cache, &dev_extent_cache);
9392 task_stop(ctx.info);
9394 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9395 extent_io_tree_cleanup(&excluded_extents);
9396 root->fs_info->fsck_extent_cache = NULL;
9397 root->fs_info->free_extent_hook = NULL;
9398 root->fs_info->corrupt_blocks = NULL;
9399 root->fs_info->excluded_extents = NULL;
9402 free_chunk_cache_tree(&chunk_cache);
9403 free_device_cache_tree(&dev_cache);
9404 free_block_group_tree(&block_group_cache);
9405 free_device_extent_tree(&dev_extent_cache);
9406 free_extent_cache_tree(&seen);
9407 free_extent_cache_tree(&pending);
9408 free_extent_cache_tree(&reada);
9409 free_extent_cache_tree(&nodes);
9412 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
9413 free_extent_cache_tree(&seen);
9414 free_extent_cache_tree(&pending);
9415 free_extent_cache_tree(&reada);
9416 free_extent_cache_tree(&nodes);
9417 free_chunk_cache_tree(&chunk_cache);
9418 free_block_group_tree(&block_group_cache);
9419 free_device_cache_tree(&dev_cache);
9420 free_device_extent_tree(&dev_extent_cache);
9421 free_extent_record_cache(root->fs_info, &extent_cache);
9422 free_root_item_list(&normal_trees);
9423 free_root_item_list(&dropping_trees);
9424 extent_io_tree_cleanup(&excluded_extents);
/*
 * Implementation notes for check_tree_block_ref():
 *
 *  - The extent tree is searched with key (bytenr, METADATA_ITEM or
 *    EXTENT_ITEM depending on the SKINNY_METADATA incompat flag, (u64)-1)
 *    and btrfs_previous_extent_item() steps back to the item for @bytenr;
 *    a failure in either step sets BACKREF_MISSING.
 *  - For a METADATA_ITEM the level is taken from key.offset and the inline
 *    refs start right after the extent item; otherwise a
 *    btrfs_tree_block_info sits in between and supplies the level.
 *  - The item is checked for the TREE_BLOCK flag, for a generation equal
 *    to the header generation of @eb, for a level equal to @level, and for
 *    a ref count of 1 when @owner is not an fs tree; each failure sets
 *    BACKREF_MISMATCH.
 *  - Inline refs are scanned for a TREE_BLOCK_REF whose offset equals
 *    root->objectid or @owner, or a SHARED_BLOCK_REF whose parent passes a
 *    recursive check_tree_block_ref() call made with a NULL eb.
 *  - A keyed TREE_BLOCK_REF item (bytenr, root->objectid) is searched as
 *    the fallback described by the "Inlined extent item doesn't have what
 *    we need" comment.
 *  - The "backref lost" error is printed only when @eb is non-NULL.
 */
9429 * Check backrefs of a tree block given by @bytenr or @eb.
9431 * @root: the root containing the @bytenr or @eb
9432 * @eb: tree block extent buffer, can be NULL
9433 * @bytenr: bytenr of the tree block to search
9434 * @level: tree level of the tree block
9435 * @owner: owner of the tree block
9437 * Return >0 for any error found and output error message
9438 * Return 0 for no error found
9440 static int check_tree_block_ref(struct btrfs_root *root,
9441 struct extent_buffer *eb, u64 bytenr,
9442 int level, u64 owner)
9444 struct btrfs_key key;
9445 struct btrfs_root *extent_root = root->fs_info->extent_root;
9446 struct btrfs_path path;
9447 struct btrfs_extent_item *ei;
9448 struct btrfs_extent_inline_ref *iref;
9449 struct extent_buffer *leaf;
9455 u32 nodesize = root->nodesize;
9462 btrfs_init_path(&path);
9463 key.objectid = bytenr;
9464 if (btrfs_fs_incompat(root->fs_info,
9465 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
9466 key.type = BTRFS_METADATA_ITEM_KEY;
9468 key.type = BTRFS_EXTENT_ITEM_KEY;
9469 key.offset = (u64)-1;
9471 /* Search for the backref in extent tree */
9472 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9474 err |= BACKREF_MISSING;
9477 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9479 err |= BACKREF_MISSING;
9483 leaf = path.nodes[0];
9484 slot = path.slots[0];
9485 btrfs_item_key_to_cpu(leaf, &key, slot);
9487 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9489 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9490 skinny_level = (int)key.offset;
9491 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9493 struct btrfs_tree_block_info *info;
9495 info = (struct btrfs_tree_block_info *)(ei + 1);
9496 skinny_level = btrfs_tree_block_level(leaf, info);
9497 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9504 if (!(btrfs_extent_flags(leaf, ei) &
9505 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9507 "extent[%llu %u] backref type mismatch, missing bit: %llx",
9508 key.objectid, nodesize,
9509 BTRFS_EXTENT_FLAG_TREE_BLOCK);
9510 err = BACKREF_MISMATCH;
9512 header_gen = btrfs_header_generation(eb);
9513 extent_gen = btrfs_extent_generation(leaf, ei);
9514 if (header_gen != extent_gen) {
9516 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
9517 key.objectid, nodesize, header_gen,
9519 err = BACKREF_MISMATCH;
9521 if (level != skinny_level) {
9523 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
9524 key.objectid, nodesize, level, skinny_level);
9525 err = BACKREF_MISMATCH;
9527 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
9529 "extent[%llu %u] is referred by other roots than %llu",
9530 key.objectid, nodesize, root->objectid);
9531 err = BACKREF_MISMATCH;
9536 * Iterate the extent/metadata item to find the exact backref
9538 item_size = btrfs_item_size_nr(leaf, slot);
9539 ptr = (unsigned long)iref;
9540 end = (unsigned long)ei + item_size;
9542 iref = (struct btrfs_extent_inline_ref *)ptr;
9543 type = btrfs_extent_inline_ref_type(leaf, iref);
9544 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9546 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9547 (offset == root->objectid || offset == owner)) {
9549 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
9550 /* Check if the backref points to valid referencer */
9551 found_ref = !check_tree_block_ref(root, NULL, offset,
9557 ptr += btrfs_extent_inline_ref_size(type);
9561 * Inlined extent item doesn't have what we need, check
9562 * TREE_BLOCK_REF_KEY
9565 btrfs_release_path(&path);
9566 key.objectid = bytenr;
9567 key.type = BTRFS_TREE_BLOCK_REF_KEY;
9568 key.offset = root->objectid;
9570 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9575 err |= BACKREF_MISSING;
9577 btrfs_release_path(&path);
9578 if (eb && (err & BACKREF_MISSING))
9579 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
9580 bytenr, nodesize, owner, level);
9585 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
9587 * Return >0 if any error is found; an error message is printed
9588 * Return 0 for no error found
/*
 * check_extent_data_item() - verify one EXTENT_DATA (file extent) item
 * against the extent tree.
 *
 * @root: tree the leaf belongs to; root->objectid is one accepted backref root
 * @eb:   leaf containing the file extent item
 * @slot: slot of the item in @eb (later reused for the extent tree slot)
 *
 * Problems are collected as bits in err: BYTES_UNALIGNED for unaligned
 * sizes, BACKREF_MISMATCH when the EXTENT_ITEM flags or generation disagree,
 * BACKREF_MISSING when no matching data backref is found.  The function
 * also adds to the global data_bytes_allocated / data_bytes_referenced
 * counters.
 *
 * NOTE(review): this listing omits short lines (braces, error( openers,
 * gotos, returns), so the exact control flow must be confirmed against the
 * complete file.
 */
9590 static int check_extent_data_item(struct btrfs_root *root,
9591 struct extent_buffer *eb, int slot)
9593 struct btrfs_file_extent_item *fi;
9594 struct btrfs_path path;
9595 struct btrfs_root *extent_root = root->fs_info->extent_root;
9596 struct btrfs_key fi_key;
9597 struct btrfs_key dbref_key;
9598 struct extent_buffer *leaf;
9599 struct btrfs_extent_item *ei;
9600 struct btrfs_extent_inline_ref *iref;
9601 struct btrfs_extent_data_ref *dref;
9603 u64 file_extent_gen;
9606 u64 extent_num_bytes;
9614 int found_dbackref = 0;
9618 btrfs_item_key_to_cpu(eb, &fi_key, slot);
9619 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
9620 file_extent_gen = btrfs_file_extent_generation(eb, fi);
9622 /* Nothing to check for hole and inline data extents */
9623 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
9624 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
9627 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
9628 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
9629 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
9631 /* Check unaligned disk_num_bytes and num_bytes */
9632 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
9634 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
9635 fi_key.objectid, fi_key.offset, disk_num_bytes,
9637 err |= BYTES_UNALIGNED;
9639 data_bytes_allocated += disk_num_bytes;
9641 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
9643 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
9644 fi_key.objectid, fi_key.offset, extent_num_bytes,
9646 err |= BYTES_UNALIGNED;
9648 data_bytes_referenced += extent_num_bytes;
/* The leaf's header owner is accepted as backref root besides root->objectid */
9650 owner = btrfs_header_owner(eb);
9652 /* Check the extent item of the file extent in extent tree */
9653 btrfs_init_path(&path);
/* EXTENT_ITEM key is (disk bytenr, EXTENT_ITEM_KEY, disk num bytes) */
9654 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9655 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
9656 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
9658 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
/* presumably taken when the search did not hit the exact key -- condition not visible */
9660 err |= BACKREF_MISSING;
9664 leaf = path.nodes[0];
9665 slot = path.slots[0];
9666 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9668 extent_flags = btrfs_extent_flags(leaf, ei);
9669 extent_gen = btrfs_extent_generation(leaf, ei);
/* An extent referenced by file data must carry the DATA flag */
9671 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
9673 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
9674 disk_bytenr, disk_num_bytes,
9675 BTRFS_EXTENT_FLAG_DATA);
9676 err |= BACKREF_MISMATCH;
/* The extent item generation may not be newer than the file extent's */
9679 if (file_extent_gen < extent_gen) {
9681 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
9682 disk_bytenr, disk_num_bytes, file_extent_gen,
9684 err |= BACKREF_MISMATCH;
9687 /* Check data backref inside that extent item */
/* Inline refs start right after the extent item and run to the item's end */
9688 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
9689 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9690 ptr = (unsigned long)iref;
9691 end = (unsigned long)ei + item_size;
9693 iref = (struct btrfs_extent_inline_ref *)ptr;
9694 type = btrfs_extent_inline_ref_type(leaf, iref);
/* The data ref payload overlays the inline ref starting at its offset field */
9695 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9697 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9698 ref_root = btrfs_extent_data_ref_root(leaf, dref);
/* match on either the header owner or this root (the action taken is not visible) */
9699 if (ref_root == owner || ref_root == root->objectid)
9701 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared ref counts as found when check_tree_block_ref() returns 0 for the parent */
9702 found_dbackref = !check_tree_block_ref(root, NULL,
9703 btrfs_extent_inline_ref_offset(leaf, iref),
/* Step to the next inline ref; its size depends on the ref type */
9709 ptr += btrfs_extent_inline_ref_size(type);
9712 /* Didn't find an inlined data backref, try EXTENT_DATA_REF_KEY */
9713 if (!found_dbackref) {
9714 btrfs_release_path(&path);
9716 btrfs_init_path(&path);
9717 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9718 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/*
 * NOTE(review): the hash is fed root->objectid and fi_key.offset as-is.
 * If keyed data backrefs are hashed with (file offset - file extent offset)
 * and may belong to the header owner, this lookup can miss -- confirm.
 */
9719 dbref_key.offset = hash_extent_data_ref(root->objectid,
9720 fi_key.objectid, fi_key.offset);
9722 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9723 &dbref_key, &path, 0, 0);
9728 if (!found_dbackref)
9729 err |= BACKREF_MISSING;
9731 btrfs_release_path(&path);
/* One summary message for a missing backref, whichever lookup failed */
9732 if (err & BACKREF_MISSING) {
9733 error("data extent[%llu %llu] backref lost",
9734 disk_bytenr, disk_num_bytes);
9740 * Get real tree block level for the case like shared block
9741 * Return >= 0 as tree level
9742 * Return <0 for error
/*
 * query_tree_block_level() - resolve the level of the tree block at @bytenr.
 *
 * @fs_info: filesystem being checked
 * @bytenr:  logical address of the tree block
 *
 * Two sources are read: the level recorded in the extent tree (key.offset of
 * a METADATA_ITEM, or the btrfs_tree_block_info that follows an EXTENT_ITEM)
 * and the level stored in the block's own header.  header_level is returned
 * when the two agree; the value returned on failure or disagreement is on
 * lines not visible in this listing.
 */
9744 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9746 struct extent_buffer *eb;
9747 struct btrfs_path path;
9748 struct btrfs_key key;
9749 struct btrfs_extent_item *ei;
9752 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9757 /* Search extent tree for extent generation and level */
9758 key.objectid = bytenr;
9759 key.type = BTRFS_METADATA_ITEM_KEY;
9760 key.offset = (u64)-1;
9762 btrfs_init_path(&path);
9763 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
/* presumably steps back to the extent/metadata item for @bytenr -- helper not shown */
9766 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9774 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9775 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9776 struct btrfs_extent_item);
9777 flags = btrfs_extent_flags(path.nodes[0], ei);
/* Only tree blocks have a level; anything else is handled on lines not visible here */
9778 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9783 /* Get transid for later read_tree_block() check */
9784 transid = btrfs_extent_generation(path.nodes[0], ei);
9786 /* Get backref level as one source */
9787 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9788 backref_level = key.offset;
9790 struct btrfs_tree_block_info *info;
/* For the non-METADATA_ITEM key, the tree block info follows the extent item */
9792 info = (struct btrfs_tree_block_info *)(ei + 1);
9793 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9795 btrfs_release_path(&path);
9797 /* Get level from tree block as an alternative source */
9798 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9799 if (!extent_buffer_uptodate(eb)) {
9800 free_extent_buffer(eb);
9803 header_level = btrfs_header_level(eb);
9804 free_extent_buffer(eb);
/* Both sources must agree before the level is trusted */
9806 if (header_level != backref_level)
9808 return header_level;
9811 btrfs_release_path(&path);
9816 * Check if a tree block backref is valid (points to a valid tree block)
9817 * if level == -1, level will be resolved
9818 * Return >0 for any error found and print error message
/*
 * check_tree_block_backref() - verify that root @root_id really references
 * the tree block at @bytenr.
 *
 * @fs_info: filesystem being checked
 * @root_id: objectid of the root named by the backref
 * @bytenr:  logical address of the tree block
 * @level:   expected level, or -1 to resolve it via query_tree_block_level()
 *
 * The block is read to obtain its first key, then the root is searched for
 * that key with path.lowest_level = level.  The node reached at that level
 * must have the same bytenr and level.  Errors are collected in err as
 * REFERENCER_MISSING / REFERENCER_MISMATCH bits.
 *
 * NOTE(review): nodesize is u32 but is printed with %d, and the int level
 * is printed with %u in the final message -- confirm the specifiers.
 */
9820 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9821 u64 bytenr, int level)
9823 struct btrfs_root *root;
9824 struct btrfs_key key;
9825 struct btrfs_path path;
9826 struct extent_buffer *eb;
9827 struct extent_buffer *node;
9828 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9832 /* Query level for level == -1 special case */
9834 level = query_tree_block_level(fs_info, bytenr);
9836 err |= REFERENCER_MISSING;
/* Key with offset -1 is what btrfs_read_fs_root() is given to look up @root_id */
9840 key.objectid = root_id;
9841 key.type = BTRFS_ROOT_ITEM_KEY;
9842 key.offset = (u64)-1;
9844 root = btrfs_read_fs_root(fs_info, &key);
9846 err |= REFERENCER_MISSING;
9850 /* Read out the tree block to get item/node key */
9851 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9852 if (!extent_buffer_uptodate(eb)) {
9853 err |= REFERENCER_MISSING;
9854 free_extent_buffer(eb);
9858 /* Empty tree, no need to check key */
9859 if (!btrfs_header_nritems(eb) && !level) {
9860 free_extent_buffer(eb);
/* First key comes from a node pointer or a leaf item (selecting condition not visible) */
9865 btrfs_node_key_to_cpu(eb, &key, 0);
9867 btrfs_item_key_to_cpu(eb, &key, 0);
9869 free_extent_buffer(eb);
9871 btrfs_init_path(&path);
/* lowest_level is set so path.nodes[level] below refers to the block at @level */
9872 path.lowest_level = level;
9873 /* Search with the first key, to ensure we can reach it */
9874 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9876 err |= REFERENCER_MISSING;
9880 node = path.nodes[level];
9881 if (btrfs_header_bytenr(node) != bytenr) {
9883 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9884 bytenr, nodesize, bytenr,
9885 btrfs_header_bytenr(node));
9886 err |= REFERENCER_MISMATCH;
9888 if (btrfs_header_level(node) != level) {
9890 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9891 bytenr, nodesize, level,
9892 btrfs_header_level(node));
9893 err |= REFERENCER_MISMATCH;
9897 btrfs_release_path(&path);
/* Two message variants: without and with the level (selecting condition not visible) */
9899 if (err & REFERENCER_MISSING) {
9901 error("extent [%llu %d] lost referencer (owner: %llu)",
9902 bytenr, nodesize, root_id);
9905 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9906 bytenr, nodesize, root_id, level);
9913 * Check referencer for shared block backref
9914 * If level == -1, this function will resolve the level.
/*
 * check_shared_block_backref() - verify a shared (parent-based) tree block
 * backref.
 *
 * @fs_info: filesystem being checked
 * @parent:  logical address of the parent tree block named by the backref
 * @bytenr:  logical address of the referenced child block
 * @level:   level of the child; query_tree_block_level() is used to resolve
 *           it (per the function's original comment, when level == -1)
 *
 * Returns REFERENCER_MISSING (after printing an error) when no matching
 * block pointer was found in the parent.
 */
9916 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9917 u64 parent, u64 bytenr, int level)
9919 struct extent_buffer *eb;
9920 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9922 int found_parent = 0;
9925 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9926 if (!extent_buffer_uptodate(eb))
9930 level = query_tree_block_level(fs_info, bytenr);
/* The parent must sit exactly one level above the child */
9934 if (level + 1 != btrfs_header_level(eb))
/* Scan the parent's block pointers for one equal to @bytenr */
9937 nr = btrfs_header_nritems(eb);
9938 for (i = 0; i < nr; i++) {
9939 if (bytenr == btrfs_node_blockptr(eb, i)) {
9945 free_extent_buffer(eb);
9946 if (!found_parent) {
9948 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9949 bytenr, nodesize, parent, level);
9950 return REFERENCER_MISSING;
9956 * Check referencer for normal (inlined) data ref
9957 * If len == 0, it will be resolved by searching in extent tree
/*
 * check_extent_data_backref() - verify a keyed/inline EXTENT_DATA_REF by
 * looking for the file extents it describes.
 *
 * @fs_info:  filesystem being checked
 * @root_id:  root named by the data ref
 * @objectid: inode number named by the data ref
 * @offset:   data ref offset
 * @bytenr:   logical start of the data extent
 * @len:      extent length (the function's original comment says 0 means
 *            "resolve it from the extent tree")
 * @count:    reference count stored in the data ref
 *
 * File extent items of @objectid in root @root_id are walked and compared
 * with (@bytenr, @len, @offset).  REFERENCER_MISSING is returned, after an
 * error message, when found_count differs from @count.  The line that
 * increments found_count is not visible in this listing.
 */
9959 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9960 u64 root_id, u64 objectid, u64 offset,
9961 u64 bytenr, u64 len, u32 count)
9963 struct btrfs_root *root;
9964 struct btrfs_root *extent_root = fs_info->extent_root;
9965 struct btrfs_key key;
9966 struct btrfs_path path;
9967 struct extent_buffer *leaf;
9968 struct btrfs_file_extent_item *fi;
9969 u32 found_count = 0;
/* Extent tree lookup for @bytenr (the guarding condition is not visible here) */
9974 key.objectid = bytenr;
9975 key.type = BTRFS_EXTENT_ITEM_KEY;
9976 key.offset = (u64)-1;
9978 btrfs_init_path(&path);
9979 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9982 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9985 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9986 if (key.objectid != bytenr ||
9987 key.type != BTRFS_EXTENT_ITEM_KEY)
9990 btrfs_release_path(&path);
/* Open the root the data ref claims to live in */
9992 key.objectid = root_id;
9993 key.type = BTRFS_ROOT_ITEM_KEY;
9994 key.offset = (u64)-1;
9995 btrfs_init_path(&path);
9997 root = btrfs_read_fs_root(fs_info, &key);
10001 key.objectid = objectid;
10002 key.type = BTRFS_EXTENT_DATA_KEY;
10004 * It can be nasty as data backref offset is
10005 * file offset - file extent offset, which is smaller or
10006 * equal to original backref offset. The only special case is
10007 * overflow. So we need to special check and do further search.
/* Parsed as (offset & bit 63) ? 0 : offset -- start from 0 when the top bit is set */
10009 key.offset = offset & (1ULL << 63) ? 0 : offset;
10011 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
10016 * Search afterwards to get correct one
10017 * NOTE: As we must do a comprehensive check on the data backref to
10018 * make sure the dref count also matches, we must iterate all file
10019 * extents for that inode.
10022 leaf = path.nodes[0];
10023 slot = path.slots[0];
10025 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Leaving the inode's EXTENT_DATA items ends the walk (exit statement not visible) */
10026 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
10028 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
10030 * Except normal disk bytenr and disk num bytes, we still
10031 * need to do extra check on dbackref offset as
10032 * dbackref offset = file_offset - file_extent_offset
10034 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
10035 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
10036 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
10040 ret = btrfs_next_item(root, &path);
10045 btrfs_release_path(&path);
10046 if (found_count != count) {
10048 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
10049 bytenr, len, root_id, objectid, offset, count, found_count);
10050 return REFERENCER_MISSING;
10056 * Check if the referencer of a shared data backref exists
/*
 * check_shared_data_backref() - verify a shared (parent-based) data backref.
 *
 * @fs_info: filesystem being checked
 * @parent:  logical address of the leaf named by the backref
 * @bytenr:  logical start of the data extent
 *
 * Reads the parent block and scans its EXTENT_DATA items for a non-inline
 * file extent whose disk bytenr equals @bytenr.  Returns REFERENCER_MISSING
 * (after printing an error) when found_parent stays 0.
 */
10058 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
10059 u64 parent, u64 bytenr)
10061 struct extent_buffer *eb;
10062 struct btrfs_key key;
10063 struct btrfs_file_extent_item *fi;
10064 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10066 int found_parent = 0;
10069 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
10070 if (!extent_buffer_uptodate(eb))
10073 nr = btrfs_header_nritems(eb);
10074 for (i = 0; i < nr; i++) {
10075 btrfs_item_key_to_cpu(eb, &key, i);
/* Only file extent items can reference a data extent */
10076 if (key.type != BTRFS_EXTENT_DATA_KEY)
10079 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline extents are filtered out before the disk bytenr comparison */
10080 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
10083 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
10090 free_extent_buffer(eb);
10091 if (!found_parent) {
10092 error("shared extent %llu referencer lost (parent: %llu)",
10094 return REFERENCER_MISSING;
10100 * This function will check a given extent item, including its backref and
10101 * itself (like crossing stripe boundary and type)
10103 * Since we don't use extent_record anymore, introduce new error bit
/*
 * check_extent_item() - check one EXTENT_ITEM / METADATA_ITEM and every
 * inline backref stored in it.
 *
 * @fs_info: filesystem being checked
 * @eb:      extent tree leaf holding the item
 * @slot:    slot of the item in @eb
 *
 * Adds the extent size to the global bytes_used, rejects items smaller than
 * struct btrfs_extent_item, flags metadata that crosses a stripe boundary
 * (CROSSING_STRIPE_BOUNDARY), then dispatches each inline ref to the
 * matching check_*_backref() helper.  Unknown ref types set UNKNOWN_TYPE.
 */
10105 static int check_extent_item(struct btrfs_fs_info *fs_info,
10106 struct extent_buffer *eb, int slot)
10108 struct btrfs_extent_item *ei;
10109 struct btrfs_extent_inline_ref *iref;
10110 struct btrfs_extent_data_ref *dref;
10114 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10115 u32 item_size = btrfs_item_size_nr(eb, slot);
10120 struct btrfs_key key;
10124 btrfs_item_key_to_cpu(eb, &key, slot);
/* EXTENT_ITEM keys carry the length in key.offset; the other branch adds nodesize */
10125 if (key.type == BTRFS_EXTENT_ITEM_KEY)
10126 bytes_used += key.offset;
10128 bytes_used += nodesize;
10130 if (item_size < sizeof(*ei)) {
10132 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
10133 * old thing when on disk format is still un-determined.
10134 * No need to care about it anymore
10136 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
10140 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
10141 flags = btrfs_extent_flags(eb, ei);
10143 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
10145 if (metadata && check_crossing_stripes(global_info, key.objectid,
10147 error("bad metadata [%llu, %llu) crossing stripe boundary",
10148 key.objectid, key.objectid + nodesize);
10149 err |= CROSSING_STRIPE_BOUNDARY;
/* ptr starts just past the fixed-size extent item */
10152 ptr = (unsigned long)(ei + 1);
10154 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
10155 /* Old EXTENT_ITEM metadata */
10156 struct btrfs_tree_block_info *info;
10158 info = (struct btrfs_tree_block_info *)ptr;
10159 level = btrfs_tree_block_level(eb, info);
10160 ptr += sizeof(struct btrfs_tree_block_info);
10162 /* New METADATA_ITEM */
10163 level = key.offset;
10165 end = (unsigned long)ei + item_size;
/* presumably set when ptr does not fit inside the item -- condition not visible */
10168 err |= ITEM_SIZE_MISMATCH;
10172 /* Now check every backref in this extent item */
10174 iref = (struct btrfs_extent_inline_ref *)ptr;
10175 type = btrfs_extent_inline_ref_type(eb, iref);
10176 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* offset is passed as the helper's root id (keyed refs) or parent (shared refs) */
10178 case BTRFS_TREE_BLOCK_REF_KEY:
10179 ret = check_tree_block_backref(fs_info, offset, key.objectid,
10183 case BTRFS_SHARED_BLOCK_REF_KEY:
10184 ret = check_shared_block_backref(fs_info, offset, key.objectid,
10188 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload overlays the inline ref starting at its offset field */
10189 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10190 ret = check_extent_data_backref(fs_info,
10191 btrfs_extent_data_ref_root(eb, dref),
10192 btrfs_extent_data_ref_objectid(eb, dref),
10193 btrfs_extent_data_ref_offset(eb, dref),
10194 key.objectid, key.offset,
10195 btrfs_extent_data_ref_count(eb, dref));
10198 case BTRFS_SHARED_DATA_REF_KEY:
10199 ret = check_shared_data_backref(fs_info, offset, key.objectid);
10203 error("extent[%llu %d %llu] has unknown ref type: %d",
10204 key.objectid, key.type, key.offset, type);
10205 err |= UNKNOWN_TYPE;
/* Step to the next inline ref; its size depends on the ref type */
10209 ptr += btrfs_extent_inline_ref_size(type);
10218 * Check if a dev extent item is referred correctly by its chunk
/*
 * check_dev_extent_item() - verify that a DEV_EXTENT is referenced by the
 * chunk it names.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev extent item
 * @slot:    slot of the item in @eb
 *
 * The chunk keyed (chunk_objectid, CHUNK_ITEM_KEY, chunk_offset) is looked
 * up in the chunk tree, its length is compared with the dev extent length
 * and its stripes are compared with the dev extent key (devid, offset).
 * Returns REFERENCER_MISSING, after an error message, when found_chunk
 * stays 0.
 */
10220 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
10221 struct extent_buffer *eb, int slot)
10223 struct btrfs_root *chunk_root = fs_info->chunk_root;
10224 struct btrfs_dev_extent *ptr;
10225 struct btrfs_path path;
10226 struct btrfs_key chunk_key;
10227 struct btrfs_key devext_key;
10228 struct btrfs_chunk *chunk;
10229 struct extent_buffer *l;
10233 int found_chunk = 0;
10236 btrfs_item_key_to_cpu(eb, &devext_key, slot);
10237 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
10238 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent itself records which chunk it belongs to */
10240 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
10241 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10242 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
10244 btrfs_init_path(&path);
10245 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10250 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
/* Chunk length and dev extent length are compared for equality */
10251 if (btrfs_chunk_length(l, chunk) != length)
10254 num_stripes = btrfs_chunk_num_stripes(l, chunk);
10255 for (i = 0; i < num_stripes; i++) {
10256 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
10257 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
/* A stripe matches when it has the dev extent's (devid, physical offset) */
10259 if (devid == devext_key.objectid &&
10260 offset == devext_key.offset) {
10266 btrfs_release_path(&path);
10267 if (!found_chunk) {
10269 "device extent[%llu, %llu, %llu] did not find the related chunk",
10270 devext_key.objectid, devext_key.offset, length);
10271 return REFERENCER_MISSING;
10277 * Check if the used space is correct with the dev item
/*
 * check_dev_item() - compare a DEV_ITEM's bytes_used with the sum of the
 * lengths of the device's DEV_EXTENT items.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the dev item
 * @slot:    slot of the item in @eb
 *
 * Returns REFERENCER_MISSING when the initial dev extent search fails and
 * ACCOUNTING_MISMATCH when the summed length differs from bytes_used.
 *
 * Both error messages identify the device by the dev item's own key, which
 * is re-read from @eb right before printing.  The accounting message
 * previously printed hard-coded constants after re-reading the key.
 */
10279 static int check_dev_item(struct btrfs_fs_info *fs_info,
10280 struct extent_buffer *eb, int slot)
10282 struct btrfs_root *dev_root = fs_info->dev_root;
10283 struct btrfs_dev_item *dev_item;
10284 struct btrfs_path path;
10285 struct btrfs_key key;
10286 struct btrfs_dev_extent *ptr;
10292 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
10293 dev_id = btrfs_device_id(eb, dev_item);
10294 used = btrfs_device_bytes_used(eb, dev_item);
/* Dev extents of a device are keyed (devid, DEV_EXTENT_KEY, ...) in the dev tree */
10296 key.objectid = dev_id;
10297 key.type = BTRFS_DEV_EXTENT_KEY;
10300 btrfs_init_path(&path);
10301 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
/* key is overwritten with the dev item key purely for the message below */
10303 btrfs_item_key_to_cpu(eb, &key, slot);
10304 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
10305 key.objectid, key.type, key.offset);
10306 btrfs_release_path(&path);
10307 return REFERENCER_MISSING;
10310 /* Iterate dev_extents to calculate the used space of a device */
10312 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10314 if (key.objectid > dev_id)
10316 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
10319 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
10320 struct btrfs_dev_extent);
10321 total += btrfs_dev_extent_length(path.nodes[0], ptr);
10323 ret = btrfs_next_item(dev_root, &path);
10327 btrfs_release_path(&path);
10329 if (used != total) {
/* Report the dev item by its real key, as the message above does */
10330 btrfs_item_key_to_cpu(eb, &key, slot);
10332 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
10333 total, used, key.objectid,
10334 key.type, key.offset);
10335 return ACCOUNTING_MISMATCH;
10341 * Check a block group item with its referencer (chunk) and its used space
10342 * with extent/metadata item
/*
 * check_block_group_item() - check a BLOCK_GROUP_ITEM against its chunk and
 * against the extents located inside it.
 *
 * @fs_info: filesystem being checked
 * @eb:      leaf holding the block group item
 * @slot:    slot of the item in @eb
 *
 * Error bits collected in err:
 *   REFERENCER_MISSING / REFERENCER_MISMATCH - chunk item absent / length differs
 *   CHUNK_TYPE_MISMATCH - extent flags disagree with the block group flags
 *   ACCOUNTING_MISMATCH - summed extent sizes differ from the "used" field
 */
10344 static int check_block_group_item(struct btrfs_fs_info *fs_info,
10345 struct extent_buffer *eb, int slot)
10347 struct btrfs_root *extent_root = fs_info->extent_root;
10348 struct btrfs_root *chunk_root = fs_info->chunk_root;
10349 struct btrfs_block_group_item *bi;
10350 struct btrfs_block_group_item bg_item;
10351 struct btrfs_path path;
10352 struct btrfs_key bg_key;
10353 struct btrfs_key chunk_key;
10354 struct btrfs_key extent_key;
10355 struct btrfs_chunk *chunk;
10356 struct extent_buffer *leaf;
10357 struct btrfs_extent_item *ei;
10358 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10366 btrfs_item_key_to_cpu(eb, &bg_key, slot);
10367 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
/* Copy the item out of the extent buffer so the accessors take a plain struct */
10368 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
10369 used = btrfs_block_group_used(&bg_item);
10370 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk is keyed by the block group's start address */
10372 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
10373 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
10374 chunk_key.offset = bg_key.objectid;
10376 btrfs_init_path(&path);
10377 /* Search for the referencer chunk */
10378 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
10381 "block group[%llu %llu] did not find the related chunk item",
10382 bg_key.objectid, bg_key.offset);
10383 err |= REFERENCER_MISSING;
10385 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
10386 struct btrfs_chunk);
10387 if (btrfs_chunk_length(path.nodes[0], chunk) !=
10390 "block group[%llu %llu] related chunk item length does not match",
10391 bg_key.objectid, bg_key.offset);
10392 err |= REFERENCER_MISMATCH;
10395 btrfs_release_path(&path);
10397 /* Search from the block group bytenr */
10398 extent_key.objectid = bg_key.objectid;
10399 extent_key.type = 0;
10400 extent_key.offset = 0;
10402 btrfs_init_path(&path);
10403 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
10407 /* Iterate extent tree to account used space */
10409 leaf = path.nodes[0];
10410 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* bg_key.objectid + bg_key.offset is the first byte past this block group */
10411 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
/* Only extent and metadata items take part in the accounting */
10414 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
10415 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
10417 if (extent_key.objectid < bg_key.objectid)
/* presumably METADATA_ITEMs add nodesize and others add key.offset -- branch body not visible */
10420 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
10423 total += extent_key.offset;
10425 ei = btrfs_item_ptr(leaf, path.slots[0],
10426 struct btrfs_extent_item);
10427 flags = btrfs_extent_flags(leaf, ei);
/* Data extents need a DATA block group; tree blocks need SYSTEM or METADATA */
10428 if (flags & BTRFS_EXTENT_FLAG_DATA) {
10429 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
10431 "bad extent[%llu, %llu) type mismatch with chunk",
10432 extent_key.objectid,
10433 extent_key.objectid + extent_key.offset);
10434 err |= CHUNK_TYPE_MISMATCH;
10436 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
10437 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
10438 BTRFS_BLOCK_GROUP_METADATA))) {
10440 "bad extent[%llu, %llu) type mismatch with chunk",
10441 extent_key.objectid,
10442 extent_key.objectid + nodesize);
10443 err |= CHUNK_TYPE_MISMATCH;
10447 ret = btrfs_next_item(extent_root, &path);
10453 btrfs_release_path(&path);
10455 if (total != used) {
10457 "block group[%llu %llu] used %llu but extent items used %llu",
10458 bg_key.objectid, bg_key.offset, used, total);
10459 err |= ACCOUNTING_MISMATCH;
10465 * Check a chunk item.
10466 * Including checking all referred dev_extents and block group
/*
 * check_chunk_item() - check a CHUNK_ITEM, its block group item and the
 * dev extent behind each stripe.
 *
 * @fs_info: filesystem being checked
 * @eb:      chunk tree leaf holding the item
 * @slot:    slot of the item in @eb
 *
 * Error bits collected in err:
 *   BYTES_UNALIGNED    - chunk length not aligned to the sector size
 *   UNKNOWN_TYPE       - no type bit set, or more than one profile bit set
 *   REFERENCER_MISSING - block group item absent or its flags differ
 *   BACKREF_MISSING    - a stripe has no matching dev extent
 *
 * Every message names the chunk as [chunk_key.offset, chunk_end).  The
 * stripe message previously printed chunk_key.objectid as the start.
 *
 * NOTE(review): a flags mismatch sets REFERENCER_MISSING rather than
 * REFERENCER_MISMATCH -- confirm that is intended.
 */
10468 static int check_chunk_item(struct btrfs_fs_info *fs_info,
10469 struct extent_buffer *eb, int slot)
10471 struct btrfs_root *extent_root = fs_info->extent_root;
10472 struct btrfs_root *dev_root = fs_info->dev_root;
10473 struct btrfs_path path;
10474 struct btrfs_key chunk_key;
10475 struct btrfs_key bg_key;
10476 struct btrfs_key devext_key;
10477 struct btrfs_chunk *chunk;
10478 struct extent_buffer *leaf;
10479 struct btrfs_block_group_item *bi;
10480 struct btrfs_block_group_item bg_item;
10481 struct btrfs_dev_extent *ptr;
10482 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
10494 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
10495 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
10496 length = btrfs_chunk_length(eb, chunk);
/* chunk_key.offset is the chunk's logical start */
10497 chunk_end = chunk_key.offset + length;
10498 if (!IS_ALIGNED(length, sectorsize)) {
10499 error("chunk[%llu %llu) not aligned to %u",
10500 chunk_key.offset, chunk_end, sectorsize);
10501 err |= BYTES_UNALIGNED;
10505 type = btrfs_chunk_type(eb, chunk);
10506 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
10507 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
10508 error("chunk[%llu %llu) has no chunk type",
10509 chunk_key.offset, chunk_end);
10510 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is non-zero when more than one bit is set */
10512 if (profile && (profile & (profile - 1))) {
10513 error("chunk[%llu %llu) multiple profiles detected: %llx",
10514 chunk_key.offset, chunk_end, profile);
10515 err |= UNKNOWN_TYPE;
/* The block group item is keyed (chunk start, BLOCK_GROUP_ITEM_KEY, length) */
10518 bg_key.objectid = chunk_key.offset;
10519 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
10520 bg_key.offset = length;
10522 btrfs_init_path(&path);
10523 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
10526 "chunk[%llu %llu) did not find the related block group item",
10527 chunk_key.offset, chunk_end);
10528 err |= REFERENCER_MISSING;
10530 leaf = path.nodes[0];
10531 bi = btrfs_item_ptr(leaf, path.slots[0],
10532 struct btrfs_block_group_item);
10533 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
10535 if (btrfs_block_group_flags(&bg_item) != type) {
10537 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
10538 chunk_key.offset, chunk_end, type,
10539 btrfs_block_group_flags(&bg_item));
10540 err |= REFERENCER_MISSING;
10544 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
10545 for (i = 0; i < num_stripes; i++) {
/* Drop whatever the previous lookup left in the path before reusing it */
10546 btrfs_release_path(&path);
10547 btrfs_init_path(&path);
/* The dev extent is keyed by the stripe's (devid, DEV_EXTENT_KEY, physical offset) */
10548 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
10549 devext_key.type = BTRFS_DEV_EXTENT_KEY;
10550 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
10552 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
10555 goto not_match_dev;
10557 leaf = path.nodes[0];
10558 ptr = btrfs_item_ptr(leaf, path.slots[0],
10559 struct btrfs_dev_extent);
10560 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
10561 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent must point back at this chunk and have the same length */
10562 if (objectid != chunk_key.objectid ||
10563 offset != chunk_key.offset ||
10564 btrfs_dev_extent_length(leaf, ptr) != length)
10565 goto not_match_dev;
10568 err |= BACKREF_MISSING;
10570 "chunk[%llu %llu) stripe %d did not find the related dev extent",
10571 chunk_key.offset, chunk_end, i);
10574 btrfs_release_path(&path);
10580 * Main entry function to check known items and update related accounting info
/*
 * check_leaf_items() - dispatch every item of leaf @eb to the checker that
 * matches its key type.
 *
 * @root: tree the leaf belongs to (also supplies fs_info)
 * @eb:   leaf to check
 *
 * EXTENT_CSUM items only add their item size to the global
 * total_csum_bytes.  How each helper's result is folded into the function's
 * own result is on lines not visible in this listing.
 */
10582 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
10584 struct btrfs_fs_info *fs_info = root->fs_info;
10585 struct btrfs_key key;
10588 struct btrfs_extent_data_ref *dref;
10593 btrfs_item_key_to_cpu(eb, &key, slot);
10597 case BTRFS_EXTENT_DATA_KEY:
10598 ret = check_extent_data_item(root, eb, slot);
10601 case BTRFS_BLOCK_GROUP_ITEM_KEY:
10602 ret = check_block_group_item(fs_info, eb, slot);
10605 case BTRFS_DEV_ITEM_KEY:
10606 ret = check_dev_item(fs_info, eb, slot);
10609 case BTRFS_CHUNK_ITEM_KEY:
10610 ret = check_chunk_item(fs_info, eb, slot);
10613 case BTRFS_DEV_EXTENT_KEY:
10614 ret = check_dev_extent_item(fs_info, eb, slot);
/* Both extent item flavours share one checker */
10617 case BTRFS_EXTENT_ITEM_KEY:
10618 case BTRFS_METADATA_ITEM_KEY:
10619 ret = check_extent_item(fs_info, eb, slot);
10622 case BTRFS_EXTENT_CSUM_KEY:
10623 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed (non-inline) backref items: key.offset is passed as the root id or parent */
10625 case BTRFS_TREE_BLOCK_REF_KEY:
10626 ret = check_tree_block_backref(fs_info, key.offset,
10630 case BTRFS_EXTENT_DATA_REF_KEY:
10631 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
10632 ret = check_extent_data_backref(fs_info,
10633 btrfs_extent_data_ref_root(eb, dref),
10634 btrfs_extent_data_ref_objectid(eb, dref),
10635 btrfs_extent_data_ref_offset(eb, dref),
10637 btrfs_extent_data_ref_count(eb, dref));
10640 case BTRFS_SHARED_BLOCK_REF_KEY:
10641 ret = check_shared_block_backref(fs_info, key.offset,
10645 case BTRFS_SHARED_DATA_REF_KEY:
10646 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot while items remain (the jump itself is not visible) */
10654 if (++slot < btrfs_header_nritems(eb))
10661 * Helper function for later fs/subvol tree check. To determine if a tree
10662 * block should be checked.
10663 * This function ensures that only the direct referencer with the lowest rootid
10664 * checks a fs/subvolume tree block.
10666 * Backref check at extent tree would detect errors like missing subvolume
10667 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * should_check() - decide whether @root is the one that should check tree
 * block @eb.
 *
 * @root: fs/subvolume root currently being traversed
 * @eb:   tree block under consideration
 *
 * The block's extent/metadata item is looked up and its inline refs are
 * scanned.  An inline TREE_BLOCK_REF whose offset is lower than
 * root->objectid ends the scan early.  The actual return values are on
 * lines not visible in this listing.
 */
10669 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
10671 struct btrfs_root *extent_root = root->fs_info->extent_root;
10672 struct btrfs_key key;
10673 struct btrfs_path path;
10674 struct extent_buffer *leaf;
10676 struct btrfs_extent_item *ei;
10682 struct btrfs_extent_inline_ref *iref;
10685 btrfs_init_path(&path);
10686 key.objectid = btrfs_header_bytenr(eb);
10687 key.type = BTRFS_METADATA_ITEM_KEY;
10688 key.offset = (u64)-1;
10691 * Any failure in backref resolving means we can't determine
10692 * whom the tree block belongs to.
10693 * So in that case, we need to check that tree block
10695 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10699 ret = btrfs_previous_extent_item(extent_root, &path,
10700 btrfs_header_bytenr(eb));
10704 leaf = path.nodes[0];
10705 slot = path.slots[0];
10706 btrfs_item_key_to_cpu(leaf, &key, slot);
10707 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* METADATA_ITEM: refs follow the extent item; otherwise a tree block info sits in between */
10709 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10710 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10712 struct btrfs_tree_block_info *info;
10714 info = (struct btrfs_tree_block_info *)(ei + 1);
10715 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10718 item_size = btrfs_item_size_nr(leaf, slot);
10719 ptr = (unsigned long)iref;
10720 end = (unsigned long)ei + item_size;
10721 while (ptr < end) {
10722 iref = (struct btrfs_extent_inline_ref *)ptr;
10723 type = btrfs_extent_inline_ref_type(leaf, iref);
10724 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10727 * We only check the tree block if current root is
10728 * the lowest referencer of it.
/* This branch releases the path when a lower-numbered referencer exists */
10730 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10731 offset < root->objectid) {
10732 btrfs_release_path(&path);
10736 ptr += btrfs_extent_inline_ref_size(type);
10739 * Normally we should also check keyed tree block ref, but that may be
10740 * very time consuming. Inlined ref should already make us skip a lot
10741 * of refs now. So skip search keyed tree block ref.
10745 btrfs_release_path(&path);
10750 * Traversal function for tree block. We will do:
10751 * 1) Skip shared fs/subvolume tree blocks
10752 * 2) Update related bytes accounting
10753 * 3) Pre-order traversal
/*
 * traverse_tree_block() - recursive pre-order walk over the tree rooted at
 * @node.
 *
 * @root: tree being walked
 * @node: current tree block
 *
 * For each block the function updates the global byte counters
 * (total_btree_bytes, total_fs_tree_bytes, total_extent_tree_bytes,
 * btree_space_waste) and may set found_old_backref.  It checks the block's
 * own backref with check_tree_block_ref(), checks leaf contents with
 * check_leaf_items(), and recurses into every child that can be read.
 * In fs trees, should_check() decides which blocks are skipped.
 */
10755 static int traverse_tree_block(struct btrfs_root *root,
10756 struct extent_buffer *node)
10758 struct extent_buffer *eb;
10759 struct btrfs_key key;
10760 struct btrfs_key drop_key;
10768 * Skip shared fs/subvolume tree block, in that case they will
10769 * be checked by referencer with lowest rootid
10771 if (is_fstree(root->objectid) && !should_check(root, node))
10774 /* Update bytes accounting */
10775 total_btree_bytes += node->len;
10776 if (fs_root_objectid(btrfs_header_owner(node)))
10777 total_fs_tree_bytes += node->len;
10778 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10779 total_extent_tree_bytes += node->len;
/* Latch found_old_backref once for a reloc-owned mixed-backref block without the RELOC flag */
10780 if (!found_old_backref &&
10781 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10782 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10783 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10784 found_old_backref = 1;
10786 /* pre-order traversal, check itself first */
10787 level = btrfs_header_level(node);
10788 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10789 btrfs_header_level(node),
10790 btrfs_header_owner(node));
10794 "check %s failed root %llu bytenr %llu level %d, force continue check",
10795 level ? "node":"leaf", root->objectid,
10796 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf handling: account free space as waste, then check every item */
10799 btree_space_waste += btrfs_leaf_free_space(root, node);
10800 ret = check_leaf_items(root, node);
10805 nr = btrfs_header_nritems(node);
10806 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
/* Unused key-pointer slots of a node count as wasted space */
10807 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10808 sizeof(struct btrfs_key_ptr);
10810 /* Then check all its children */
10811 for (i = 0; i < nr; i++) {
10812 u64 blocknr = btrfs_node_blockptr(node, i);
10814 btrfs_node_key_to_cpu(node, &key, i);
/* Keys that is_dropped_key() reports at the root item's drop_level (action not visible) */
10815 if (level == root->root_item.drop_level &&
10816 is_dropped_key(&key, &drop_key))
10820 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10821 * to call the function itself.
10823 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10824 if (extent_buffer_uptodate(eb)) {
10825 ret = traverse_tree_block(root, eb);
10828 free_extent_buffer(eb);
10835 * Low memory usage version of check_chunks_and_extents.
/*
 * check_chunks_and_extents_v2 - walk the chunk tree, the tree root and every
 * root referenced from the tree root using traverse_tree_block().
 *
 * @root: any root of the filesystem; only root->fs_info is used
 *
 * Order of work as visible below:
 *   1. traverse fs_info->chunk_root
 *   2. traverse fs_info->tree_root
 *   3. search the tree root starting at the key
 *      (BTRFS_EXTENT_TREE_OBJECTID, BTRFS_ROOT_ITEM_KEY, ...) and, for each
 *      ROOT_ITEM found, load the root with btrfs_read_fs_root() (key.offset
 *      forced to (u64)-1) and traverse it; btrfs_next_item() advances.
 *
 * NOTE(review): the message "cannot find extent treet in tree_root" looks
 * like a typo for "tree", and "failed to read tree: %lld" prints
 * key.objectid with a signed conversion -- both are runtime strings and
 * are left untouched here.
 *
 * NOTE(review): how individual traversal results are folded into the
 * return value is not visible in the lines below -- TODO confirm.
 */
10837 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10839 struct btrfs_path path;
10840 struct btrfs_key key;
10841 struct btrfs_root *root1;
10842 struct btrfs_root *cur_root;
10846 root1 = root->fs_info->chunk_root;
10847 ret = traverse_tree_block(root1, root1->node);
10850 root1 = root->fs_info->tree_root;
10851 ret = traverse_tree_block(root1, root1->node);
10854 btrfs_init_path(&path);
10855 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10857 key.type = BTRFS_ROOT_ITEM_KEY;
10859 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10861 error("cannot find extent treet in tree_root");
10866 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10867 if (key.type != BTRFS_ROOT_ITEM_KEY)
10869 key.offset = (u64)-1;
10871 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10872 if (IS_ERR(cur_root) || !cur_root) {
10873 error("failed to read tree: %lld", key.objectid);
10877 ret = traverse_tree_block(cur_root, cur_root->node);
10881 ret = btrfs_next_item(root1, &path);
10887 btrfs_release_path(&path);
/*
 * btrfs_fsck_reinit_root - replace the root node of @root with an empty,
 * freshly initialised tree block.
 *
 * @trans:     running transaction
 * @root:      root to reinitialise
 * @overwrite: presumably selects reusing the current root node instead of
 *             allocating a new block -- TODO confirm, the branch condition
 *             is not visible below
 *
 * The buffer c is obtained either by taking a reference on an existing
 * buffer or from btrfs_alloc_free_block().  Its header is zeroed and then
 * filled in: level, bytenr (c->start), generation (trans->transid), mixed
 * backref revision, owner (root objectid), fsid and chunk tree uuid.  The
 * buffer is marked dirty.
 *
 * When the new block lands on the same bytenr as the old root node, the
 * root item generation and level are refreshed and written back with
 * btrfs_update_root().  The old node reference is dropped and the root is
 * put on the dirty list.
 */
10891 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10892 struct btrfs_root *root, int overwrite)
10894 struct extent_buffer *c;
10895 struct extent_buffer *old = root->node;
10898 struct btrfs_disk_key disk_key = {0,0,0};
10904 extent_buffer_get(c);
10907 c = btrfs_alloc_free_block(trans, root,
10909 root->root_key.objectid,
10910 &disk_key, level, 0, 0);
10913 extent_buffer_get(c);
/* wipe the whole header before setting the individual fields */
10917 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10918 btrfs_set_header_level(c, level);
10919 btrfs_set_header_bytenr(c, c->start);
10920 btrfs_set_header_generation(c, trans->transid);
10921 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10922 btrfs_set_header_owner(c, root->root_key.objectid);
10924 write_extent_buffer(c, root->fs_info->fsid,
10925 btrfs_header_fsid(), BTRFS_FSID_SIZE);
10927 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10928 btrfs_header_chunk_tree_uuid(c),
10931 btrfs_mark_buffer_dirty(c);
10933 * this case can happen in the following case:
10935 * 1.overwrite previous root.
10937 * 2.reinit reloc data root, this is because we skip pin
10938 * down reloc data tree before which means we can allocate
10939 * same block bytenr here.
10941 if (old->start == c->start) {
10942 btrfs_set_root_generation(&root->root_item,
10944 root->root_item.level = btrfs_header_level(root->node);
10945 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10946 &root->root_key, &root->root_item);
10948 free_extent_buffer(c);
10952 free_extent_buffer(old);
10954 add_root_to_dirty_list(root);
/*
 * pin_down_tree_blocks - recursively pin the extent of @eb and of the tree
 * blocks reachable from it.
 *
 * @fs_info:   filesystem being processed
 * @eb:        tree block to start from
 * @tree_root: non-zero when @eb belongs to the tree root, in which case
 *             ROOT_ITEMs found in leaves are followed into their trees
 *
 * A block whose range is already marked EXTENT_DIRTY in
 * fs_info->pinned_extents is not pinned again.  Otherwise the block is
 * pinned with btrfs_pin_extent() and its items are walked:
 *   - item keys of type ROOT_ITEM (except the extent root, tree reloc and
 *     data reloc roots) have their root block read and are recursed into
 *     with tree_root == 0;
 *   - node pointers are either pinned directly (level 1 and not the tree
 *     root, so the child is never read) or read and recursed into with
 *     the same tree_root value.
 *
 * A child that cannot be read produces an "Error reading ..." message on
 * stderr; the exact error code returned is not visible below.
 */
10958 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10959 struct extent_buffer *eb, int tree_root)
10961 struct extent_buffer *tmp;
10962 struct btrfs_root_item *ri;
10963 struct btrfs_key key;
10966 int level = btrfs_header_level(eb);
10972 * If we have pinned this block before, don't pin it again.
10973 * This can not only avoid forever loop with broken filesystem
10974 * but also give us some speedups.
10976 if (test_range_bit(&fs_info->pinned_extents, eb->start,
10977 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10980 btrfs_pin_extent(fs_info, eb->start, eb->len);
10982 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10983 nritems = btrfs_header_nritems(eb);
10984 for (i = 0; i < nritems; i++) {
10986 btrfs_item_key_to_cpu(eb, &key, i);
10987 if (key.type != BTRFS_ROOT_ITEM_KEY)
10989 /* Skip the extent root and reloc roots */
10990 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10991 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10992 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10994 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10995 bytenr = btrfs_disk_root_bytenr(eb, ri);
10998 * If at any point we start needing the real root we
10999 * will have to build a stump root for the root we are
11000 * in, but for now this doesn't actually use the root so
11001 * just pass in extent_root.
11003 tmp = read_tree_block(fs_info->extent_root, bytenr,
11005 if (!extent_buffer_uptodate(tmp)) {
11006 fprintf(stderr, "Error reading root block\n");
11009 ret = pin_down_tree_blocks(fs_info, tmp, 0);
11010 free_extent_buffer(tmp);
11014 bytenr = btrfs_node_blockptr(eb, i);
11016 /* If we aren't the tree root don't read the block */
11017 if (level == 1 && !tree_root) {
11018 btrfs_pin_extent(fs_info, bytenr, nodesize);
11022 tmp = read_tree_block(fs_info->extent_root, bytenr,
11024 if (!extent_buffer_uptodate(tmp)) {
11025 fprintf(stderr, "Error reading tree block\n");
11028 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
11029 free_extent_buffer(tmp);
/*
 * pin_metadata_blocks - pin the tree blocks of the chunk tree and then of
 * the tree root (following root items found there).
 *
 * The chunk root is walked with tree_root == 0, the tree root with
 * tree_root == 1.  Returns the result of pin_down_tree_blocks(); the
 * handling of a failure from the first call is not visible below.
 */
11038 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
11042 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
11046 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * reset_block_groups - rebuild the in-memory block groups from the chunk
 * tree.
 *
 * @fs_info: filesystem whose block groups are recreated
 *
 * The chunk tree is searched for CHUNK_ITEM keys.  The three
 * avail_*_alloc_bits fields are cleared first.  For every chunk item a
 * block group is added with btrfs_add_block_group() using the chunk type,
 * key.objectid, key.offset and chunk length, and the chunk range is marked
 * dirty in fs_info->free_space_cache.  Afterwards all block groups are
 * iterated with btrfs_lookup_first_block_group(); what is done to each
 * cache entry in that loop is not visible below -- TODO confirm.
 */
11049 static int reset_block_groups(struct btrfs_fs_info *fs_info)
11051 struct btrfs_block_group_cache *cache;
11052 struct btrfs_path path;
11053 struct extent_buffer *leaf;
11054 struct btrfs_chunk *chunk;
11055 struct btrfs_key key;
11059 btrfs_init_path(&path);
11061 key.type = BTRFS_CHUNK_ITEM_KEY;
11063 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
11065 btrfs_release_path(&path);
11070 * We do this in case the block groups were screwed up and had alloc
11071 * bits that aren't actually set on the chunks. This happens with
11072 * restored images every time and could happen in real life I guess.
11074 fs_info->avail_data_alloc_bits = 0;
11075 fs_info->avail_metadata_alloc_bits = 0;
11076 fs_info->avail_system_alloc_bits = 0;
11078 /* First we need to create the in-memory block groups */
11080 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11081 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
11083 btrfs_release_path(&path);
11091 leaf = path.nodes[0];
11092 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11093 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
11098 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
11099 btrfs_add_block_group(fs_info, 0,
11100 btrfs_chunk_type(leaf, chunk),
11101 key.objectid, key.offset,
11102 btrfs_chunk_length(leaf, chunk));
11103 set_extent_dirty(&fs_info->free_space_cache, key.offset,
11104 key.offset + btrfs_chunk_length(leaf, chunk),
11110 cache = btrfs_lookup_first_block_group(fs_info, start);
11114 start = cache->key.objectid + cache->key.offset;
11117 btrfs_release_path(&path);
/*
 * reset_balance - remove pending-balance state from the tree root and
 * reinitialise the data reloc tree.
 *
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 *
 * Steps visible below:
 *   1. look up the (BTRFS_BALANCE_OBJECTID, BTRFS_BALANCE_ITEM_KEY) item in
 *      the tree root and delete it; one path jumps straight to the
 *      reinit_data_reloc label (presumably when no balance item exists --
 *      TODO confirm);
 *   2. delete root items with objectid BTRFS_TREE_RELOC_OBJECTID in batches
 *      using btrfs_del_items();
 *   3. read the data reloc root, record it in the transaction, reinit it
 *      with btrfs_fsck_reinit_root(..., 0) and recreate its root directory
 *      with btrfs_make_root_dir().
 */
11121 static int reset_balance(struct btrfs_trans_handle *trans,
11122 struct btrfs_fs_info *fs_info)
11124 struct btrfs_root *root = fs_info->tree_root;
11125 struct btrfs_path path;
11126 struct extent_buffer *leaf;
11127 struct btrfs_key key;
11128 int del_slot, del_nr = 0;
11132 btrfs_init_path(&path);
11133 key.objectid = BTRFS_BALANCE_OBJECTID;
11134 key.type = BTRFS_BALANCE_ITEM_KEY;
11136 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11141 goto reinit_data_reloc;
11146 ret = btrfs_del_item(trans, root, &path);
11149 btrfs_release_path(&path);
11151 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11152 key.type = BTRFS_ROOT_ITEM_KEY;
11154 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
11158 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11163 ret = btrfs_del_items(trans, root, &path,
11170 btrfs_release_path(&path);
11173 ret = btrfs_search_slot(trans, root, &key, &path,
11180 leaf = path.nodes[0];
11181 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11182 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
11184 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
11189 del_slot = path.slots[0];
11198 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
11202 btrfs_release_path(&path);
11205 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
11206 key.type = BTRFS_ROOT_ITEM_KEY;
11207 key.offset = (u64)-1;
/* from here on "root" refers to the data reloc root, not the tree root */
11208 root = btrfs_read_fs_root(fs_info, &key);
11209 if (IS_ERR(root)) {
11210 fprintf(stderr, "Error reading data reloc tree\n");
11211 ret = PTR_ERR(root);
11214 record_root_in_trans(trans, root);
11215 ret = btrfs_fsck_reinit_root(trans, root, 0);
11218 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
11220 btrfs_release_path(&path);
/*
 * reinit_extent_tree - throw away the extent tree and create a new one that
 * initially contains only the block group items.
 *
 * @trans:   running transaction
 * @fs_info: filesystem being repaired
 *
 * Filesystems with the MIXED_GROUPS incompat feature are rejected with a
 * message on stderr.  Otherwise, in order:
 *   1. pin_metadata_blocks() pins the metadata in use;
 *   2. the existing block groups are freed and rebuilt with
 *      reset_block_groups();
 *   3. the extent root is reinitialised via btrfs_fsck_reinit_root();
 *   4. every in-memory block group has its item inserted into the new
 *      extent root, followed by btrfs_extent_post_op();
 *   5. reset_balance() clears any pending balance.
 *
 * Each failing step prints a message on stderr; the returned error codes
 * are not visible below.
 */
11224 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
11225 struct btrfs_fs_info *fs_info)
11231 * The only reason we don't do this is because right now we're just
11232 * walking the trees we find and pinning down their bytes, we don't look
11233 * at any of the leaves. In order to do mixed groups we'd have to check
11234 * the leaves of any fs roots and pin down the bytes for any file
11235 * extents we find. Not hard but why do it if we don't have to?
11237 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
11238 fprintf(stderr, "We don't support re-initing the extent tree "
11239 "for mixed block groups yet, please notify a btrfs "
11240 "developer you want to do this so they can add this "
11241 "functionality.\n");
11246 * first we need to walk all of the trees except the extent tree and pin
11247 * down the bytes that are in use so we don't overwrite any existing
11250 ret = pin_metadata_blocks(fs_info);
11252 fprintf(stderr, "error pinning down used bytes\n");
11257 * Need to drop all the block groups since we're going to recreate all
11260 btrfs_free_block_groups(fs_info);
11261 ret = reset_block_groups(fs_info);
11263 fprintf(stderr, "error resetting the block groups\n");
11267 /* Ok we can allocate now, reinit the extent root */
11268 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
11270 fprintf(stderr, "extent root initialization failed\n");
11272 * When the transaction code is updated we should end the
11273 * transaction, but for now progs only knows about commit so
11274 * just return an error.
11280 * Now we have all the in-memory block groups setup so we can make
11281 * allocations properly, and the metadata we care about is safe since we
11282 * pinned all of it above.
11285 struct btrfs_block_group_cache *cache;
11287 cache = btrfs_lookup_first_block_group(fs_info, start);
11290 start = cache->key.objectid + cache->key.offset;
11291 ret = btrfs_insert_item(trans, fs_info->extent_root,
11292 &cache->key, &cache->item,
11293 sizeof(cache->item));
11295 fprintf(stderr, "Error adding block group\n");
11298 btrfs_extent_post_op(trans, fs_info->extent_root);
11301 ret = reset_balance(trans, fs_info);
11303 fprintf(stderr, "error resetting the pending balance\n");
/*
 * recow_extent_buffer - force a metadata block to be COWed by searching for
 * its first key inside a transaction.
 *
 * @root: any root of the filesystem; replaced below by the owner root of @eb
 * @eb:   the metadata block to re-COW
 *
 * The owner root is looked up from the header owner of @eb.  A transaction
 * is started, path.lowest_level is set to the level of @eb, and
 * btrfs_search_slot() is called with cow == 1 for the first key of the
 * block (node key for level > 0, item key for a leaf).  The transaction is
 * then committed and the path released.
 *
 * Returns PTR_ERR() when the owner root or the transaction cannot be
 * obtained.  NOTE(review): the result of btrfs_commit_transaction() is not
 * captured in the visible code.
 */
11308 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
11310 struct btrfs_path path;
11311 struct btrfs_trans_handle *trans;
11312 struct btrfs_key key;
11315 printf("Recowing metadata block %llu\n", eb->start);
11316 key.objectid = btrfs_header_owner(eb);
11317 key.type = BTRFS_ROOT_ITEM_KEY;
11318 key.offset = (u64)-1;
11320 root = btrfs_read_fs_root(root->fs_info, &key);
11321 if (IS_ERR(root)) {
11322 fprintf(stderr, "Couldn't find owner root %llu\n",
11324 return PTR_ERR(root);
11327 trans = btrfs_start_transaction(root, 1);
11329 return PTR_ERR(trans);
11331 btrfs_init_path(&path);
11332 path.lowest_level = btrfs_header_level(eb);
11333 if (path.lowest_level)
11334 btrfs_node_key_to_cpu(eb, &key, 0);
11336 btrfs_item_key_to_cpu(eb, &key, 0);
11338 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
11339 btrfs_commit_transaction(trans, root);
11340 btrfs_release_path(&path);
/*
 * delete_bad_item - delete one item, identified by @bad, from the tree that
 * owns it.
 *
 * @root: any root of the filesystem; replaced below by the root whose id is
 *        bad->root_id
 * @bad:  descriptor holding the owning root id and the key of the item
 *
 * The owning root is read, a transaction started, the key searched with
 * ins_len == -1 and cow == 1, and the item removed with btrfs_del_item().
 * The transaction is committed and the path released afterwards.
 *
 * Returns PTR_ERR() when the root or the transaction cannot be obtained.
 * NOTE(review): the result of btrfs_commit_transaction() is not captured
 * in the visible code.
 */
11344 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
11346 struct btrfs_path path;
11347 struct btrfs_trans_handle *trans;
11348 struct btrfs_key key;
11351 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
11352 bad->key.type, bad->key.offset);
11353 key.objectid = bad->root_id;
11354 key.type = BTRFS_ROOT_ITEM_KEY;
11355 key.offset = (u64)-1;
11357 root = btrfs_read_fs_root(root->fs_info, &key);
11358 if (IS_ERR(root)) {
11359 fprintf(stderr, "Couldn't find owner root %llu\n",
11361 return PTR_ERR(root);
11364 trans = btrfs_start_transaction(root, 1);
11366 return PTR_ERR(trans);
11368 btrfs_init_path(&path);
11369 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
11375 ret = btrfs_del_item(trans, root, &path);
11377 btrfs_commit_transaction(trans, root);
11378 btrfs_release_path(&path);
/*
 * zero_log_tree - clear the log root pointer and log root level in the
 * super block copy and commit the change.
 *
 * @root: root used to start and commit the transaction
 *
 * ret is set to PTR_ERR(trans) when the transaction cannot be started,
 * otherwise to the result of btrfs_commit_transaction().
 */
11382 static int zero_log_tree(struct btrfs_root *root)
11384 struct btrfs_trans_handle *trans;
11387 trans = btrfs_start_transaction(root, 1);
11388 if (IS_ERR(trans)) {
11389 ret = PTR_ERR(trans);
11392 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
11393 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
11394 ret = btrfs_commit_transaction(trans, root);
/*
 * populate_csum - read a data range one sector at a time and insert a
 * checksum for every sector into the csum tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree root; also supplies the sector size
 * @buf:       scratch buffer, at least one sector large
 * @start:     first byte of the range to checksum
 *
 * The loop runs while offset < len, reading each sector with
 * read_extent_data() and checksumming it with btrfs_csum_file_block(),
 * then advancing offset by the sector size.
 */
11398 static int populate_csum(struct btrfs_trans_handle *trans,
11399 struct btrfs_root *csum_root, char *buf, u64 start,
11406 while (offset < len) {
11407 sectorsize = csum_root->sectorsize;
11408 ret = read_extent_data(csum_root, buf, start + offset,
/*
 * NOTE(review): the third argument is start + len (end of the whole range),
 * while the fourth is the current sector start -- confirm this matches the
 * parameter meaning expected by btrfs_csum_file_block().
 */
11412 ret = btrfs_csum_file_block(trans, csum_root, start + len,
11413 start + offset, buf, sectorsize);
11416 offset += sectorsize;
/*
 * fill_csum_tree_from_one_fs_root - checksum every regular file extent
 * referenced by one fs/subvolume tree.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums
 * @cur_root:  fs/subvolume tree to scan
 *
 * A one-sector scratch buffer is allocated.  Items of @cur_root are visited
 * with btrfs_next_item(); only EXTENT_DATA items of type
 * BTRFS_FILE_EXTENT_REG are processed.  For those, the on-disk byte range
 * (disk_bytenr, disk_num_bytes) is passed to populate_csum().  A result of
 * -EEXIST is special-cased (presumably tolerated because the same extent
 * can be referenced more than once -- TODO confirm).
 */
11421 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
11422 struct btrfs_root *csum_root,
11423 struct btrfs_root *cur_root)
11425 struct btrfs_path path;
11426 struct btrfs_key key;
11427 struct extent_buffer *node;
11428 struct btrfs_file_extent_item *fi;
11435 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
11439 btrfs_init_path(&path);
11443 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
11446 /* Iterate all regular file extents and fill its csum */
11448 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11450 if (key.type != BTRFS_EXTENT_DATA_KEY)
11452 node = path.nodes[0];
11453 slot = path.slots[0];
11454 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
11455 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
11457 start = btrfs_file_extent_disk_bytenr(node, fi);
11458 len = btrfs_file_extent_disk_num_bytes(node, fi);
11460 ret = populate_csum(trans, csum_root, buf, start, len);
11461 if (ret == -EEXIST)
11467 * TODO: if next leaf is corrupted, jump to nearest next valid
11470 ret = btrfs_next_item(cur_root, &path);
11480 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_fs - rebuild checksums by scanning every fs/subvolume
 * tree listed in the tree root.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums
 *
 * The tree root is searched starting at
 * (BTRFS_FS_TREE_OBJECTID, BTRFS_ROOT_ITEM_KEY, ...).  Each item is
 * filtered on three conditions -- objectid above BTRFS_LAST_FREE_OBJECTID,
 * type other than ROOT_ITEM, and !is_fstree() -- whose outcomes (stop or
 * skip) are not visible below.  Remaining roots are loaded with
 * btrfs_read_fs_root() (key.offset forced to (u64)-1) and handed to
 * fill_csum_tree_from_one_fs_root().  btrfs_next_item() advances the scan.
 */
11485 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
11486 struct btrfs_root *csum_root)
11488 struct btrfs_fs_info *fs_info = csum_root->fs_info;
11489 struct btrfs_path path;
11490 struct btrfs_root *tree_root = fs_info->tree_root;
11491 struct btrfs_root *cur_root;
11492 struct extent_buffer *node;
11493 struct btrfs_key key;
11497 btrfs_init_path(&path);
11498 key.objectid = BTRFS_FS_TREE_OBJECTID;
11500 key.type = BTRFS_ROOT_ITEM_KEY;
11501 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
11510 node = path.nodes[0];
11511 slot = path.slots[0];
11512 btrfs_item_key_to_cpu(node, &key, slot);
11513 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
11515 if (key.type != BTRFS_ROOT_ITEM_KEY)
11517 if (!is_fstree(key.objectid))
11519 key.offset = (u64)-1;
11521 cur_root = btrfs_read_fs_root(fs_info, &key);
11522 if (IS_ERR(cur_root) || !cur_root) {
11523 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
11527 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
11532 ret = btrfs_next_item(tree_root, &path);
11542 btrfs_release_path(&path);
/*
 * fill_csum_tree_from_extent - rebuild checksums by scanning the extent tree
 * for data extents.
 *
 * @trans:     running transaction
 * @csum_root: csum tree that receives the checksums
 *
 * The extent tree is searched for EXTENT_ITEM keys and a one-sector scratch
 * buffer is allocated.  Leaves are advanced with btrfs_next_leaf().  Items
 * that are not EXTENT_ITEMs, or whose flags lack BTRFS_EXTENT_FLAG_DATA,
 * are not checksummed.  For data extents populate_csum() is called with
 * key.objectid as the start of the range.
 */
11546 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
11547 struct btrfs_root *csum_root)
11549 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
11550 struct btrfs_path path;
11551 struct btrfs_extent_item *ei;
11552 struct extent_buffer *leaf;
11554 struct btrfs_key key;
11557 btrfs_init_path(&path);
11559 key.type = BTRFS_EXTENT_ITEM_KEY;
11561 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11563 btrfs_release_path(&path);
11567 buf = malloc(csum_root->sectorsize);
11569 btrfs_release_path(&path);
11574 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
11575 ret = btrfs_next_leaf(extent_root, &path);
11583 leaf = path.nodes[0];
11585 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
11586 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
11591 ei = btrfs_item_ptr(leaf, path.slots[0],
11592 struct btrfs_extent_item);
11593 if (!(btrfs_extent_flags(leaf, ei) &
11594 BTRFS_EXTENT_FLAG_DATA)) {
11599 ret = populate_csum(trans, csum_root, buf, key.objectid,
11606 btrfs_release_path(&path);
11612 * Recalculate the csum and put it into the csum tree.
11614 * Extent tree init will wipe out all the extent info, so in that case, we
11615 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
11616 * will use fs/subvol trees to init the csum tree.
/*
 * fill_csum_tree - dispatch to the csum refill strategy.
 *
 * @trans:          running transaction
 * @csum_root:      csum tree that receives the checksums
 * @search_fs_tree: non-zero to scan fs/subvolume trees
 *                  (fill_csum_tree_from_fs), zero to scan the extent tree
 *                  (fill_csum_tree_from_extent)
 *
 * Returns whatever the selected helper returns.
 */
11618 static int fill_csum_tree(struct btrfs_trans_handle *trans,
11619 struct btrfs_root *csum_root,
11620 int search_fs_tree)
11622 if (search_fs_tree)
11623 return fill_csum_tree_from_fs(trans, csum_root);
11625 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * free_roots_info_cache - tear down the global roots_info_cache.
 *
 * Does nothing visible when the cache pointer is NULL.  Otherwise entries
 * are removed one by one from the front of the tree, each mapped back to
 * its enclosing struct root_item_info (presumably freed on a line not
 * visible below -- TODO confirm).  Finally the tree itself is freed and the
 * global pointer reset to NULL.
 */
11628 static void free_roots_info_cache(void)
11630 if (!roots_info_cache)
11633 while (!cache_tree_empty(roots_info_cache)) {
11634 struct cache_extent *entry;
11635 struct root_item_info *rii;
11637 entry = first_cache_extent(roots_info_cache);
11640 remove_cache_extent(roots_info_cache, entry);
11641 rii = container_of(entry, struct root_item_info, cache_extent);
11645 free(roots_info_cache);
11646 roots_info_cache = NULL;
/*
 * build_roots_info_cache - scan the extent tree and record, per root id, the
 * highest-level tree block found for that root.
 *
 * @info: filesystem to scan
 *
 * roots_info_cache is allocated and initialised on first use.  The extent
 * tree is walked leaf by leaf.  Only EXTENT_ITEM entries carrying
 * BTRFS_EXTENT_FLAG_TREE_BLOCK and METADATA_ITEM entries are considered.
 * The block level comes from found_key.offset for METADATA_ITEMs and from
 * the btrfs_tree_block_info following the extent item otherwise.  The
 * first inline ref must be of type BTRFS_TREE_BLOCK_REF_KEY; its offset is
 * the root id.
 *
 * For each root id a struct root_item_info is created on demand (level
 * initialised to (u8)-1) and inserted into the cache keyed by
 * [root_id, 1].  A block with a higher level than the recorded one (or
 * the first block seen) replaces level, bytenr and generation and resets
 * node_count to 1.  A block at the same level takes a branch whose body is
 * not visible below (presumably it bumps node_count -- TODO confirm).
 */
11649 static int build_roots_info_cache(struct btrfs_fs_info *info)
11652 struct btrfs_key key;
11653 struct extent_buffer *leaf;
11654 struct btrfs_path path;
11656 if (!roots_info_cache) {
11657 roots_info_cache = malloc(sizeof(*roots_info_cache));
11658 if (!roots_info_cache)
11660 cache_tree_init(roots_info_cache);
11663 btrfs_init_path(&path);
11665 key.type = BTRFS_EXTENT_ITEM_KEY;
11667 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
11670 leaf = path.nodes[0];
11673 struct btrfs_key found_key;
11674 struct btrfs_extent_item *ei;
11675 struct btrfs_extent_inline_ref *iref;
11676 int slot = path.slots[0];
11681 struct cache_extent *entry;
11682 struct root_item_info *rii;
11684 if (slot >= btrfs_header_nritems(leaf)) {
11685 ret = btrfs_next_leaf(info->extent_root, &path);
11692 leaf = path.nodes[0];
11693 slot = path.slots[0];
11696 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11698 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11699 found_key.type != BTRFS_METADATA_ITEM_KEY)
11702 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11703 flags = btrfs_extent_flags(leaf, ei);
11705 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11706 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
11709 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11710 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11711 level = found_key.offset;
11713 struct btrfs_tree_block_info *binfo;
11715 binfo = (struct btrfs_tree_block_info *)(ei + 1);
11716 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11717 level = btrfs_tree_block_level(leaf, binfo);
11721 * For a root extent, it must be of the following type and the
11722 * first (and only one) iref in the item.
11724 type = btrfs_extent_inline_ref_type(leaf, iref);
11725 if (type != BTRFS_TREE_BLOCK_REF_KEY)
11728 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11729 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11731 rii = malloc(sizeof(struct root_item_info));
11736 rii->cache_extent.start = root_id;
11737 rii->cache_extent.size = 1;
/* (u8)-1 marks "no level recorded yet"; tested again further down */
11738 rii->level = (u8)-1;
11739 entry = &rii->cache_extent;
11740 ret = insert_cache_extent(roots_info_cache, entry);
11743 rii = container_of(entry, struct root_item_info,
11747 ASSERT(rii->cache_extent.start == root_id);
11748 ASSERT(rii->cache_extent.size == 1);
11750 if (level > rii->level || rii->level == (u8)-1) {
11751 rii->level = level;
11752 rii->bytenr = found_key.objectid;
11753 rii->gen = btrfs_extent_generation(leaf, ei);
11754 rii->node_count = 1;
11755 } else if (level == rii->level) {
11763 btrfs_release_path(&path);
/*
 * maybe_repair_root_item - compare one root item against the information
 * gathered in roots_info_cache and optionally rewrite it.
 *
 * @info:           filesystem being checked
 * @path:           path positioned on the root item in the tree root leaf
 * @root_key:       key of that root item; objectid is the root id
 * @read_only_mode: non-zero to only detect a mismatch, zero to also write
 *                  the corrected bytenr/level/generation into the leaf
 *
 * Errors are printed when no cache entry exists for the root id or when
 * the entry has node_count != 1.  The on-disk root item is copied out of
 * the leaf and its bytenr, level and generation compared with the cached
 * values.  On mismatch a message is printed unless both read_only_mode and
 * the global repair flag are set.  A root item generation newer than the
 * cached generation is reported as an error.  In write mode the three
 * fields are updated and the item is written back into the leaf buffer.
 *
 * NOTE(review): the return values for each outcome are not visible below.
 */
11768 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11769 struct btrfs_path *path,
11770 const struct btrfs_key *root_key,
11771 const int read_only_mode)
11773 const u64 root_id = root_key->objectid;
11774 struct cache_extent *entry;
11775 struct root_item_info *rii;
11776 struct btrfs_root_item ri;
11777 unsigned long offset;
11779 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11782 "Error: could not find extent items for root %llu\n",
11783 root_key->objectid);
11787 rii = container_of(entry, struct root_item_info, cache_extent);
11788 ASSERT(rii->cache_extent.start == root_id);
11789 ASSERT(rii->cache_extent.size == 1);
11791 if (rii->node_count != 1) {
11793 "Error: could not find btree root extent for root %llu\n",
11798 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11799 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11801 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11802 btrfs_root_level(&ri) != rii->level ||
11803 btrfs_root_generation(&ri) != rii->gen) {
11806 * If we're in repair mode but our caller told us to not update
11807 * the root item, i.e. just check if it needs to be updated, don't
11808 * print this message, since the caller will call us again shortly
11809 * for the same root item without read only mode (the caller will
11810 * open a transaction first).
11812 if (!(read_only_mode && repair))
11814 "%sroot item for root %llu,"
11815 " current bytenr %llu, current gen %llu, current level %u,"
11816 " new bytenr %llu, new gen %llu, new level %u\n",
11817 (read_only_mode ? "" : "fixing "),
11819 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11820 btrfs_root_level(&ri),
11821 rii->bytenr, rii->gen, rii->level);
11823 if (btrfs_root_generation(&ri) > rii->gen) {
11825 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11826 root_id, btrfs_root_generation(&ri), rii->gen);
11830 if (!read_only_mode) {
11831 btrfs_set_root_bytenr(&ri, rii->bytenr);
11832 btrfs_set_root_level(&ri, rii->level);
11833 btrfs_set_root_generation(&ri, rii->gen);
11834 write_extent_buffer(path->nodes[0], &ri,
11835 offset, sizeof(ri));
11845 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11846 * caused read-only snapshots to be corrupted if they were created at a moment
11847 * when the source subvolume/snapshot had orphan items. The issue was that the
11848 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11849 * node instead of the post orphan cleanup root node.
11850 * So this function, and its callees, just detects and fixes those cases. Even
11851 * though the regression was for read-only snapshots, this function applies to
11852 * any snapshot/subvolume root.
11853 * This must be run before any other repair code - not doing so makes other
11854 * repair code delete or modify backrefs in the extent tree for example, which
11855 * will result in an inconsistent fs after repairing the root items.
/*
 * repair_root_items - check every root item in the tree root against the
 * extent tree and fix stale ones when repair is enabled.
 *
 * @info: filesystem being checked
 *
 * build_roots_info_cache() is run first.  The tree root is then scanned
 * from (BTRFS_FIRST_FREE_OBJECTID, BTRFS_ROOT_ITEM_KEY, ...).  ROOT_ITEMs
 * other than BTRFS_TREE_RELOC_OBJECTID are passed to
 * maybe_repair_root_item().  A transaction is opened only when a root item
 * actually needs fixing (see need_trans and the "!trans && repair" branch);
 * the path is released and the search presumably restarted with the
 * transaction -- TODO confirm.  At the end of a leaf, find_next_key()
 * supplies the next search key and any open transaction is committed.
 *
 * On exit the roots info cache is freed, the path released and a still
 * open transaction committed.
 *
 * NOTE(review): the return value (presumably an error or a count of bad
 * roots) is not visible below.
 */
11857 static int repair_root_items(struct btrfs_fs_info *info)
11859 struct btrfs_path path;
11860 struct btrfs_key key;
11861 struct extent_buffer *leaf;
11862 struct btrfs_trans_handle *trans = NULL;
11865 int need_trans = 0;
11867 btrfs_init_path(&path);
11869 ret = build_roots_info_cache(info);
11873 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11874 key.type = BTRFS_ROOT_ITEM_KEY;
11879 * Avoid opening and committing transactions if a leaf doesn't have
11880 * any root items that need to be fixed, so that we avoid rotating
11881 * backup roots unnecessarily.
11884 trans = btrfs_start_transaction(info->tree_root, 1);
11885 if (IS_ERR(trans)) {
11886 ret = PTR_ERR(trans);
11891 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
11895 leaf = path.nodes[0];
11898 struct btrfs_key found_key;
11900 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
11901 int no_more_keys = find_next_key(&path, &key);
11903 btrfs_release_path(&path);
11905 ret = btrfs_commit_transaction(trans,
11917 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11919 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11921 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11924 ret = maybe_repair_root_item(info, &path, &found_key,
11929 if (!trans && repair) {
11932 btrfs_release_path(&path);
11942 free_roots_info_cache();
11943 btrfs_release_path(&path);
11945 btrfs_commit_transaction(trans, info->tree_root);
/*
 * clear_free_space_cache - clear the free space cache of every block group
 * and set the super block cache generation to (u64)-1.
 *
 * @fs_info: filesystem to operate on
 *
 * Block groups are visited in address order through
 * btrfs_lookup_first_block_group(), calling
 * btrfs_clear_free_space_cache() on each.  A transaction is then started
 * to set the cache generation in the super copy and is committed.
 *
 * Returns PTR_ERR(trans) when the transaction cannot be started.
 * NOTE(review): the result of btrfs_commit_transaction() is not captured
 * in the visible code.
 */
11952 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11954 struct btrfs_trans_handle *trans;
11955 struct btrfs_block_group_cache *bg_cache;
11959 /* Clear all free space cache inodes and its extent data */
11961 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11964 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
11967 current = bg_cache->key.objectid + bg_cache->key.offset;
11970 /* Don't forget to set cache_generation to -1 */
11971 trans = btrfs_start_transaction(fs_info->tree_root, 0);
11972 if (IS_ERR(trans)) {
11973 error("failed to update super block cache generation");
11974 return PTR_ERR(trans);
11976 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
11977 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Usage/help text for "btrfs check", one string per output line: synopsis
 * first, then a short and a long description, then one entry per option.
 * Passed to usage() from cmd_check().
 */
11982 const char * const cmd_check_usage[] = {
11983 "btrfs check [options] <device>",
11984 "Check structural integrity of a filesystem (unmounted).",
11985 "Check structural integrity of an unmounted filesystem. Verify internal",
11986 "trees' consistency and item connectivity. In the repair mode try to",
11987 "fix the problems found. ",
11988 "WARNING: the repair mode is considered dangerous",
11990 "-s|--super <superblock> use this superblock copy",
11991 "-b|--backup use the first valid backup root copy",
11992 "--repair try to repair the filesystem",
11993 "--readonly run in read-only mode (default)",
11994 "--init-csum-tree create a new CRC tree",
11995 "--init-extent-tree create a new extent tree",
11996 "--mode <MODE> allows choice of memory/IO trade-offs",
11997 " where MODE is one of:",
11998 " original - read inodes and extents to memory (requires",
11999 " more memory, does less IO)",
12000 " lowmem - try to use less memory but read blocks again",
12002 "--check-data-csum verify checksums of data blocks",
12003 "-Q|--qgroup-report print a report on qgroup consistency",
12004 "-E|--subvol-extents <subvolid>",
12005 " print subvolume extents and sharing state",
12006 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
12007 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
12008 "-p|--progress indicate progress",
12009 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
12013 int cmd_check(int argc, char **argv)
12015 struct cache_tree root_cache;
12016 struct btrfs_root *root;
12017 struct btrfs_fs_info *info;
12020 u64 tree_root_bytenr = 0;
12021 u64 chunk_root_bytenr = 0;
12022 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
12025 int init_csum_tree = 0;
12027 int clear_space_cache = 0;
12028 int qgroup_report = 0;
12029 int qgroups_repaired = 0;
12030 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
12034 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
12035 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
12036 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
12037 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
12038 static const struct option long_options[] = {
12039 { "super", required_argument, NULL, 's' },
12040 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
12041 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
12042 { "init-csum-tree", no_argument, NULL,
12043 GETOPT_VAL_INIT_CSUM },
12044 { "init-extent-tree", no_argument, NULL,
12045 GETOPT_VAL_INIT_EXTENT },
12046 { "check-data-csum", no_argument, NULL,
12047 GETOPT_VAL_CHECK_CSUM },
12048 { "backup", no_argument, NULL, 'b' },
12049 { "subvol-extents", required_argument, NULL, 'E' },
12050 { "qgroup-report", no_argument, NULL, 'Q' },
12051 { "tree-root", required_argument, NULL, 'r' },
12052 { "chunk-root", required_argument, NULL,
12053 GETOPT_VAL_CHUNK_TREE },
12054 { "progress", no_argument, NULL, 'p' },
12055 { "mode", required_argument, NULL,
12057 { "clear-space-cache", required_argument, NULL,
12058 GETOPT_VAL_CLEAR_SPACE_CACHE},
12059 { NULL, 0, NULL, 0}
12062 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
12066 case 'a': /* ignored */ break;
12068 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
12071 num = arg_strtou64(optarg);
12072 if (num >= BTRFS_SUPER_MIRROR_MAX) {
12074 "super mirror should be less than %d",
12075 BTRFS_SUPER_MIRROR_MAX);
12078 bytenr = btrfs_sb_offset(((int)num));
12079 printf("using SB copy %llu, bytenr %llu\n", num,
12080 (unsigned long long)bytenr);
12086 subvolid = arg_strtou64(optarg);
12089 tree_root_bytenr = arg_strtou64(optarg);
12091 case GETOPT_VAL_CHUNK_TREE:
12092 chunk_root_bytenr = arg_strtou64(optarg);
12095 ctx.progress_enabled = true;
12099 usage(cmd_check_usage);
12100 case GETOPT_VAL_REPAIR:
12101 printf("enabling repair mode\n");
12103 ctree_flags |= OPEN_CTREE_WRITES;
12105 case GETOPT_VAL_READONLY:
12108 case GETOPT_VAL_INIT_CSUM:
12109 printf("Creating a new CRC tree\n");
12110 init_csum_tree = 1;
12112 ctree_flags |= OPEN_CTREE_WRITES;
12114 case GETOPT_VAL_INIT_EXTENT:
12115 init_extent_tree = 1;
12116 ctree_flags |= (OPEN_CTREE_WRITES |
12117 OPEN_CTREE_NO_BLOCK_GROUPS);
12120 case GETOPT_VAL_CHECK_CSUM:
12121 check_data_csum = 1;
12123 case GETOPT_VAL_MODE:
12124 check_mode = parse_check_mode(optarg);
12125 if (check_mode == CHECK_MODE_UNKNOWN) {
12126 error("unknown mode: %s", optarg);
12130 case GETOPT_VAL_CLEAR_SPACE_CACHE:
12131 if (strcmp(optarg, "v1") == 0) {
12132 clear_space_cache = 1;
12133 } else if (strcmp(optarg, "v2") == 0) {
12134 clear_space_cache = 2;
12135 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
12138 "invalid argument to --clear-space-cache, must be v1 or v2");
12141 ctree_flags |= OPEN_CTREE_WRITES;
12146 if (check_argc_exact(argc - optind, 1))
12147 usage(cmd_check_usage);
12149 if (ctx.progress_enabled) {
12150 ctx.tp = TASK_NOTHING;
12151 ctx.info = task_init(print_status_check, print_status_return, &ctx);
12154 /* This check is the only reason for --readonly to exist */
12155 if (readonly && repair) {
12156 error("repair options are not compatible with --readonly");
12161 * Not supported yet
12163 if (repair && check_mode == CHECK_MODE_LOWMEM) {
12164 error("low memory mode doesn't support repair yet");
12169 cache_tree_init(&root_cache);
12171 if((ret = check_mounted(argv[optind])) < 0) {
12172 error("could not check mount status: %s", strerror(-ret));
12175 error("%s is currently mounted, aborting", argv[optind]);
12180 /* only allow partial opening under repair mode */
12182 ctree_flags |= OPEN_CTREE_PARTIAL;
12184 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
12185 chunk_root_bytenr, ctree_flags);
12187 error("cannot open file system");
12192 global_info = info;
12193 root = info->fs_root;
12194 if (clear_space_cache == 1) {
12195 if (btrfs_fs_compat_ro(info,
12196 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12198 "free space cache v2 detected, use --clear-space-cache v2");
12202 printf("Clearing free space cache\n");
12203 ret = clear_free_space_cache(info);
12205 error("failed to clear free space cache");
12208 printf("Free space cache cleared\n");
12211 } else if (clear_space_cache == 2) {
12212 if (!btrfs_fs_compat_ro(info,
12213 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
12214 printf("no free space cache v2 to clear\n");
12218 printf("Clear free space cache v2\n");
12219 ret = btrfs_clear_free_space_tree(info);
12221 error("failed to clear free space cache v2: %d", ret);
12224 printf("free space cache v2 cleared\n");
12230 * repair mode will force us to commit transaction which
12231 * will make us fail to load log tree when mounting.
12233 if (repair && btrfs_super_log_root(info->super_copy)) {
12234 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
12239 ret = zero_log_tree(root);
12241 error("failed to zero log tree: %d", ret);
12246 uuid_unparse(info->super_copy->fsid, uuidbuf);
12247 if (qgroup_report) {
12248 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
12250 ret = qgroup_verify_all(info);
12256 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
12257 subvolid, argv[optind], uuidbuf);
12258 ret = print_extent_state(info, subvolid);
12261 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
12263 if (!extent_buffer_uptodate(info->tree_root->node) ||
12264 !extent_buffer_uptodate(info->dev_root->node) ||
12265 !extent_buffer_uptodate(info->chunk_root->node)) {
12266 error("critical roots corrupted, unable to check the filesystem");
12271 if (init_extent_tree || init_csum_tree) {
12272 struct btrfs_trans_handle *trans;
12274 trans = btrfs_start_transaction(info->extent_root, 0);
12275 if (IS_ERR(trans)) {
12276 error("error starting transaction");
12277 ret = PTR_ERR(trans);
12281 if (init_extent_tree) {
12282 printf("Creating a new extent tree\n");
12283 ret = reinit_extent_tree(trans, info);
12288 if (init_csum_tree) {
12289 printf("Reinitialize checksum tree\n");
12290 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
12292 error("checksum tree initialization failed: %d",
12298 ret = fill_csum_tree(trans, info->csum_root,
12301 error("checksum tree refilling failed: %d", ret);
12306 * Ok now we commit and run the normal fsck, which will add
12307 * extent entries for all of the items it finds.
12309 ret = btrfs_commit_transaction(trans, info->extent_root);
12313 if (!extent_buffer_uptodate(info->extent_root->node)) {
12314 error("critical: extent_root, unable to check the filesystem");
12318 if (!extent_buffer_uptodate(info->csum_root->node)) {
12319 error("critical: csum_root, unable to check the filesystem");
12324 if (!ctx.progress_enabled)
12325 fprintf(stderr, "checking extents\n");
12326 if (check_mode == CHECK_MODE_LOWMEM)
12327 ret = check_chunks_and_extents_v2(root);
12329 ret = check_chunks_and_extents(root);
12332 "errors found in extent allocation tree or chunk allocation");
12334 ret = repair_root_items(info);
12338 fprintf(stderr, "Fixed %d roots.\n", ret);
12340 } else if (ret > 0) {
12342 "Found %d roots with an outdated root item.\n",
12345 "Please run a filesystem check with the option --repair to fix them.\n");
12350 if (!ctx.progress_enabled) {
12351 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
12352 fprintf(stderr, "checking free space tree\n");
12354 fprintf(stderr, "checking free space cache\n");
12356 ret = check_space_cache(root);
12361 * We used to have to have these hole extents in between our real
12362 * extents so if we don't have this flag set we need to make sure there
12363 * are no gaps in the file extents for inodes, otherwise we can just
12364 * ignore it when this happens.
12366 no_holes = btrfs_fs_incompat(root->fs_info,
12367 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
12368 if (!ctx.progress_enabled)
12369 fprintf(stderr, "checking fs roots\n");
12370 ret = check_fs_roots(root, &root_cache);
12374 fprintf(stderr, "checking csums\n");
12375 ret = check_csums(root);
12379 fprintf(stderr, "checking root refs\n");
12380 ret = check_root_refs(root, &root_cache);
12384 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
12385 struct extent_buffer *eb;
12387 eb = list_first_entry(&root->fs_info->recow_ebs,
12388 struct extent_buffer, recow);
12389 list_del_init(&eb->recow);
12390 ret = recow_extent_buffer(root, eb);
12395 while (!list_empty(&delete_items)) {
12396 struct bad_item *bad;
12398 bad = list_first_entry(&delete_items, struct bad_item, list);
12399 list_del_init(&bad->list);
12401 ret = delete_bad_item(root, bad);
12405 if (info->quota_enabled) {
12407 fprintf(stderr, "checking quota groups\n");
12408 err = qgroup_verify_all(info);
12412 err = repair_qgroups(info, &qgroups_repaired);
12417 if (!list_empty(&root->fs_info->recow_ebs)) {
12418 error("transid errors in file system");
12422 /* Don't override original ret */
12423 if (!ret && qgroups_repaired)
12424 ret = qgroups_repaired;
12426 if (found_old_backref) { /*
12427 * there was a disk format change when mixed
12428 * backref was in testing tree. The old format
12429 * existed about one week.
12431 printf("\n * Found old mixed backref format. "
12432 "The old format is not supported! *"
12433 "\n * Please mount the FS in readonly mode, "
12434 "backup data and re-format the FS. *\n\n");
12437 printf("found %llu bytes used err is %d\n",
12438 (unsigned long long)bytes_used, ret);
12439 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
12440 printf("total tree bytes: %llu\n",
12441 (unsigned long long)total_btree_bytes);
12442 printf("total fs tree bytes: %llu\n",
12443 (unsigned long long)total_fs_tree_bytes);
12444 printf("total extent tree bytes: %llu\n",
12445 (unsigned long long)total_extent_tree_bytes);
12446 printf("btree space waste bytes: %llu\n",
12447 (unsigned long long)btree_space_waste);
12448 printf("file data blocks allocated: %llu\n referenced %llu\n",
12449 (unsigned long long)data_bytes_allocated,
12450 (unsigned long long)data_bytes_referenced);
12452 free_qgroup_counts();
12453 free_root_recs_tree(&root_cache);
12457 if (ctx.progress_enabled)
12458 task_deinit(ctx.info);