2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
77 enum btrfs_check_mode {
81 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
84 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86 struct extent_backref {
87 struct list_head list;
88 unsigned int is_data:1;
89 unsigned int found_extent_tree:1;
90 unsigned int full_backref:1;
91 unsigned int found_ref:1;
92 unsigned int broken:1;
95 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
97 return list_entry(entry, struct extent_backref, list);
100 struct data_backref {
101 struct extent_backref node;
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 return container_of(back, struct data_backref, node);
121 * Much like data_backref, but with the undetermined members removed
122 * and changed to use list_head.
123 * During extent scan, it is stored in root->orphan_data_extent.
124 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
126 struct orphan_data_extent {
127 struct list_head list;
135 struct tree_backref {
136 struct extent_backref node;
143 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
145 return container_of(back, struct tree_backref, node);
148 /* Explicit initialization for extent_record::flag_block_full_backref */
149 enum { FLAG_UNSET = 2 };
151 struct extent_record {
152 struct list_head backrefs;
153 struct list_head dups;
154 struct list_head list;
155 struct cache_extent cache;
156 struct btrfs_disk_key parent_key;
161 u64 extent_item_refs;
163 u64 parent_generation;
167 unsigned int flag_block_full_backref:2;
168 unsigned int found_rec:1;
169 unsigned int content_checked:1;
170 unsigned int owner_ref_checked:1;
171 unsigned int is_root:1;
172 unsigned int metadata:1;
173 unsigned int bad_full_backref:1;
174 unsigned int crossing_stripes:1;
175 unsigned int wrong_chunk_type:1;
178 static inline struct extent_record* to_extent_record(struct list_head *entry)
180 return container_of(entry, struct extent_record, list);
183 struct inode_backref {
184 struct list_head list;
185 unsigned int found_dir_item:1;
186 unsigned int found_dir_index:1;
187 unsigned int found_inode_ref:1;
197 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
199 return list_entry(entry, struct inode_backref, list);
202 struct root_item_record {
203 struct list_head list;
210 struct btrfs_key drop_key;
/*
 * Backref error bits. Each flag is a single bit so several can be OR-ed
 * into a backref's errors field (see add_inode_backref()); they are turned
 * into human-readable text by print_ref_error().
 */
213 #define REF_ERR_NO_DIR_ITEM (1 << 0)
214 #define REF_ERR_NO_DIR_INDEX (1 << 1)
215 #define REF_ERR_NO_INODE_REF (1 << 2)
216 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
217 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
218 #define REF_ERR_DUP_INODE_REF (1 << 5)
219 #define REF_ERR_INDEX_UNMATCH (1 << 6)
220 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
221 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
222 #define REF_ERR_NO_ROOT_REF (1 << 9)
223 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
224 #define REF_ERR_DUP_ROOT_REF (1 << 11)
225 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
227 struct file_extent_hole {
233 struct inode_record {
234 struct list_head backrefs;
235 unsigned int checked:1;
236 unsigned int merging:1;
237 unsigned int found_inode_item:1;
238 unsigned int found_dir_item:1;
239 unsigned int found_file_extent:1;
240 unsigned int found_csum_item:1;
241 unsigned int some_csum_missing:1;
242 unsigned int nodatasum:1;
255 struct rb_root holes;
256 struct list_head orphan_extents;
/*
 * Inode error bits. Each flag is a single bit so several can be OR-ed into
 * an inode record's errors field; print_inode_error() decodes them into
 * text.
 */
261 #define I_ERR_NO_INODE_ITEM (1 << 0)
262 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
263 #define I_ERR_DUP_INODE_ITEM (1 << 2)
264 #define I_ERR_DUP_DIR_INDEX (1 << 3)
265 #define I_ERR_ODD_DIR_ITEM (1 << 4)
266 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
267 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
268 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
269 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
270 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
271 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
272 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
273 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
274 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
275 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
277 struct root_backref {
278 struct list_head list;
279 unsigned int found_dir_item:1;
280 unsigned int found_dir_index:1;
281 unsigned int found_back_ref:1;
282 unsigned int found_forward_ref:1;
283 unsigned int reachable:1;
292 static inline struct root_backref* to_root_backref(struct list_head *entry)
294 return list_entry(entry, struct root_backref, list);
298 struct list_head backrefs;
299 struct cache_extent cache;
300 unsigned int found_root_item:1;
306 struct cache_extent cache;
311 struct cache_extent cache;
312 struct cache_tree root_cache;
313 struct cache_tree inode_cache;
314 struct inode_record *current;
323 struct walk_control {
324 struct cache_tree shared;
325 struct shared_node *nodes[BTRFS_MAX_LEVEL];
331 struct btrfs_key key;
333 struct list_head list;
336 struct extent_entry {
341 struct list_head list;
344 struct root_item_info {
345 /* level of the root */
347 /* number of nodes at this level, must be 1 for a root */
351 struct cache_extent cache_extent;
355 * Error bit for low memory mode check.
357 * Currently no caller cares about it yet. Just internal use for error
/*
 * Error bits for the low memory mode check.
 *
 * Each flag owns exactly one bit so that several errors can be OR-ed into a
 * single mask and still be told apart afterwards.  CROSSING_STRIPE_BOUNDARY
 * previously shared bit 4 with REFERENCER_MISMATCH, which made the two
 * conditions indistinguishable; it now uses the first free bit.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8) /* Chunk type does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
371 static void *print_status_check(void *p)
373 struct task_ctx *priv = p;
374 const char work_indicator[] = { '.', 'o', 'O', 'o' };
376 static char *task_position_string[] = {
378 "checking free space cache",
382 task_period_start(priv->info, 1000 /* 1s */);
384 if (priv->tp == TASK_NOTHING)
388 printf("%s [%c]\r", task_position_string[priv->tp],
389 work_indicator[count % 4]);
392 task_period_wait(priv->info);
397 static int print_status_return(void *p)
405 static enum btrfs_check_mode parse_check_mode(const char *str)
407 if (strcmp(str, "lowmem") == 0)
408 return CHECK_MODE_LOWMEM;
409 if (strcmp(str, "orig") == 0)
410 return CHECK_MODE_ORIGINAL;
411 if (strcmp(str, "original") == 0)
412 return CHECK_MODE_ORIGINAL;
414 return CHECK_MODE_UNKNOWN;
417 /* Compatibility helper to allow reuse of old code */
418 static u64 first_extent_gap(struct rb_root *holes)
420 struct file_extent_hole *hole;
422 if (RB_EMPTY_ROOT(holes))
425 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
429 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
431 struct file_extent_hole *hole1;
432 struct file_extent_hole *hole2;
434 hole1 = rb_entry(node1, struct file_extent_hole, node);
435 hole2 = rb_entry(node2, struct file_extent_hole, node);
437 if (hole1->start > hole2->start)
439 if (hole1->start < hole2->start)
441 /* Now hole1->start == hole2->start */
442 if (hole1->len >= hole2->len)
444 * Hole 1 will be merge center
445 * Same hole will be merged later
448 /* Hole 2 will be merge center */
453 * Add a hole to the record
455 * This also merges holes on behalf of copy_file_extent_holes(),
456 * which ensures there are no adjacent or overlapping holes left.
458 static int add_file_extent_hole(struct rb_root *holes,
461 struct file_extent_hole *hole;
462 struct file_extent_hole *prev = NULL;
463 struct file_extent_hole *next = NULL;
465 hole = malloc(sizeof(*hole));
470 /* Since compare will not return 0, no -EEXIST will happen */
471 rb_insert(holes, &hole->node, compare_hole);
473 /* simple merge with previous hole */
474 if (rb_prev(&hole->node))
475 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
477 if (prev && prev->start + prev->len >= hole->start) {
478 hole->len = hole->start + hole->len - prev->start;
479 hole->start = prev->start;
480 rb_erase(&prev->node, holes);
485 /* iterate merge with next holes */
487 if (!rb_next(&hole->node))
489 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
491 if (hole->start + hole->len >= next->start) {
492 if (hole->start + hole->len <= next->start + next->len)
493 hole->len = next->start + next->len -
495 rb_erase(&next->node, holes);
504 static int compare_hole_range(struct rb_node *node, void *data)
506 struct file_extent_hole *hole;
509 hole = (struct file_extent_hole *)data;
512 hole = rb_entry(node, struct file_extent_hole, node);
513 if (start < hole->start)
515 if (start >= hole->start && start < hole->start + hole->len)
521 * Delete a hole in the record
523 * This will split the hole and is much stricter than add.
525 static int del_file_extent_hole(struct rb_root *holes,
528 struct file_extent_hole *hole;
529 struct file_extent_hole tmp;
534 struct rb_node *node;
541 node = rb_search(holes, &tmp, compare_hole_range, NULL);
544 hole = rb_entry(node, struct file_extent_hole, node);
545 if (start + len > hole->start + hole->len)
549 * Now there will be no overlap, delete the hole and re-add the
550 * split(s) if they exist.
552 if (start > hole->start) {
553 prev_start = hole->start;
554 prev_len = start - hole->start;
557 if (hole->start + hole->len > start + len) {
558 next_start = start + len;
559 next_len = hole->start + hole->len - start - len;
562 rb_erase(node, holes);
565 ret = add_file_extent_hole(holes, prev_start, prev_len);
570 ret = add_file_extent_hole(holes, next_start, next_len);
577 static int copy_file_extent_holes(struct rb_root *dst,
580 struct file_extent_hole *hole;
581 struct rb_node *node;
584 node = rb_first(src);
586 hole = rb_entry(node, struct file_extent_hole, node);
587 ret = add_file_extent_hole(dst, hole->start, hole->len);
590 node = rb_next(node);
595 static void free_file_extent_holes(struct rb_root *holes)
597 struct rb_node *node;
598 struct file_extent_hole *hole;
600 node = rb_first(holes);
602 hole = rb_entry(node, struct file_extent_hole, node);
603 rb_erase(node, holes);
605 node = rb_first(holes);
609 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
611 static void record_root_in_trans(struct btrfs_trans_handle *trans,
612 struct btrfs_root *root)
614 if (root->last_trans != trans->transid) {
615 root->track_dirty = 1;
616 root->last_trans = trans->transid;
617 root->commit_root = root->node;
618 extent_buffer_get(root->node);
622 static u8 imode_to_type(u32 imode)
625 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
626 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
627 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
628 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
629 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
630 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
631 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
632 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
635 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
639 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
641 struct device_record *rec1;
642 struct device_record *rec2;
644 rec1 = rb_entry(node1, struct device_record, node);
645 rec2 = rb_entry(node2, struct device_record, node);
646 if (rec1->devid > rec2->devid)
648 else if (rec1->devid < rec2->devid)
654 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
656 struct inode_record *rec;
657 struct inode_backref *backref;
658 struct inode_backref *orig;
659 struct inode_backref *tmp;
660 struct orphan_data_extent *src_orphan;
661 struct orphan_data_extent *dst_orphan;
666 rec = malloc(sizeof(*rec));
668 return ERR_PTR(-ENOMEM);
669 memcpy(rec, orig_rec, sizeof(*rec));
671 INIT_LIST_HEAD(&rec->backrefs);
672 INIT_LIST_HEAD(&rec->orphan_extents);
673 rec->holes = RB_ROOT;
675 list_for_each_entry(orig, &orig_rec->backrefs, list) {
676 size = sizeof(*orig) + orig->namelen + 1;
677 backref = malloc(size);
682 memcpy(backref, orig, size);
683 list_add_tail(&backref->list, &rec->backrefs);
685 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
686 dst_orphan = malloc(sizeof(*dst_orphan));
691 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
692 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
694 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
701 rb = rb_first(&rec->holes);
703 struct file_extent_hole *hole;
705 hole = rb_entry(rb, struct file_extent_hole, node);
711 if (!list_empty(&rec->backrefs))
712 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
713 list_del(&orig->list);
717 if (!list_empty(&rec->orphan_extents))
718 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
719 list_del(&orig->list);
728 static void print_orphan_data_extents(struct list_head *orphan_extents,
731 struct orphan_data_extent *orphan;
733 if (list_empty(orphan_extents))
735 printf("The following data extent is lost in tree %llu:\n",
737 list_for_each_entry(orphan, orphan_extents, list) {
738 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
739 orphan->objectid, orphan->offset, orphan->disk_bytenr,
744 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
746 u64 root_objectid = root->root_key.objectid;
747 int errors = rec->errors;
751 /* For reloc root errors, print the objectid of the corresponding fs root */
752 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
753 root_objectid = root->root_key.offset;
754 fprintf(stderr, "reloc");
756 fprintf(stderr, "root %llu inode %llu errors %x",
757 (unsigned long long) root_objectid,
758 (unsigned long long) rec->ino, rec->errors);
760 if (errors & I_ERR_NO_INODE_ITEM)
761 fprintf(stderr, ", no inode item");
762 if (errors & I_ERR_NO_ORPHAN_ITEM)
763 fprintf(stderr, ", no orphan item");
764 if (errors & I_ERR_DUP_INODE_ITEM)
765 fprintf(stderr, ", dup inode item");
766 if (errors & I_ERR_DUP_DIR_INDEX)
767 fprintf(stderr, ", dup dir index");
768 if (errors & I_ERR_ODD_DIR_ITEM)
769 fprintf(stderr, ", odd dir item");
770 if (errors & I_ERR_ODD_FILE_EXTENT)
771 fprintf(stderr, ", odd file extent");
772 if (errors & I_ERR_BAD_FILE_EXTENT)
773 fprintf(stderr, ", bad file extent");
774 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
775 fprintf(stderr, ", file extent overlap");
776 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
777 fprintf(stderr, ", file extent discount");
778 if (errors & I_ERR_DIR_ISIZE_WRONG)
779 fprintf(stderr, ", dir isize wrong");
780 if (errors & I_ERR_FILE_NBYTES_WRONG)
781 fprintf(stderr, ", nbytes wrong");
782 if (errors & I_ERR_ODD_CSUM_ITEM)
783 fprintf(stderr, ", odd csum item");
784 if (errors & I_ERR_SOME_CSUM_MISSING)
785 fprintf(stderr, ", some csum missing");
786 if (errors & I_ERR_LINK_COUNT_WRONG)
787 fprintf(stderr, ", link count wrong");
788 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
789 fprintf(stderr, ", orphan file extent");
790 fprintf(stderr, "\n");
791 /* Print the orphan extents if needed */
792 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
793 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
795 /* Print the holes if needed */
796 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
797 struct file_extent_hole *hole;
798 struct rb_node *node;
801 node = rb_first(&rec->holes);
802 fprintf(stderr, "Found file extent holes:\n");
805 hole = rb_entry(node, struct file_extent_hole, node);
806 fprintf(stderr, "\tstart: %llu, len: %llu\n",
807 hole->start, hole->len);
808 node = rb_next(node);
811 fprintf(stderr, "\tstart: 0, len: %llu\n",
812 round_up(rec->isize, root->sectorsize));
816 static void print_ref_error(int errors)
818 if (errors & REF_ERR_NO_DIR_ITEM)
819 fprintf(stderr, ", no dir item");
820 if (errors & REF_ERR_NO_DIR_INDEX)
821 fprintf(stderr, ", no dir index");
822 if (errors & REF_ERR_NO_INODE_REF)
823 fprintf(stderr, ", no inode ref");
824 if (errors & REF_ERR_DUP_DIR_ITEM)
825 fprintf(stderr, ", dup dir item");
826 if (errors & REF_ERR_DUP_DIR_INDEX)
827 fprintf(stderr, ", dup dir index");
828 if (errors & REF_ERR_DUP_INODE_REF)
829 fprintf(stderr, ", dup inode ref");
830 if (errors & REF_ERR_INDEX_UNMATCH)
831 fprintf(stderr, ", index mismatch");
832 if (errors & REF_ERR_FILETYPE_UNMATCH)
833 fprintf(stderr, ", filetype mismatch");
834 if (errors & REF_ERR_NAME_TOO_LONG)
835 fprintf(stderr, ", name too long");
836 if (errors & REF_ERR_NO_ROOT_REF)
837 fprintf(stderr, ", no root ref");
838 if (errors & REF_ERR_NO_ROOT_BACKREF)
839 fprintf(stderr, ", no root backref");
840 if (errors & REF_ERR_DUP_ROOT_REF)
841 fprintf(stderr, ", dup root ref");
842 if (errors & REF_ERR_DUP_ROOT_BACKREF)
843 fprintf(stderr, ", dup root backref");
844 fprintf(stderr, "\n");
847 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
850 struct ptr_node *node;
851 struct cache_extent *cache;
852 struct inode_record *rec = NULL;
855 cache = lookup_cache_extent(inode_cache, ino, 1);
857 node = container_of(cache, struct ptr_node, cache);
859 if (mod && rec->refs > 1) {
860 node->data = clone_inode_rec(rec);
861 if (IS_ERR(node->data))
867 rec = calloc(1, sizeof(*rec));
869 return ERR_PTR(-ENOMEM);
871 rec->extent_start = (u64)-1;
873 INIT_LIST_HEAD(&rec->backrefs);
874 INIT_LIST_HEAD(&rec->orphan_extents);
875 rec->holes = RB_ROOT;
877 node = malloc(sizeof(*node));
880 return ERR_PTR(-ENOMEM);
882 node->cache.start = ino;
883 node->cache.size = 1;
886 if (ino == BTRFS_FREE_INO_OBJECTID)
889 ret = insert_cache_extent(inode_cache, &node->cache);
891 return ERR_PTR(-EEXIST);
896 static void free_orphan_data_extents(struct list_head *orphan_extents)
898 struct orphan_data_extent *orphan;
900 while (!list_empty(orphan_extents)) {
901 orphan = list_entry(orphan_extents->next,
902 struct orphan_data_extent, list);
903 list_del(&orphan->list);
908 static void free_inode_rec(struct inode_record *rec)
910 struct inode_backref *backref;
915 while (!list_empty(&rec->backrefs)) {
916 backref = to_inode_backref(rec->backrefs.next);
917 list_del(&backref->list);
920 free_orphan_data_extents(&rec->orphan_extents);
921 free_file_extent_holes(&rec->holes);
925 static int can_free_inode_rec(struct inode_record *rec)
927 if (!rec->errors && rec->checked && rec->found_inode_item &&
928 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
933 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
934 struct inode_record *rec)
936 struct cache_extent *cache;
937 struct inode_backref *tmp, *backref;
938 struct ptr_node *node;
941 if (!rec->found_inode_item)
944 filetype = imode_to_type(rec->imode);
945 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
946 if (backref->found_dir_item && backref->found_dir_index) {
947 if (backref->filetype != filetype)
948 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
949 if (!backref->errors && backref->found_inode_ref &&
950 rec->nlink == rec->found_link) {
951 list_del(&backref->list);
957 if (!rec->checked || rec->merging)
960 if (S_ISDIR(rec->imode)) {
961 if (rec->found_size != rec->isize)
962 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
963 if (rec->found_file_extent)
964 rec->errors |= I_ERR_ODD_FILE_EXTENT;
965 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
966 if (rec->found_dir_item)
967 rec->errors |= I_ERR_ODD_DIR_ITEM;
968 if (rec->found_size != rec->nbytes)
969 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
970 if (rec->nlink > 0 && !no_holes &&
971 (rec->extent_end < rec->isize ||
972 first_extent_gap(&rec->holes) < rec->isize))
973 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
976 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
977 if (rec->found_csum_item && rec->nodatasum)
978 rec->errors |= I_ERR_ODD_CSUM_ITEM;
979 if (rec->some_csum_missing && !rec->nodatasum)
980 rec->errors |= I_ERR_SOME_CSUM_MISSING;
983 BUG_ON(rec->refs != 1);
984 if (can_free_inode_rec(rec)) {
985 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
986 node = container_of(cache, struct ptr_node, cache);
987 BUG_ON(node->data != rec);
988 remove_cache_extent(inode_cache, &node->cache);
994 static int check_orphan_item(struct btrfs_root *root, u64 ino)
996 struct btrfs_path path;
997 struct btrfs_key key;
1000 key.objectid = BTRFS_ORPHAN_OBJECTID;
1001 key.type = BTRFS_ORPHAN_ITEM_KEY;
1004 btrfs_init_path(&path);
1005 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1006 btrfs_release_path(&path);
1012 static int process_inode_item(struct extent_buffer *eb,
1013 int slot, struct btrfs_key *key,
1014 struct shared_node *active_node)
1016 struct inode_record *rec;
1017 struct btrfs_inode_item *item;
1019 rec = active_node->current;
1020 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1021 if (rec->found_inode_item) {
1022 rec->errors |= I_ERR_DUP_INODE_ITEM;
1025 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1026 rec->nlink = btrfs_inode_nlink(eb, item);
1027 rec->isize = btrfs_inode_size(eb, item);
1028 rec->nbytes = btrfs_inode_nbytes(eb, item);
1029 rec->imode = btrfs_inode_mode(eb, item);
1030 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1032 rec->found_inode_item = 1;
1033 if (rec->nlink == 0)
1034 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1035 maybe_free_inode_rec(&active_node->inode_cache, rec);
1039 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1041 int namelen, u64 dir)
1043 struct inode_backref *backref;
1045 list_for_each_entry(backref, &rec->backrefs, list) {
1046 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1048 if (backref->dir != dir || backref->namelen != namelen)
1050 if (memcmp(name, backref->name, namelen))
1055 backref = malloc(sizeof(*backref) + namelen + 1);
1058 memset(backref, 0, sizeof(*backref));
1060 backref->namelen = namelen;
1061 memcpy(backref->name, name, namelen);
1062 backref->name[namelen] = '\0';
1063 list_add_tail(&backref->list, &rec->backrefs);
1067 static int add_inode_backref(struct cache_tree *inode_cache,
1068 u64 ino, u64 dir, u64 index,
1069 const char *name, int namelen,
1070 u8 filetype, u8 itemtype, int errors)
1072 struct inode_record *rec;
1073 struct inode_backref *backref;
1075 rec = get_inode_rec(inode_cache, ino, 1);
1076 BUG_ON(IS_ERR(rec));
1077 backref = get_inode_backref(rec, name, namelen, dir);
1080 backref->errors |= errors;
1081 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1082 if (backref->found_dir_index)
1083 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1084 if (backref->found_inode_ref && backref->index != index)
1085 backref->errors |= REF_ERR_INDEX_UNMATCH;
1086 if (backref->found_dir_item && backref->filetype != filetype)
1087 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1089 backref->index = index;
1090 backref->filetype = filetype;
1091 backref->found_dir_index = 1;
1092 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1094 if (backref->found_dir_item)
1095 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1096 if (backref->found_dir_index && backref->filetype != filetype)
1097 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1099 backref->filetype = filetype;
1100 backref->found_dir_item = 1;
1101 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1102 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1103 if (backref->found_inode_ref)
1104 backref->errors |= REF_ERR_DUP_INODE_REF;
1105 if (backref->found_dir_index && backref->index != index)
1106 backref->errors |= REF_ERR_INDEX_UNMATCH;
1108 backref->index = index;
1110 backref->ref_type = itemtype;
1111 backref->found_inode_ref = 1;
1116 maybe_free_inode_rec(inode_cache, rec);
1120 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1121 struct cache_tree *dst_cache)
1123 struct inode_backref *backref;
1128 list_for_each_entry(backref, &src->backrefs, list) {
1129 if (backref->found_dir_index) {
1130 add_inode_backref(dst_cache, dst->ino, backref->dir,
1131 backref->index, backref->name,
1132 backref->namelen, backref->filetype,
1133 BTRFS_DIR_INDEX_KEY, backref->errors);
1135 if (backref->found_dir_item) {
1137 add_inode_backref(dst_cache, dst->ino,
1138 backref->dir, 0, backref->name,
1139 backref->namelen, backref->filetype,
1140 BTRFS_DIR_ITEM_KEY, backref->errors);
1142 if (backref->found_inode_ref) {
1143 add_inode_backref(dst_cache, dst->ino,
1144 backref->dir, backref->index,
1145 backref->name, backref->namelen, 0,
1146 backref->ref_type, backref->errors);
1150 if (src->found_dir_item)
1151 dst->found_dir_item = 1;
1152 if (src->found_file_extent)
1153 dst->found_file_extent = 1;
1154 if (src->found_csum_item)
1155 dst->found_csum_item = 1;
1156 if (src->some_csum_missing)
1157 dst->some_csum_missing = 1;
1158 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1159 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1164 BUG_ON(src->found_link < dir_count);
1165 dst->found_link += src->found_link - dir_count;
1166 dst->found_size += src->found_size;
1167 if (src->extent_start != (u64)-1) {
1168 if (dst->extent_start == (u64)-1) {
1169 dst->extent_start = src->extent_start;
1170 dst->extent_end = src->extent_end;
1172 if (dst->extent_end > src->extent_start)
1173 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1174 else if (dst->extent_end < src->extent_start) {
1175 ret = add_file_extent_hole(&dst->holes,
1177 src->extent_start - dst->extent_end);
1179 if (dst->extent_end < src->extent_end)
1180 dst->extent_end = src->extent_end;
1184 dst->errors |= src->errors;
1185 if (src->found_inode_item) {
1186 if (!dst->found_inode_item) {
1187 dst->nlink = src->nlink;
1188 dst->isize = src->isize;
1189 dst->nbytes = src->nbytes;
1190 dst->imode = src->imode;
1191 dst->nodatasum = src->nodatasum;
1192 dst->found_inode_item = 1;
1194 dst->errors |= I_ERR_DUP_INODE_ITEM;
1202 static int splice_shared_node(struct shared_node *src_node,
1203 struct shared_node *dst_node)
1205 struct cache_extent *cache;
1206 struct ptr_node *node, *ins;
1207 struct cache_tree *src, *dst;
1208 struct inode_record *rec, *conflict;
1209 u64 current_ino = 0;
1213 if (--src_node->refs == 0)
1215 if (src_node->current)
1216 current_ino = src_node->current->ino;
1218 src = &src_node->root_cache;
1219 dst = &dst_node->root_cache;
1221 cache = search_cache_extent(src, 0);
1223 node = container_of(cache, struct ptr_node, cache);
1225 cache = next_cache_extent(cache);
1228 remove_cache_extent(src, &node->cache);
1231 ins = malloc(sizeof(*ins));
1233 ins->cache.start = node->cache.start;
1234 ins->cache.size = node->cache.size;
1238 ret = insert_cache_extent(dst, &ins->cache);
1239 if (ret == -EEXIST) {
1240 conflict = get_inode_rec(dst, rec->ino, 1);
1241 BUG_ON(IS_ERR(conflict));
1242 merge_inode_recs(rec, conflict, dst);
1244 conflict->checked = 1;
1245 if (dst_node->current == conflict)
1246 dst_node->current = NULL;
1248 maybe_free_inode_rec(dst, conflict);
1249 free_inode_rec(rec);
1256 if (src == &src_node->root_cache) {
1257 src = &src_node->inode_cache;
1258 dst = &dst_node->inode_cache;
1262 if (current_ino > 0 && (!dst_node->current ||
1263 current_ino > dst_node->current->ino)) {
1264 if (dst_node->current) {
1265 dst_node->current->checked = 1;
1266 maybe_free_inode_rec(dst, dst_node->current);
1268 dst_node->current = get_inode_rec(dst, current_ino, 1);
1269 BUG_ON(IS_ERR(dst_node->current));
1274 static void free_inode_ptr(struct cache_extent *cache)
1276 struct ptr_node *node;
1277 struct inode_record *rec;
1279 node = container_of(cache, struct ptr_node, cache);
1281 free_inode_rec(rec);
1285 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1287 static struct shared_node *find_shared_node(struct cache_tree *shared,
1290 struct cache_extent *cache;
1291 struct shared_node *node;
1293 cache = lookup_cache_extent(shared, bytenr, 1);
1295 node = container_of(cache, struct shared_node, cache);
1301 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1304 struct shared_node *node;
1306 node = calloc(1, sizeof(*node));
1309 node->cache.start = bytenr;
1310 node->cache.size = 1;
1311 cache_tree_init(&node->root_cache);
1312 cache_tree_init(&node->inode_cache);
1315 ret = insert_cache_extent(shared, &node->cache);
1320 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1321 struct walk_control *wc, int level)
1323 struct shared_node *node;
1324 struct shared_node *dest;
1327 if (level == wc->active_node)
1330 BUG_ON(wc->active_node <= level);
1331 node = find_shared_node(&wc->shared, bytenr);
1333 ret = add_shared_node(&wc->shared, bytenr, refs);
1335 node = find_shared_node(&wc->shared, bytenr);
1336 wc->nodes[level] = node;
1337 wc->active_node = level;
1341 if (wc->root_level == wc->active_node &&
1342 btrfs_root_refs(&root->root_item) == 0) {
1343 if (--node->refs == 0) {
1344 free_inode_recs_tree(&node->root_cache);
1345 free_inode_recs_tree(&node->inode_cache);
1346 remove_cache_extent(&wc->shared, &node->cache);
1352 dest = wc->nodes[wc->active_node];
1353 splice_shared_node(node, dest);
1354 if (node->refs == 0) {
1355 remove_cache_extent(&wc->shared, &node->cache);
1361 static int leave_shared_node(struct btrfs_root *root,
1362 struct walk_control *wc, int level)
1364 struct shared_node *node;
1365 struct shared_node *dest;
1368 if (level == wc->root_level)
1371 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1375 BUG_ON(i >= BTRFS_MAX_LEVEL);
1377 node = wc->nodes[wc->active_node];
1378 wc->nodes[wc->active_node] = NULL;
1379 wc->active_node = i;
1381 dest = wc->nodes[wc->active_node];
1382 if (wc->active_node < wc->root_level ||
1383 btrfs_root_refs(&root->root_item) > 0) {
1384 BUG_ON(node->refs <= 1);
1385 splice_shared_node(node, dest);
1387 BUG_ON(node->refs < 2);
1396 * 1 - if the root with id child_root_id is a child of root parent_root_id
1397 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1398 * has other root(s) as parent(s)
1399 * 2 - if the root child_root_id doesn't have any parent roots
1401 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1404 struct btrfs_path path;
1405 struct btrfs_key key;
1406 struct extent_buffer *leaf;
1410 btrfs_init_path(&path);
/*
 * First lookup: the forward reference
 * (parent_root_id, ROOT_REF, child_root_id) in the tree root.
 */
1412 key.objectid = parent_root_id;
1413 key.type = BTRFS_ROOT_REF_KEY;
1414 key.offset = child_root_id;
1415 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1419 btrfs_release_path(&path);
/*
 * Second lookup: walk the ROOT_BACKREF items whose objectid is
 * child_root_id; each item's key offset names one parent root.
 */
1423 key.objectid = child_root_id;
1424 key.type = BTRFS_ROOT_BACKREF_KEY;
1426 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1432 leaf = path.nodes[0];
/* Move on to the next leaf once the current one is exhausted. */
1433 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1434 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1437 leaf = path.nodes[0];
1440 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Past the last ROOT_BACKREF item of child_root_id. */
1441 if (key.objectid != child_root_id ||
1442 key.type != BTRFS_ROOT_BACKREF_KEY)
/* This backref points at the parent we were asked about. */
1447 if (key.offset == parent_root_id) {
1448 btrfs_release_path(&path);
1455 btrfs_release_path(&path);
/*
 * has_parent presumably records that a backref to some other parent
 * was seen during the walk -- TODO confirm.
 */
1458 return has_parent ? 0 : 2;
/*
 * Record every directory entry stored in the DIR_ITEM/DIR_INDEX item at
 * @slot of @eb as an inode backref.
 *
 * @key is the key of the item; its objectid and offset are passed to
 * add_inode_backref() together with each entry's name and file type.
 * Entries pointing at an inode go to the active node's inode_cache, entries
 * pointing at a root item go to its root_cache.
 */
1461 static int process_dir_item(struct btrfs_root *root,
1462 struct extent_buffer *eb,
1463 int slot, struct btrfs_key *key,
1464 struct shared_node *active_node)
1474 struct btrfs_dir_item *di;
1475 struct inode_record *rec;
1476 struct cache_tree *root_cache;
1477 struct cache_tree *inode_cache;
1478 struct btrfs_key location;
1479 char namebuf[BTRFS_NAME_LEN];
1481 root_cache = &active_node->root_cache;
1482 inode_cache = &active_node->inode_cache;
1483 rec = active_node->current;
1484 rec->found_dir_item = 1;
1486 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1487 total = btrfs_item_size_nr(eb, slot);
/* One item may pack several entries back to back; walk all of them. */
1488 while (cur < total) {
1490 btrfs_dir_item_key_to_cpu(eb, di, &location);
1491 name_len = btrfs_dir_name_len(eb, di);
1492 data_len = btrfs_dir_data_len(eb, di);
1493 filetype = btrfs_dir_type(eb, di);
/* The name lengths seen are accumulated into the record's found_size. */
1495 rec->found_size += name_len;
/*
 * Names longer than BTRFS_NAME_LEN are truncated to fit namebuf and
 * tagged with REF_ERR_NAME_TOO_LONG.
 */
1496 if (name_len <= BTRFS_NAME_LEN) {
1500 len = BTRFS_NAME_LEN;
1501 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes directly follow the btrfs_dir_item header. */
1503 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1505 if (location.type == BTRFS_INODE_ITEM_KEY) {
1506 add_inode_backref(inode_cache, location.objectid,
1507 key->objectid, key->offset, namebuf,
1508 len, filetype, key->type, error);
1509 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1510 add_inode_backref(root_cache, location.objectid,
1511 key->objectid, key->offset,
1512 namebuf, len, filetype,
/*
 * Any other location type is reported and the entry is filed under
 * BTRFS_MULTIPLE_OBJECTIDS in the inode cache.
 */
1515 fprintf(stderr, "invalid location in dir item %u\n",
1517 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1518 key->objectid, key->offset, namebuf,
1519 len, filetype, key->type, error);
/* Step over header, name and data to reach the next packed entry. */
1522 len = sizeof(*di) + name_len + data_len;
1523 di = (struct btrfs_dir_item *)((char *)di + len);
/*
 * A DIR_INDEX item with more than one entry is flagged as a duplicate
 * (nritems presumably counts the entries walked above -- TODO confirm).
 */
1526 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1527 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every name stored in the INODE_REF item at @slot of @eb as a
 * backref in the active node's inode cache.
 *
 * add_inode_backref() is called with @key->objectid as the inode and
 * @key->offset as the directory, plus the index and name read from each
 * packed btrfs_inode_ref entry.
 */
1532 static int process_inode_ref(struct extent_buffer *eb,
1533 int slot, struct btrfs_key *key,
1534 struct shared_node *active_node)
1542 struct cache_tree *inode_cache;
1543 struct btrfs_inode_ref *ref;
1544 char namebuf[BTRFS_NAME_LEN];
1546 inode_cache = &active_node->inode_cache;
1548 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1549 total = btrfs_item_size_nr(eb, slot);
/* Walk all refs packed into this item. */
1550 while (cur < total) {
1551 name_len = btrfs_inode_ref_name_len(eb, ref);
1552 index = btrfs_inode_ref_index(eb, ref);
/* Over-long names are truncated to BTRFS_NAME_LEN and flagged. */
1553 if (name_len <= BTRFS_NAME_LEN) {
1557 len = BTRFS_NAME_LEN;
1558 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes directly follow the btrfs_inode_ref header. */
1560 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1561 add_inode_backref(inode_cache, key->objectid, key->offset,
1562 index, namebuf, len, 0, key->type, error);
/* Advance past header and name to the next packed ref. */
1564 len = sizeof(*ref) + name_len;
1565 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Record every name stored in the INODE_EXTREF item at @slot of @eb as a
 * backref in the active node's inode cache.
 *
 * Unlike process_inode_ref(), the parent directory is read from each
 * extref entry rather than taken from @key->offset.
 */
1571 static int process_inode_extref(struct extent_buffer *eb,
1572 int slot, struct btrfs_key *key,
1573 struct shared_node *active_node)
1582 struct cache_tree *inode_cache;
1583 struct btrfs_inode_extref *extref;
1584 char namebuf[BTRFS_NAME_LEN];
1586 inode_cache = &active_node->inode_cache;
1588 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1589 total = btrfs_item_size_nr(eb, slot);
/* Walk all extrefs packed into this item. */
1590 while (cur < total) {
1591 name_len = btrfs_inode_extref_name_len(eb, extref);
1592 index = btrfs_inode_extref_index(eb, extref);
1593 parent = btrfs_inode_extref_parent(eb, extref);
/* Over-long names are truncated to BTRFS_NAME_LEN and flagged. */
1594 if (name_len <= BTRFS_NAME_LEN) {
1598 len = BTRFS_NAME_LEN;
1599 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes directly follow the btrfs_inode_extref header. */
1601 read_extent_buffer(eb, namebuf,
1602 (unsigned long)(extref + 1), len);
1603 add_inode_backref(inode_cache, key->objectid, parent,
1604 index, namebuf, len, 0, key->type, error);
/* Advance past header and name to the next packed extref. */
1606 len = sizeof(*extref) + name_len;
1607 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Walk the checksum tree items overlapping the byte range
 * [@start, @start + @len) and report through @found how much of the range
 * they cover (the accumulation into *found is presumably done per item in
 * the loop below -- TODO confirm).
 */
1614 static int count_csum_range(struct btrfs_root *root, u64 start,
1615 u64 len, u64 *found)
1617 struct btrfs_key key;
1618 struct btrfs_path path;
1619 struct extent_buffer *leaf;
/* Size in bytes of one checksum, taken from the superblock. */
1624 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1626 btrfs_init_path(&path);
1628 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1630 key.type = BTRFS_EXTENT_CSUM_KEY;
1632 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/*
 * No exact match: the previous item is inspected as well, since a csum
 * item that starts earlier may still overlap the range.
 */
1636 if (ret > 0 && path.slots[0] > 0) {
1637 leaf = path.nodes[0];
1638 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1639 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1640 key.type == BTRFS_EXTENT_CSUM_KEY)
1645 leaf = path.nodes[0];
/* Move on to the next leaf once the current one is exhausted. */
1646 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1647 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1652 leaf = path.nodes[0];
1655 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Only EXTENT_CSUM items are of interest. */
1656 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1657 key.type != BTRFS_EXTENT_CSUM_KEY)
1660 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* This item starts at or beyond the end of the requested range. */
1661 if (key.offset >= start + len)
1664 if (key.offset > start)
/*
 * Each csum_size bytes of item payload cover one sector, so the item
 * covers the bytes from key.offset up to csum_end.
 */
1667 size = btrfs_item_size_nr(leaf, path.slots[0]);
1668 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1669 if (csum_end > start) {
/* Bytes of the remaining range that this item covers. */
1670 size = min(csum_end - start, len);
1679 btrfs_release_path(&path);
/*
 * Account the EXTENT_DATA item at @slot of @eb against the inode record
 * currently tracked by @active_node.
 *
 * The function tracks the covered file range (extent_start/extent_end),
 * records holes and overlaps, validates the item fields for the extent
 * type, accumulates found_size and, for extents with a disk bytenr, compares
 * the csum coverage reported by count_csum_range() with the extent length.
 * Problems are recorded as I_ERR_* bits in rec->errors.
 */
1685 static int process_file_extent(struct btrfs_root *root,
1686 struct extent_buffer *eb,
1687 int slot, struct btrfs_key *key,
1688 struct shared_node *active_node)
1690 struct inode_record *rec;
1691 struct btrfs_file_extent_item *fi;
1693 u64 disk_bytenr = 0;
1694 u64 extent_offset = 0;
/* Low bits that must be zero for a sector-aligned length. */
1695 u64 mask = root->sectorsize - 1;
1699 rec = active_node->current;
1700 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1701 rec->found_file_extent = 1;
/* (u64)-1 marks "no extent seen yet": start tracking at this item. */
1703 if (rec->extent_start == (u64)-1) {
1704 rec->extent_start = key->offset;
1705 rec->extent_end = key->offset;
/*
 * An item starting before the previous extent's end overlaps it; one
 * starting after it leaves a gap that is recorded as a hole.
 */
1708 if (rec->extent_end > key->offset)
1709 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1710 else if (rec->extent_end < key->offset) {
1711 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1712 key->offset - rec->extent_end);
1717 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1718 extent_type = btrfs_file_extent_type(eb, fi);
1720 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
/* Inline data: count its length, then round up to a full sector. */
1721 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1723 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1724 rec->found_size += num_bytes;
1725 num_bytes = (num_bytes + mask) & ~mask;
1726 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1727 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1728 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1729 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1730 extent_offset = btrfs_file_extent_offset(eb, fi);
/* The length must be non-zero and sector aligned. */
1731 if (num_bytes == 0 || (num_bytes & mask))
1732 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* The referenced slice must fit inside the extent's ram_bytes. */
1733 if (num_bytes + extent_offset >
1734 btrfs_file_extent_ram_bytes(eb, fi))
1735 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not carry any encoding. */
1736 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1737 (btrfs_file_extent_compression(eb, fi) ||
1738 btrfs_file_extent_encryption(eb, fi) ||
1739 btrfs_file_extent_other_encoding(eb, fi)))
1740 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Only extents with a disk bytenr contribute to found_size. */
1741 if (disk_bytenr > 0)
1742 rec->found_size += num_bytes;
1744 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1746 rec->extent_end = key->offset + num_bytes;
1749 * The data reloc tree will copy full extents into its inode and then
1750 * copy the corresponding csums. Because the extent it copied could be
1751 * a preallocated extent that hasn't been written to yet there may be no
1752 * csums to copy, ergo we won't have csums for our file extent. This is
1753 * ok so just don't bother checking csums if the inode belongs to the
1756 if (disk_bytenr > 0 &&
1757 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/*
 * Compressed extents are looked up by their on-disk length; the other
 * path shifts disk_bytenr by extent_offset.
 */
1759 if (btrfs_file_extent_compression(eb, fi))
1760 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1762 disk_bytenr += extent_offset;
1764 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/*
 * REG extents: note that csums exist and whether coverage is partial.
 * PREALLOC extents: I_ERR_ODD_CSUM_ITEM is raised (presumably only when
 * csums were found -- TODO confirm the guarding condition).
 */
1767 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1769 rec->found_csum_item = 1;
1770 if (found < num_bytes)
1771 rec->some_csum_missing = 1;
1772 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1774 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Feed every item of leaf @eb to the matching per-item handler, keeping
 * the active shared node's "current" inode record in step with the
 * objectid of the item being processed.
 */
1780 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1781 struct walk_control *wc)
1783 struct btrfs_key key;
1787 struct cache_tree *inode_cache;
1788 struct shared_node *active_node;
/* Special case: the active node is the root level of a root with zero refs. */
1790 if (wc->root_level == wc->active_node &&
1791 btrfs_root_refs(&root->root_item) == 0)
1794 active_node = wc->nodes[wc->active_node];
1795 inode_cache = &active_node->inode_cache;
1796 nritems = btrfs_header_nritems(eb);
1797 for (i = 0; i < nritems; i++) {
1798 btrfs_item_key_to_cpu(eb, &key, i);
/* Free space objects and orphan items get their own treatment. */
1800 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1802 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/*
 * The item belongs to a later inode than the current record: mark the
 * old record checked, let maybe_free_inode_rec() dispose of it, and
 * fetch the record for the new inode.
 */
1805 if (active_node->current == NULL ||
1806 active_node->current->ino < key.objectid) {
1807 if (active_node->current) {
1808 active_node->current->checked = 1;
1809 maybe_free_inode_rec(inode_cache,
1810 active_node->current);
1812 active_node->current = get_inode_rec(inode_cache,
1814 BUG_ON(IS_ERR(active_node->current));
/* Dispatch on the item type. */
1817 case BTRFS_DIR_ITEM_KEY:
1818 case BTRFS_DIR_INDEX_KEY:
1819 ret = process_dir_item(root, eb, i, &key, active_node);
1821 case BTRFS_INODE_REF_KEY:
1822 ret = process_inode_ref(eb, i, &key, active_node);
1824 case BTRFS_INODE_EXTREF_KEY:
1825 ret = process_inode_extref(eb, i, &key, active_node);
1827 case BTRFS_INODE_ITEM_KEY:
1828 ret = process_inode_item(eb, i, &key, active_node);
1830 case BTRFS_EXTENT_DATA_KEY:
1831 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for the children of @node, starting at pointer @slot
 * and continuing to the last pointer in the node.
 */
1841 static void reada_walk_down(struct btrfs_root *root,
1842 struct extent_buffer *node, int slot)
1851 level = btrfs_header_level(node);
1855 nritems = btrfs_header_nritems(node);
1856 blocksize = root->nodesize;
/* Request each child block with the generation its pointer records. */
1857 for (i = slot; i < nritems; i++) {
1858 bytenr = btrfs_node_blockptr(node, i);
1859 ptr_gen = btrfs_node_ptr_generation(node, i);
1860 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1865 * Check the child node/leaf by the following condition:
1866 * 1. the first item key of the node/leaf should be the same as the one
1868 * 2. block in parent node should match the child node/leaf.
1869 * 3. generation of parent node and child's header should be consistent.
1871 * Otherwise the child node/leaf pointed to by the key in parent is not valid.
1873 * We would like to check the leaf owner too, but since subvolumes may share
1874 * leaves, the owner check is not reliable; the key check should be
1875 * sufficient for that case.
1877 static int check_child_node(struct btrfs_root *root,
1878 struct extent_buffer *parent, int slot,
1879 struct extent_buffer *child)
1881 struct btrfs_key parent_key;
1882 struct btrfs_key child_key;
/*
 * Check 1: the key stored in the parent's pointer must equal the first
 * key of the child (item key for a leaf, node key otherwise).
 */
1885 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1886 if (btrfs_header_level(child) == 0)
1887 btrfs_item_key_to_cpu(child, &child_key, 0);
1889 btrfs_node_key_to_cpu(child, &child_key, 0);
1891 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1894 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1895 parent_key.objectid, parent_key.type, parent_key.offset,
1896 child_key.objectid, child_key.type, child_key.offset);
/* Check 2: the child's own bytenr must match the parent's block pointer. */
1898 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1900 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1901 btrfs_node_blockptr(parent, slot),
1902 btrfs_header_bytenr(child));
/*
 * Check 3: the child's header generation must match the generation
 * recorded in the parent's pointer.
 *
 * NOTE(review): unlike the two messages above, "wanted" is filled with
 * the child's value and "have" with the parent's here, so the two
 * numbers appear in the opposite order -- confirm whether intended.
 */
1904 if (btrfs_node_ptr_generation(parent, slot) !=
1905 btrfs_header_generation(child)) {
1907 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1908 btrfs_header_generation(child),
1909 btrfs_node_ptr_generation(parent, slot));
/*
 * Per-level cache used by walk_down_tree(): for each tree level, the
 * bytenr of the last block whose reference count was looked up and that
 * reference count, so the lookup is skipped when the same block is seen
 * again.
 */
1915 u64 bytenr[BTRFS_MAX_LEVEL];
1916 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend from path->nodes[*level] towards the leaves, processing each
 * leaf reached with process_one_leaf().
 *
 * @path and @level are updated in place as the walk descends.  @nrefs
 * caches the extent reference count last looked up for each level.  Child
 * blocks are validated with check_child_node() and
 * btrfs_check_leaf()/btrfs_check_node() before being descended into.
 */
1919 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1920 struct walk_control *wc, int *level,
1921 struct node_refs *nrefs)
1923 enum btrfs_tree_block_status status;
1926 struct extent_buffer *next;
1927 struct extent_buffer *cur;
1932 WARN_ON(*level < 0);
1933 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/*
 * Reference count of the starting block: reuse the cached value when the
 * block matches the cache, otherwise look it up and refresh the cache.
 */
1935 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1936 refs = nrefs->refs[*level];
1939 ret = btrfs_lookup_extent_info(NULL, root,
1940 path->nodes[*level]->start,
1941 *level, 1, &refs, NULL);
1946 nrefs->bytenr[*level] = path->nodes[*level]->start;
1947 nrefs->refs[*level] = refs;
1951 ret = enter_shared_node(root, path->nodes[*level]->start,
1959 while (*level >= 0) {
1960 WARN_ON(*level < 0);
1961 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1962 cur = path->nodes[*level];
/* The block's header level must agree with where we think we are. */
1964 if (btrfs_header_level(cur) != *level)
/* All slots of this block have been consumed. */
1967 if (path->slots[*level] >= btrfs_header_nritems(cur))
1970 ret = process_one_leaf(root, cur, wc);
/* Interior node: fetch the child pointer at the current slot. */
1975 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1976 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
1977 blocksize = root->nodesize;
/* Same cached-or-lookup scheme for the child's reference count. */
1979 if (bytenr == nrefs->bytenr[*level - 1]) {
1980 refs = nrefs->refs[*level - 1];
1982 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1983 *level - 1, 1, &refs, NULL);
1987 nrefs->bytenr[*level - 1] = bytenr;
1988 nrefs->refs[*level - 1] = refs;
/*
 * enter_shared_node() decides whether the child must be walked; when it
 * need not be, the slot is simply advanced (presumed from the slot
 * increment below -- TODO confirm the guarding condition).
 */
1993 ret = enter_shared_node(root, bytenr, refs,
1996 path->slots[*level]++;
/* Use the cached buffer if it is up to date, otherwise read it in. */
2001 next = btrfs_find_tree_block(root, bytenr, blocksize);
2002 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2003 free_extent_buffer(next);
2004 reada_walk_down(root, cur, path->slots[*level]);
2005 next = read_tree_block(root, bytenr, blocksize,
/* An unreadable child is recorded as a corrupt extent. */
2007 if (!extent_buffer_uptodate(next)) {
2008 struct btrfs_key node_key;
2010 btrfs_node_key_to_cpu(path->nodes[*level],
2012 path->slots[*level]);
2013 btrfs_add_corrupt_extent_record(root->fs_info,
2015 path->nodes[*level]->start,
2016 root->nodesize, *level);
/* Validate the child against its parent pointer, then on its own. */
2022 ret = check_child_node(root, cur, path->slots[*level], next);
2028 if (btrfs_is_leaf(next))
2029 status = btrfs_check_leaf(root, NULL, next);
2031 status = btrfs_check_node(root, NULL, next);
2032 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2033 free_extent_buffer(next);
/* Descend: the child replaces whatever the path held one level down. */
2038 *level = *level - 1;
2039 free_extent_buffer(path->nodes[*level]);
2040 path->nodes[*level] = next;
2041 path->slots[*level] = 0;
/* Mark the block we stopped on as fully consumed. */
2044 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Climb @path from *level looking for a node that still has an unvisited
 * slot.  Each exhausted level is released from the path, and the shared
 * node is left via leave_shared_node() when that level was the active one.
 */
2048 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2049 struct walk_control *wc, int *level)
2052 struct extent_buffer *leaf;
2054 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2055 leaf = path->nodes[i];
/* Is there another slot to visit in this block? */
2056 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
/* This level is exhausted: drop its buffer from the path. */
2061 free_extent_buffer(path->nodes[*level]);
2062 path->nodes[*level] = NULL;
/* The level being left can never be above the active shared node. */
2063 BUG_ON(*level > wc->active_node);
2064 if (*level == wc->active_node)
2065 leave_shared_node(root, wc, *level);
/*
 * Run a series of sanity tests on the inode record of a root directory:
 * an error-free inode item, nlink of 1 with no found links, and a first
 * backref that is an inode ref named ".." at index 0 with no dir
 * index/item.  What each failing test returns is not visible here.
 */
2072 static int check_root_dir(struct inode_record *rec)
2074 struct inode_backref *backref;
2077 if (!rec->found_inode_item || rec->errors)
2079 if (rec->nlink != 1 || rec->found_link != 0)
2081 if (list_empty(&rec->backrefs))
/* Only the first backref on the list is examined. */
2083 backref = to_inode_backref(rec->backrefs.next);
2084 if (!backref->found_inode_ref)
2086 if (backref->index != 0 || backref->namelen != 2 ||
2087 memcmp(backref->name, "..", 2))
2089 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the size field of the inode item of @rec->ino with
 * @rec->found_size and clear I_ERR_DIR_ISIZE_WRONG on the record.
 * @path is used for the lookup and released before returning.
 */
2096 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2097 struct btrfs_root *root, struct btrfs_path *path,
2098 struct inode_record *rec)
2100 struct btrfs_inode_item *ei;
2101 struct btrfs_key key;
/*
 * Search with the maximum offset: the search lands just past the inode
 * item, which is then reached by stepping back from the returned slot
 * (the step itself is not visible here -- TODO confirm).
 */
2104 key.objectid = rec->ino;
2105 key.type = BTRFS_INODE_ITEM_KEY;
2106 key.offset = (u64)-1;
2108 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Slot 0 leaves no previous item to step back to. */
2112 if (!path->slots[0]) {
/* The item found must belong to the inode being repaired. */
2119 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2120 if (key.objectid != rec->ino) {
2125 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2126 struct btrfs_inode_item);
2127 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2128 btrfs_mark_buffer_dirty(path->nodes[0]);
2129 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2130 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2131 root->root_key.objectid);
2133 btrfs_release_path(path);
/*
 * Add an orphan item for @rec->ino via btrfs_add_orphan_item(), release
 * @path, and clear I_ERR_NO_ORPHAN_ITEM on the record (presumably only
 * when the insertion succeeded -- TODO confirm).
 */
2137 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2138 struct btrfs_root *root,
2139 struct btrfs_path *path,
2140 struct inode_record *rec)
2144 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2145 btrfs_release_path(path);
2147 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite the nbytes field of the inode item of @rec->ino with
 * @rec->found_size and clear I_ERR_FILE_NBYTES_WRONG on the record.
 * @path is used for the lookup and released before returning.
 */
2151 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2152 struct btrfs_root *root,
2153 struct btrfs_path *path,
2154 struct inode_record *rec)
2156 struct btrfs_inode_item *ei;
2157 struct btrfs_key key;
/* Look the inode item up for modification. */
2160 key.objectid = rec->ino;
2161 key.type = BTRFS_INODE_ITEM_KEY;
2164 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2171 /* Since ret == 0, no need to check anything */
2172 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2173 struct btrfs_inode_item);
2174 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2175 btrfs_mark_buffer_dirty(path->nodes[0]);
2176 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2177 printf("reset nbytes for ino %llu root %llu\n",
2178 rec->ino, root->root_key.objectid);
2180 btrfs_release_path(path);
/*
 * Insert the DIR_INDEX item that @backref says is missing for inode @rec,
 * in its own transaction, then update the in-memory bookkeeping: the
 * backref is marked as having a dir index and the parent directory's
 * record gets its found_size and I_ERR_DIR_ISIZE_WRONG flag re-evaluated.
 */
2184 static int add_missing_dir_index(struct btrfs_root *root,
2185 struct cache_tree *inode_cache,
2186 struct inode_record *rec,
2187 struct inode_backref *backref)
2189 struct btrfs_path path;
2190 struct btrfs_trans_handle *trans;
2191 struct btrfs_dir_item *dir_item;
2192 struct extent_buffer *leaf;
2193 struct btrfs_key key;
2194 struct btrfs_disk_key disk_key;
2195 struct inode_record *dir_rec;
2196 unsigned long name_ptr;
/* Item payload: the dir item header followed by the name. */
2197 u32 data_size = sizeof(*dir_item) + backref->namelen;
2200 trans = btrfs_start_transaction(root, 1);
2202 return PTR_ERR(trans);
2204 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2205 (unsigned long long)rec->ino);
/* The new item is keyed (parent dir, DIR_INDEX, index). */
2207 btrfs_init_path(&path);
2208 key.objectid = backref->dir;
2209 key.type = BTRFS_DIR_INDEX_KEY;
2210 key.offset = backref->index;
2211 ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2214 leaf = path.nodes[0];
2215 dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
/* The entry's location key points at the inode item of @rec. */
2217 disk_key.objectid = cpu_to_le64(rec->ino);
2218 disk_key.type = BTRFS_INODE_ITEM_KEY;
2219 disk_key.offset = 0;
2221 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2222 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2223 btrfs_set_dir_data_len(leaf, dir_item, 0);
2224 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
/* The name is written directly after the dir item header. */
2225 name_ptr = (unsigned long)(dir_item + 1);
2226 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2227 btrfs_mark_buffer_dirty(leaf);
2228 btrfs_release_path(&path);
2229 btrfs_commit_transaction(trans, root);
/* Reflect the repair in the in-memory records. */
2231 backref->found_dir_index = 1;
2232 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2233 BUG_ON(IS_ERR(dir_rec));
/*
 * The directory now accounts for one more name; set or clear its isize
 * error depending on whether found_size matches isize afterwards.
 */
2236 dir_rec->found_size += backref->namelen;
2237 if (dir_rec->found_size == dir_rec->isize &&
2238 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2239 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2240 if (dir_rec->found_size != dir_rec->isize)
2241 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete the DIR_INDEX entry described by @backref (directory, name and
 * index) in its own transaction.
 */
2246 static int delete_dir_index(struct btrfs_root *root,
2247 struct cache_tree *inode_cache,
2248 struct inode_record *rec,
2249 struct inode_backref *backref)
2251 struct btrfs_trans_handle *trans;
2252 struct btrfs_dir_item *di;
2253 struct btrfs_path path;
2256 trans = btrfs_start_transaction(root, 1);
2258 return PTR_ERR(trans);
2260 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2261 (unsigned long long)backref->dir,
2262 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2263 (unsigned long long)root->objectid);
/* Locate the entry by directory, name and index. */
2265 btrfs_init_path(&path);
2266 di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2267 backref->name, backref->namelen,
2268 backref->index, -1);
/*
 * Early exit path: release the path and commit without deleting
 * (presumably taken when the lookup fails or finds nothing -- TODO
 * confirm).
 */
2271 btrfs_release_path(&path);
2272 btrfs_commit_transaction(trans, root);
/*
 * Two ways to delete: btrfs_del_item() removes the item at the path,
 * btrfs_delete_one_dir_name() removes the entry @di.  The condition
 * choosing between them is not visible here.
 */
2279 ret = btrfs_del_item(trans, root, &path);
2281 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2283 btrfs_release_path(&path);
2284 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing inode item for @rec in its own transaction, filling
 * it from what the check has collected: link count, found_size and
 * whether directory items or file extents were seen.  Access, change and
 * modification times are set to the current time.
 */
2288 static int create_inode_item(struct btrfs_root *root,
2289 struct inode_record *rec,
2290 struct inode_backref *backref, int root_dir)
2292 struct btrfs_trans_handle *trans;
2293 struct btrfs_inode_item inode_item;
2294 time_t now = time(NULL);
2297 trans = btrfs_start_transaction(root, 1);
2298 if (IS_ERR(trans)) {
2299 ret = PTR_ERR(trans);
2303 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2304 "be incomplete, please check permissions and content after "
2305 "the fsck completes.\n", (unsigned long long)root->objectid,
2306 (unsigned long long)rec->ino);
2308 memset(&inode_item, 0, sizeof(inode_item));
2309 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/*
 * nlink is either 1 or the number of links found (presumably selected
 * by @root_dir -- TODO confirm).
 */
2311 btrfs_set_stack_inode_nlink(&inode_item, 1);
2313 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2314 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
/*
 * Directory items win over file extents when guessing the type.
 * Directories take found_size as size; everything else becomes a
 * regular file sized by the end of its last extent.
 */
2315 if (rec->found_dir_item) {
2316 if (rec->found_file_extent)
2317 fprintf(stderr, "root %llu inode %llu has both a dir "
2318 "item and extents, unsure if it is a dir or a "
2319 "regular file so setting it as a directory\n",
2320 (unsigned long long)root->objectid,
2321 (unsigned long long)rec->ino);
2322 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2323 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): this condition is always true here; a plain else would do. */
2324 } else if (!rec->found_dir_item) {
2325 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2326 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2328 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2329 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2330 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2331 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2332 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2333 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2334 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2335 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2337 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2339 btrfs_commit_transaction(trans, root);
/*
 * Walk the backrefs of @rec and repair inconsistent combinations of
 * inode ref, dir item and dir index.  A "delete" flag (declared on a line
 * not visible here) selects between removing bad dir indexes and adding
 * missing pieces.
 *
 * Returns a non-zero repair status from the last failing step, otherwise
 * the value of the local "repaired" counter.
 */
2343 static int repair_inode_backrefs(struct btrfs_root *root,
2344 struct inode_record *rec,
2345 struct cache_tree *inode_cache,
2348 struct inode_backref *tmp, *backref;
2349 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2353 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* The root directory may need its inode item recreated first. */
2354 if (!delete && rec->ino == root_dirid) {
2355 if (!rec->found_inode_item) {
2356 ret = create_inode_item(root, rec, backref, 1);
2363 /* Index 0 for root dir's are special, don't mess with it */
2364 if (rec->ino == root_dirid && backref->index == 0)
/*
 * A dir index without a matching inode ref, or one whose index does
 * not match the inode ref, is deleted and its backref dropped.
 */
2368 ((backref->found_dir_index && !backref->found_inode_ref) ||
2369 (backref->found_dir_index && backref->found_inode_ref &&
2370 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2371 ret = delete_dir_index(root, inode_cache, rec, backref);
2375 list_del(&backref->list);
/* Dir item and inode ref exist but the dir index is missing: add it. */
2379 if (!delete && !backref->found_dir_index &&
2380 backref->found_dir_item && backref->found_inode_ref) {
2381 ret = add_missing_dir_index(root, inode_cache, rec,
/*
 * Fully consistent, error-free backrefs are removed from the list.
 * NOTE(review): found_dir_index is tested twice in this condition;
 * the second test is redundant -- confirm what was intended.
 */
2386 if (backref->found_dir_item &&
2387 backref->found_dir_index &&
2388 backref->found_dir_index) {
2389 if (!backref->errors &&
2390 backref->found_inode_ref) {
2391 list_del(&backref->list);
/* Only the inode ref exists: recreate the dir index/item pair. */
2397 if (!delete && (!backref->found_dir_index &&
2398 !backref->found_dir_item &&
2399 backref->found_inode_ref)) {
2400 struct btrfs_trans_handle *trans;
2401 struct btrfs_key location;
2403 ret = check_dir_conflict(root, backref->name,
2409 * let nlink fixing routine to handle it,
2410 * which can do it better.
2415 location.objectid = rec->ino;
2416 location.type = BTRFS_INODE_ITEM_KEY;
2417 location.offset = 0;
2419 trans = btrfs_start_transaction(root, 1);
2420 if (IS_ERR(trans)) {
2421 ret = PTR_ERR(trans);
2424 fprintf(stderr, "adding missing dir index/item pair "
2426 (unsigned long long)rec->ino);
2427 ret = btrfs_insert_dir_item(trans, root, backref->name,
2429 backref->dir, &location,
2430 imode_to_type(rec->imode),
2433 btrfs_commit_transaction(trans, root);
/* All three references agree but the inode item is missing: recreate it. */
2437 if (!delete && (backref->found_inode_ref &&
2438 backref->found_dir_index &&
2439 backref->found_dir_item &&
2440 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2441 !rec->found_inode_item)) {
2442 ret = create_inode_item(root, rec, backref, 0);
2449 return ret ? ret : repaired;
2453 * To determine the file type for nlink/inode_item repair
2455 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2456 * Return -ENOENT if file type is not found.
2458 static int find_file_type(struct inode_record *rec, u8 *type)
2460 struct inode_backref *backref;
2462 /* For inode item recovered case */
2463 if (rec->found_inode_item) {
2464 *type = imode_to_type(rec->imode);
/*
 * Otherwise take the file type from the first backref that came from a
 * dir index or dir item.
 */
2468 list_for_each_entry(backref, &rec->backrefs, list) {
2469 if (backref->found_dir_index || backref->found_dir_item) {
2470 *type = backref->filetype;
2478 * To determine the file name for nlink repair
2480 * Return 0 if file name is found, set name and namelen.
2481 * Return -ENOENT if file name is not found.
2483 static int find_file_name(struct inode_record *rec,
2484 char *name, int *namelen)
2486 struct inode_backref *backref;
/*
 * Any backref seen in a dir index, dir item or inode ref carries a usable
 * name; the first such name is copied out.  No terminating NUL is
 * written, so callers must rely on *namelen, and @name must have room for
 * backref->namelen bytes.
 */
2488 list_for_each_entry(backref, &rec->backrefs, list) {
2489 if (backref->found_dir_index || backref->found_dir_item ||
2490 backref->found_inode_ref) {
2491 memcpy(name, backref->name, backref->namelen);
2492 *namelen = backref->namelen;
2499 /* Reset the nlink of the inode to the correct one */
2500 static int reset_nlink(struct btrfs_trans_handle *trans,
2501 struct btrfs_root *root,
2502 struct btrfs_path *path,
2503 struct inode_record *rec)
2505 struct inode_backref *backref;
2506 struct inode_backref *tmp;
2507 struct btrfs_key key;
2508 struct btrfs_inode_item *inode_item;
2511 /* We don't believe this either, reset it and iterate backref */
2512 rec->found_link = 0;
2514 /* Remove all backref including the valid ones */
2515 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2516 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2517 backref->index, backref->name,
2518 backref->namelen, 0);
2522 /* remove invalid backref, so it won't be added back */
/* "Valid" here means dir index, dir item and inode ref were all found. */
2523 if (!(backref->found_dir_index &&
2524 backref->found_dir_item &&
2525 backref->found_inode_ref)) {
2526 list_del(&backref->list);
2533 /* Set nlink to 0 */
2534 key.objectid = rec->ino;
2535 key.type = BTRFS_INODE_ITEM_KEY;
2537 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2544 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2545 struct btrfs_inode_item);
2546 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2547 btrfs_mark_buffer_dirty(path->nodes[0]);
2548 btrfs_release_path(path);
2551 * Add back valid inode_ref/dir_item/dir_index,
2552 * add_link() will handle the nlink inc, so new nlink must be correct
/* Only the backrefs that survived the pruning above are still listed. */
2554 list_for_each_entry(backref, &rec->backrefs, list) {
2555 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2556 backref->name, backref->namelen,
2557 backref->filetype, &backref->index, 1);
2562 btrfs_release_path(path);
/*
 * Fix the link count of @rec: recover a name and file type for the inode,
 * rebuild nlink through reset_nlink(), and, when no valid link remains,
 * link the inode into a "lost+found" directory.
 * I_ERR_LINK_COUNT_WRONG is cleared on the record and @path is released
 * before returning.
 */
2566 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2567 struct btrfs_root *root,
2568 struct btrfs_path *path,
2569 struct inode_record *rec)
2571 char *dir_name = "lost+found";
2572 char namebuf[BTRFS_NAME_LEN] = {0};
2577 int name_recovered = 0;
2578 int type_recovered = 0;
2582 * Get file name and type first before these invalid inode ref
2583 * are deleted by remove_all_invalid_backref()
/* Both helpers return 0 on success, hence the negation. */
2585 name_recovered = !find_file_name(rec, namebuf, &namelen);
2586 type_recovered = !find_file_type(rec, &type);
/* Fallback name: the inode number printed in decimal. */
2588 if (!name_recovered) {
2589 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2590 rec->ino, rec->ino);
2591 namelen = count_digits(rec->ino);
2592 sprintf(namebuf, "%llu", rec->ino);
/* Fallback type: regular file. */
2595 if (!type_recovered) {
2596 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2598 type = BTRFS_FT_REG_FILE;
2602 ret = reset_nlink(trans, root, path, rec);
2605 "Failed to reset nlink for inode %llu: %s\n",
2606 rec->ino, strerror(-ret));
/* No valid link is left after the reset: move the inode to lost+found. */
2610 if (rec->found_link == 0) {
2611 lost_found_ino = root->highest_inode;
2612 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2617 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2618 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2621 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2622 dir_name, strerror(-ret));
2625 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2626 namebuf, namelen, type, NULL, 1);
2628 * Add ".INO" suffix several times to handle case where
2629 * "FILENAME.INO" is already taken by another file.
2631 while (ret == -EEXIST) {
2633 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2635 if (namelen + count_digits(rec->ino) + 1 >
2640 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2642 namelen += count_digits(rec->ino) + 1;
2643 ret = btrfs_add_link(trans, root, rec->ino,
2644 lost_found_ino, namebuf,
2645 namelen, type, NULL, 1);
2649 "Failed to link the inode %llu to %s dir: %s\n",
2650 rec->ino, dir_name, strerror(-ret));
2654 * Just increase the found_link, don't actually add the
2655 * backref. This will make things easier and this inode
2656 * record will be freed after the repair is done.
2657 * So fsck will not report problem about this inode.
2660 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2661 namelen, namebuf, dir_name);
2663 printf("Fixed the nlink of inode %llu\n", rec->ino);
2666 * Clear the flag anyway, or we will loop forever for the same inode
2667 * as it will not be removed from the bad inode list and the dead loop
2670 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2671 btrfs_release_path(path);
2676 * Check if there is any normal(reg or prealloc) file extent for given
2678 * This is used to determine the file type when neither its dir_index/item or
2679 * inode_item exists.
2681 * This will *NOT* report errors: if any error happens, the inode is simply
2682 * treated as not having any normal file extent.
2684 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2686 struct btrfs_path path;
2687 struct btrfs_key key;
2688 struct btrfs_key found_key;
2689 struct btrfs_file_extent_item *fi;
2693 btrfs_init_path(&path);
/* Read-only search for the EXTENT_DATA items of the inode. */
2695 key.type = BTRFS_EXTENT_DATA_KEY;
2698 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* No exact match and the slot is past the leaf end: try the next leaf. */
2703 if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
2704 ret = btrfs_next_leaf(root, &path);
2711 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
/* Stop once the items no longer belong to this inode's extent data. */
2713 if (found_key.objectid != ino ||
2714 found_key.type != BTRFS_EXTENT_DATA_KEY)
2716 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
2717 struct btrfs_file_extent_item);
/* Any extent that is not inline counts as a normal file extent. */
2718 type = btrfs_file_extent_type(path.nodes[0], fi);
2719 if (type != BTRFS_FILE_EXTENT_INLINE) {
2725 btrfs_release_path(&path);
2729 static u32 btrfs_type_to_imode(u8 type)
2731 static u32 imode_by_btrfs_type[] = {
2732 [BTRFS_FT_REG_FILE] = S_IFREG,
2733 [BTRFS_FT_DIR] = S_IFDIR,
2734 [BTRFS_FT_CHRDEV] = S_IFCHR,
2735 [BTRFS_FT_BLKDEV] = S_IFBLK,
2736 [BTRFS_FT_FIFO] = S_IFIFO,
2737 [BTRFS_FT_SOCK] = S_IFSOCK,
2738 [BTRFS_FT_SYMLINK] = S_IFLNK,
2741 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for @rec.  The file type is taken from the
 * record's backrefs when possible, otherwise guessed from what else was
 * found for the inode, with regular file as the last resort.  Only the
 * inode item is recreated; the record is then flagged with
 * I_ERR_LINK_COUNT_WRONG so the nlink repair runs afterwards.
 */
2744 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2745 struct btrfs_root *root,
2746 struct btrfs_path *path,
2747 struct inode_record *rec)
2751 int type_recovered = 0;
2754 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* find_file_type() returns 0 on success, hence the negation. */
2756 type_recovered = !find_file_type(rec, &filetype);
2759 * Try to determine inode type if type not found.
2761 * For found regular file extent, it must be FILE.
2762 * For found dir_item/index, it must be DIR.
2764 * For undetermined one, use FILE as fallback.
2767 * 1. If found backref(inode_index/item is already handled) to it,
2769 * Need new inode-inode ref structure to allow search for that.
2771 if (!type_recovered) {
2772 if (rec->found_file_extent &&
2773 find_normal_file_extent(root, rec->ino)) {
2775 filetype = BTRFS_FT_REG_FILE;
2776 } else if (rec->found_dir_item) {
2778 filetype = BTRFS_FT_DIR;
/* Orphan data extents attached to the inode also imply a regular file. */
2779 } else if (!list_empty(&rec->orphan_extents)) {
2781 filetype = BTRFS_FT_REG_FILE;
2783 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2786 filetype = BTRFS_FT_REG_FILE;
/* Create the inode item with the chosen type bits OR-ed into mode. */
2790 ret = btrfs_new_inode(trans, root, rec->ino,
2791 mode | btrfs_type_to_imode(filetype));
2796 * Here inode rebuild is done, we only rebuild the inode item,
2797 * don't repair the nlink(like move to lost+found).
2798 * That is the job of nlink repair.
2800 * We just fill the record and return
2802 rec->found_dir_item = 1;
2803 rec->imode = mode | btrfs_type_to_imode(filetype);
2805 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2806 /* Ensure the inode_nlinks repair function will be called */
2807 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach the orphan data extents recorded in @rec->orphan_extents to
 * their inode.
 *
 * For each orphan, btrfs_get_extent() is used to probe the target file
 * range.  The visible code either frees the orphan's disk extent (with a
 * "conflicts" message) or inserts a file extent item for it, then updates
 * @rec->found_size and the hole tree and unlinks the orphan from the
 * list.  I_ERR_FILE_EXTENT_ORPHAN is cleared at the end.
 */
2812 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2813 struct btrfs_root *root,
2814 struct btrfs_path *path,
2815 struct inode_record *rec)
2817 struct orphan_data_extent *orphan;
2818 struct orphan_data_extent *tmp;
/* _safe variant: entries are removed from the list while iterating. */
2821 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2823 * Check for conflicting file extents
2825 * Here we don't know whether the extents is compressed or not,
2826 * so we can only assume it not compressed nor data offset,
2827 * and use its disk_len as extent length.
2829 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2830 orphan->offset, orphan->disk_len, 0);
2831 btrfs_release_path(path);
2836 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2837 orphan->disk_bytenr, orphan->disk_len);
2838 ret = btrfs_free_extent(trans,
2839 root->fs_info->extent_root,
2840 orphan->disk_bytenr, orphan->disk_len,
2841 0, root->objectid, orphan->objectid,
/* disk_len is used for both length arguments (extent assumed uncompressed). */
2846 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2847 orphan->offset, orphan->disk_bytenr,
2848 orphan->disk_len, orphan->disk_len);
2852 /* Update file size info */
2853 rec->found_size += orphan->disk_len;
2854 if (rec->found_size == rec->nbytes)
2855 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2857 /* Update the file extent hole info too */
2858 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2862 if (RB_EMPTY_ROOT(&rec->holes))
2863 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2865 list_del(&orphan->list);
2868 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill the gaps recorded in @rec->holes by punching explicit holes.
 *
 * Each hole in the rb-tree is punched with btrfs_punch_hole() and then
 * removed from the tree; once the tree is empty
 * I_ERR_FILE_EXTENT_DISCOUNT is cleared.  A separate punch covering
 * [0, round_up(isize, sectorsize)) handles a file that lost all of its
 * file extents.
 */
2873 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2874 struct btrfs_root *root,
2875 struct btrfs_path *path,
2876 struct inode_record *rec)
2878 struct rb_node *node;
2879 struct file_extent_hole *hole;
2883 node = rb_first(&rec->holes);
2887 hole = rb_entry(node, struct file_extent_hole, node);
2888 ret = btrfs_punch_hole(trans, root, rec->ino,
2889 hole->start, hole->len);
2892 ret = del_file_extent_hole(&rec->holes, hole->start,
2896 if (RB_EMPTY_ROOT(&rec->holes))
2897 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* The tree was modified above, so restart from its first node. */
2898 node = rb_first(&rec->holes);
2900 /* special case for a file losing all its file extent */
2902 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2903 round_up(rec->isize, root->sectorsize));
2907 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2908 rec->ino, root->objectid);
/*
 * Run every applicable inode repair for @rec in one transaction.
 *
 * Nothing is attempted unless @rec->errors contains at least one of the
 * repairable bits tested below.  The repairs run in a fixed order and
 * each later one is skipped once an earlier one has failed (ret != 0).
 * The inode item is rebuilt first, and that step sets
 * I_ERR_LINK_COUNT_WRONG so the nlink repair follows.
 *
 * NOTE(review): the return value of btrfs_commit_transaction() is not
 * checked, so a failed commit is not reported to the caller.
 */
2913 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2915 struct btrfs_trans_handle *trans;
2916 struct btrfs_path path;
2919 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2920 I_ERR_NO_ORPHAN_ITEM |
2921 I_ERR_LINK_COUNT_WRONG |
2922 I_ERR_NO_INODE_ITEM |
2923 I_ERR_FILE_EXTENT_ORPHAN |
2924 I_ERR_FILE_EXTENT_DISCOUNT|
2925 I_ERR_FILE_NBYTES_WRONG)))
2929 * For nlink repair, it may create a dir and add link, so
2930 * 2 for parent(256)'s dir_index and dir_item
2931 * 2 for lost+found dir's inode_item and inode_ref
2932 * 1 for the new inode_ref of the file
2933 * 2 for lost+found dir's dir_index and dir_item for the file
2935 trans = btrfs_start_transaction(root, 7);
2937 return PTR_ERR(trans);
2939 btrfs_init_path(&path);
2940 if (rec->errors & I_ERR_NO_INODE_ITEM)
2941 ret = repair_inode_no_item(trans, root, &path, rec);
2942 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2943 ret = repair_inode_orphan_extent(trans, root, &path, rec);
2944 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2945 ret = repair_inode_discount_extent(trans, root, &path, rec);
2946 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2947 ret = repair_inode_isize(trans, root, &path, rec);
2948 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2949 ret = repair_inode_orphan_item(trans, root, &path, rec);
2950 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2951 ret = repair_inode_nlinks(trans, root, &path, rec);
2952 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2953 ret = repair_inode_nbytes(trans, root, &path, rec);
2954 btrfs_commit_transaction(trans, root);
2955 btrfs_release_path(&path);
/*
 * Validate (and, when the global repair flag is set, try to fix) every
 * inode record collected for @root in @inode_cache.
 *
 * Visible phases:
 *  - record the highest inode number in root->highest_inode;
 *  - in repair mode, walk the cache and call repair_inode_backrefs();
 *  - check the root directory record, recreating the root dir through
 *    btrfs_make_root_dir() on one of the paths;
 *  - drain the cache: records for the root dir and the orphan objectid
 *    are simply freed, others get their error bits completed, are handed
 *    to try_repair_inode(), and whatever remains is printed together with
 *    its unresolved backrefs.
 *
 * Every record is removed from @inode_cache and freed along the way.
 * Returns -1 when the error counter is positive, 0 otherwise.
 */
2959 static int check_inode_recs(struct btrfs_root *root,
2960 struct cache_tree *inode_cache)
2962 struct cache_extent *cache;
2963 struct ptr_node *node;
2964 struct inode_record *rec;
2965 struct inode_backref *backref;
2970 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A root with zero refs is not expected to have any inode records. */
2972 if (btrfs_root_refs(&root->root_item) == 0) {
2973 if (!cache_tree_empty(inode_cache))
2974 fprintf(stderr, "warning line %d\n", __LINE__);
2979 * We need to record the highest inode number for later 'lost+found'
2981 * We must select an ino not used/referred by any existing inode, or
2982 * 'lost+found' ino may be a missing ino in a corrupted leaf,
2983 * this may cause 'lost+found' dir has wrong nlinks.
2985 cache = last_cache_extent(inode_cache);
2987 node = container_of(cache, struct ptr_node, cache);
2989 if (rec->ino > root->highest_inode)
2990 root->highest_inode = rec->ino;
2994 * We need to repair backrefs first because we could change some of the
2995 * errors in the inode recs.
2997 * We also need to go through and delete invalid backrefs first and then
2998 * add the correct ones second. We do this because we may get EEXIST
2999 * when adding back the correct index because we hadn't yet deleted the
3002 * For example, if we were missing a dir index then the directories
3003 * isize would be wrong, so if we fixed the isize to what we thought it
3004 * would be and then fixed the backref we'd still have a invalid fs, so
3005 * we need to add back the dir index and then check to see if the isize
3010 if (stage == 3 && !err)
3013 cache = search_cache_extent(inode_cache, 0);
3014 while (repair && cache) {
3015 node = container_of(cache, struct ptr_node, cache);
/* Advance before the current node can be removed from the cache. */
3017 cache = next_cache_extent(cache);
3019 /* Need to free everything up and rescan */
3021 remove_cache_extent(inode_cache, &node->cache);
3023 free_inode_rec(rec);
3027 if (list_empty(&rec->backrefs))
3030 ret = repair_inode_backrefs(root, rec, inode_cache,
3044 rec = get_inode_rec(inode_cache, root_dirid, 0);
3045 BUG_ON(IS_ERR(rec));
3047 ret = check_root_dir(rec);
3049 fprintf(stderr, "root %llu root dir %llu error\n",
3050 (unsigned long long)root->root_key.objectid,
3051 (unsigned long long)root_dirid);
3052 print_inode_error(root, rec);
3057 struct btrfs_trans_handle *trans;
3059 trans = btrfs_start_transaction(root, 1);
3060 if (IS_ERR(trans)) {
3061 err = PTR_ERR(trans);
3066 "root %llu missing its root dir, recreating\n",
3067 (unsigned long long)root->objectid);
3069 ret = btrfs_make_root_dir(trans, root, root_dirid);
3072 btrfs_commit_transaction(trans, root);
3076 fprintf(stderr, "root %llu root dir %llu not found\n",
3077 (unsigned long long)root->root_key.objectid,
3078 (unsigned long long)root_dirid);
/* Final pass: pop every remaining record out of the cache. */
3082 cache = search_cache_extent(inode_cache, 0);
3085 node = container_of(cache, struct ptr_node, cache);
3087 remove_cache_extent(inode_cache, &node->cache);
3089 if (rec->ino == root_dirid ||
3090 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3091 free_inode_rec(rec);
/* Re-check the orphan item on disk before trusting the recorded error. */
3095 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3096 ret = check_orphan_item(root, rec->ino);
3098 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3099 if (can_free_inode_rec(rec)) {
3100 free_inode_rec(rec);
3105 if (!rec->found_inode_item)
3106 rec->errors |= I_ERR_NO_INODE_ITEM;
3107 if (rec->found_link != rec->nlink)
3108 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3110 ret = try_repair_inode(root, rec);
3111 if (ret == 0 && can_free_inode_rec(rec)) {
3112 free_inode_rec(rec);
3118 if (!(repair && ret == 0))
3120 print_inode_error(root, rec);
/* Report every backref that is still missing one of its three items. */
3121 list_for_each_entry(backref, &rec->backrefs, list) {
3122 if (!backref->found_dir_item)
3123 backref->errors |= REF_ERR_NO_DIR_ITEM;
3124 if (!backref->found_dir_index)
3125 backref->errors |= REF_ERR_NO_DIR_INDEX;
3126 if (!backref->found_inode_ref)
3127 backref->errors |= REF_ERR_NO_INODE_REF;
3128 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3129 " namelen %u name %s filetype %d errors %x",
3130 (unsigned long long)backref->dir,
3131 (unsigned long long)backref->index,
3132 backref->namelen, backref->name,
3133 backref->filetype, backref->errors);
3134 print_ref_error(backref->errors);
3136 free_inode_rec(rec);
3138 return (error > 0) ? -1 : 0;
3141 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3144 struct cache_extent *cache;
3145 struct root_record *rec = NULL;
3148 cache = lookup_cache_extent(root_cache, objectid, 1);
3150 rec = container_of(cache, struct root_record, cache);
3152 rec = calloc(1, sizeof(*rec));
3154 return ERR_PTR(-ENOMEM);
3155 rec->objectid = objectid;
3156 INIT_LIST_HEAD(&rec->backrefs);
3157 rec->cache.start = objectid;
3158 rec->cache.size = 1;
3160 ret = insert_cache_extent(root_cache, &rec->cache);
3162 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec matching (@ref_root, @dir, @name/@namelen),
 * or allocate a new one and append it to @rec->backrefs.
 *
 * @index is only stored in a newly created backref; it is not part of
 * the match.  The name is copied into the trailing storage of the
 * allocation and NUL-terminated.
 */
3167 static struct root_backref *get_root_backref(struct root_record *rec,
3168 u64 ref_root, u64 dir, u64 index,
3169 const char *name, int namelen)
3171 struct root_backref *backref;
3173 list_for_each_entry(backref, &rec->backrefs, list) {
3174 if (backref->ref_root != ref_root || backref->dir != dir ||
3175 backref->namelen != namelen)
3177 if (memcmp(name, backref->name, namelen))
/* One allocation for the struct plus the name and its terminating NUL. */
3182 backref = calloc(1, sizeof(*backref) + namelen + 1);
3185 backref->ref_root = ref_root;
3187 backref->index = index;
3188 backref->namelen = namelen;
3189 memcpy(backref->name, name, namelen);
3190 backref->name[namelen] = '\0';
3191 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for one root_record stored in a cache tree: unlinks every
 * backref hanging off the record.
 * NOTE(review): presumably each backref and the record itself are freed
 * here as well -- confirm.
 *
 * The macro invocation below is given this destructor together with the
 * "root_recs" name; this file calls free_root_recs_tree(), which it
 * presumably generates -- confirm against the macro definition.
 */
3195 static void free_root_record(struct cache_extent *cache)
3197 struct root_record *rec;
3198 struct root_backref *backref;
3200 rec = container_of(cache, struct root_record, cache);
3201 while (!list_empty(&rec->backrefs)) {
3202 backref = to_root_backref(rec->backrefs.next);
3203 list_del(&backref->list);
3210 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record that an item of @item_type referring to root @root_id was seen.
 *
 * The backref is looked up (or created) in the record of @root_id by
 * (@ref_root, @dir, @name).  @errors is OR-ed into the backref, the
 * matching found_* flag is set according to @item_type, and duplicate or
 * mismatching items add REF_ERR_* bits.  A backref becomes "reachable"
 * once both its forward ROOT_REF and its DIR_ITEM have been seen.
 */
3212 static int add_root_backref(struct cache_tree *root_cache,
3213 u64 root_id, u64 ref_root, u64 dir, u64 index,
3214 const char *name, int namelen,
3215 int item_type, int errors)
3217 struct root_record *rec;
3218 struct root_backref *backref;
3220 rec = get_root_rec(root_cache, root_id);
3221 BUG_ON(IS_ERR(rec));
3222 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3225 backref->errors |= errors;
/*
 * Items other than DIR_ITEM go through the index check: if an index was
 * already recorded by an earlier index-carrying item it must agree.
 */
3227 if (item_type != BTRFS_DIR_ITEM_KEY) {
3228 if (backref->found_dir_index || backref->found_back_ref ||
3229 backref->found_forward_ref) {
3230 if (backref->index != index)
3231 backref->errors |= REF_ERR_INDEX_UNMATCH;
3233 backref->index = index;
3237 if (item_type == BTRFS_DIR_ITEM_KEY) {
3238 if (backref->found_forward_ref)
3240 backref->found_dir_item = 1;
3241 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3242 backref->found_dir_index = 1;
3243 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3244 if (backref->found_forward_ref)
3245 backref->errors |= REF_ERR_DUP_ROOT_REF;
3246 else if (backref->found_dir_item)
3248 backref->found_forward_ref = 1;
3249 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3250 if (backref->found_back_ref)
3251 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3252 backref->found_back_ref = 1;
3257 if (backref->found_forward_ref && backref->found_dir_item)
3258 backref->reachable = 1;
/*
 * Drain the per-tree record cache @src_cache of @root and convert the
 * directory backrefs found there into root backrefs in @dst_cache.
 *
 * For the tree-reloc root the source cache is simply freed.  Otherwise
 * each record is removed from @src_cache, checked with is_child_root(),
 * and its DIR_ITEM / DIR_INDEX backrefs are forwarded through
 * add_root_backref() before the record is freed.
 */
3262 static int merge_root_recs(struct btrfs_root *root,
3263 struct cache_tree *src_cache,
3264 struct cache_tree *dst_cache)
3266 struct cache_extent *cache;
3267 struct ptr_node *node;
3268 struct inode_record *rec;
3269 struct inode_backref *backref;
3272 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3273 free_inode_recs_tree(src_cache);
3278 cache = search_cache_extent(src_cache, 0);
3281 node = container_of(cache, struct ptr_node, cache);
3283 remove_cache_extent(src_cache, &node->cache);
3286 ret = is_child_root(root, root->objectid, rec->ino);
3292 list_for_each_entry(backref, &rec->backrefs, list) {
/* These records are not expected to carry inode refs. */
3293 BUG_ON(backref->found_inode_ref);
3294 if (backref->found_dir_item)
3295 add_root_backref(dst_cache, rec->ino,
3296 root->root_key.objectid, backref->dir,
3297 backref->index, backref->name,
3298 backref->namelen, BTRFS_DIR_ITEM_KEY,
3300 if (backref->found_dir_index)
3301 add_root_backref(dst_cache, rec->ino,
3302 root->root_key.objectid, backref->dir,
3303 backref->index, backref->name,
3304 backref->namelen, BTRFS_DIR_INDEX_KEY,
3308 free_inode_rec(rec);
/*
 * Verify that every fs tree recorded in @root_cache is referenced and
 * that its root backrefs are complete.
 *
 * The first pass walks the cache and clears the "reachable" flag of
 * backrefs on the path where the referencing root has no reference
 * itself.  The second pass reports unreferenced trees (after consulting
 * the orphan items of the tree root) and prints every backref that is
 * still unresolved.
 *
 * Returns 1 if any error was counted, 0 otherwise.
 */
3315 static int check_root_refs(struct btrfs_root *root,
3316 struct cache_tree *root_cache)
3318 struct root_record *rec;
3319 struct root_record *ref_root;
3320 struct root_backref *backref;
3321 struct cache_extent *cache;
3327 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3328 BUG_ON(IS_ERR(rec));
3331 /* fixme: this can not detect circular references */
3334 cache = search_cache_extent(root_cache, 0);
3338 rec = container_of(cache, struct root_record, cache);
3339 cache = next_cache_extent(cache);
3341 if (rec->found_ref == 0)
3344 list_for_each_entry(backref, &rec->backrefs, list) {
3345 if (!backref->reachable)
3348 ref_root = get_root_rec(root_cache,
3350 BUG_ON(IS_ERR(ref_root));
3351 if (ref_root->found_ref > 0)
3354 backref->reachable = 0;
3356 if (rec->found_ref == 0)
/* Second pass: report what is left unreferenced or inconsistent. */
3362 cache = search_cache_extent(root_cache, 0);
3366 rec = container_of(cache, struct root_record, cache);
3367 cache = next_cache_extent(cache);
3369 if (rec->found_ref == 0 &&
3370 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3371 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3372 ret = check_orphan_item(root->fs_info->tree_root,
3378 * If we don't have a root item then we likely just have
3379 * a dir item in a snapshot for this root but no actual
3380 * ref key or anything so it's meaningless.
3382 if (!rec->found_root_item)
3385 fprintf(stderr, "fs tree %llu not referenced\n",
3386 (unsigned long long)rec->objectid);
3390 if (rec->found_ref > 0 && !rec->found_root_item)
3392 list_for_each_entry(backref, &rec->backrefs, list) {
3393 if (!backref->found_dir_item)
3394 backref->errors |= REF_ERR_NO_DIR_ITEM;
3395 if (!backref->found_dir_index)
3396 backref->errors |= REF_ERR_NO_DIR_INDEX;
3397 if (!backref->found_back_ref)
3398 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3399 if (!backref->found_forward_ref)
3400 backref->errors |= REF_ERR_NO_ROOT_REF;
3401 if (backref->reachable && backref->errors)
3408 fprintf(stderr, "fs tree %llu refs %u %s\n",
3409 (unsigned long long)rec->objectid, rec->found_ref,
3410 rec->found_root_item ? "" : "not found");
3412 list_for_each_entry(backref, &rec->backrefs, list) {
3413 if (!backref->reachable)
3415 if (!backref->errors && rec->found_root_item)
3417 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3418 " index %llu namelen %u name %s errors %x\n",
3419 (unsigned long long)backref->ref_root,
3420 (unsigned long long)backref->dir,
3421 (unsigned long long)backref->index,
3422 backref->namelen, backref->name,
3424 print_ref_error(backref->errors);
3427 return errors > 0 ? 1 : 0;
/*
 * Decode the ROOT_REF / ROOT_BACKREF item at @slot of @eb and feed it to
 * add_root_backref().
 *
 * Names longer than BTRFS_NAME_LEN are truncated to that length and
 * flagged with REF_ERR_NAME_TOO_LONG.  For ROOT_REF the key offset is the
 * referenced root and the key objectid the referencing one; for the other
 * key type the two are swapped.
 */
3430 static int process_root_ref(struct extent_buffer *eb, int slot,
3431 struct btrfs_key *key,
3432 struct cache_tree *root_cache)
3438 struct btrfs_root_ref *ref;
3439 char namebuf[BTRFS_NAME_LEN];
3442 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3444 dirid = btrfs_root_ref_dirid(eb, ref);
3445 index = btrfs_root_ref_sequence(eb, ref);
3446 name_len = btrfs_root_ref_name_len(eb, ref);
3448 if (name_len <= BTRFS_NAME_LEN) {
3452 len = BTRFS_NAME_LEN;
3453 error = REF_ERR_NAME_TOO_LONG;
/* The name is read from directly behind the fixed-size root_ref struct. */
3455 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3457 if (key->type == BTRFS_ROOT_REF_KEY) {
3458 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3459 index, namebuf, len, key->type, error);
3461 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3462 index, namebuf, len, key->type, error);
/*
 * Destructor for one btrfs_corrupt_block stored in a cache tree; maps the
 * cache_extent back to its containing corrupt-block record.
 * NOTE(review): presumably the record is freed here -- confirm.
 *
 * The macro invocation below is given this destructor together with the
 * "corrupt_blocks" name; this file calls free_corrupt_blocks_tree(), which
 * it presumably generates -- confirm against the macro definition.
 */
3467 static void free_corrupt_block(struct cache_extent *cache)
3469 struct btrfs_corrupt_block *corrupt;
3471 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3475 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3478 * Repair the btree of the given root.
3480 * The fix is to remove the node keys recorded in the corrupt_blocks cache_tree
3481 * and then rebalance the tree.
3482 * After the fix, the btree should be writable.
/*
 * @root:           tree to repair.
 * @corrupt_blocks: cache tree of btrfs_corrupt_block records (level and
 *                  node key of each bad block).
 *
 * Works in two passes inside one transaction: the first pass searches for
 * each recorded key at the recorded level, deletes the pointer and frees
 * the extent it pointed to; the second pass searches for each key again
 * with ins_len -1.
 */
3484 static int repair_btree(struct btrfs_root *root,
3485 struct cache_tree *corrupt_blocks)
3487 struct btrfs_trans_handle *trans;
3488 struct btrfs_path path;
3489 struct btrfs_corrupt_block *corrupt;
3490 struct cache_extent *cache;
3491 struct btrfs_key key;
3496 if (cache_tree_empty(corrupt_blocks))
3499 trans = btrfs_start_transaction(root, 1);
3500 if (IS_ERR(trans)) {
3501 ret = PTR_ERR(trans);
3502 fprintf(stderr, "Error starting transaction: %s\n",
3506 btrfs_init_path(&path);
3507 cache = first_cache_extent(corrupt_blocks);
3509 corrupt = container_of(cache, struct btrfs_corrupt_block,
3511 level = corrupt->level;
/* Stop the search at the level of the corrupted block, not at the leaf. */
3512 path.lowest_level = level;
3513 key.objectid = corrupt->key.objectid;
3514 key.type = corrupt->key.type;
3515 key.offset = corrupt->key.offset;
3518 * Here we don't want to do any tree balance, since it may
3519 * cause a balance with corrupted brother leaf/node,
3520 * so ins_len set to 0 here.
3521 * Balance will be done after all corrupt node/leaf is deleted.
3523 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
/* Remember the block address before its pointer is removed. */
3526 offset = btrfs_node_blockptr(path.nodes[level],
3529 /* Remove the ptr */
3530 ret = btrfs_del_ptr(trans, root, &path, level,
3535 * Remove the corresponding extent
3536 * return value is not concerned.
3538 btrfs_release_path(&path);
3539 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3540 0, root->root_key.objectid,
3542 cache = next_cache_extent(cache);
3545 /* Balance the btree using btrfs_search_slot() */
3546 cache = first_cache_extent(corrupt_blocks);
3548 corrupt = container_of(cache, struct btrfs_corrupt_block,
3550 memcpy(&key, &corrupt->key, sizeof(key));
3551 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3554 /* return will always >0 since it won't find the item */
3556 btrfs_release_path(&path);
3557 cache = next_cache_extent(cache);
3560 btrfs_commit_transaction(trans, root);
3561 btrfs_release_path(&path);
/*
 * Check one fs/subvolume tree.
 *
 * @root:       the tree to check.
 * @root_cache: global cache of root records; the record of this root is
 *              created here and backrefs found in the tree are merged in.
 * @wc:         walk control state shared between trees.
 *
 * Outline of the visible steps:
 *  - install a local corrupt_blocks cache in fs_info for the walk;
 *  - move the orphan data extents of the root to their inode records;
 *  - check the root block itself, then walk the tree with
 *    walk_down_tree()/walk_up_tree(), starting either at the root node or
 *    at the recorded drop_progress key;
 *  - print any corrupted tree blocks and call repair_btree() on one path;
 *  - merge root records and check the collected inode records;
 *  - tear down the corrupt-block cache and the orphan extent list.
 */
3565 static int check_fs_root(struct btrfs_root *root,
3566 struct cache_tree *root_cache,
3567 struct walk_control *wc)
3573 struct btrfs_path path;
3574 struct shared_node root_node;
3575 struct root_record *rec;
3576 struct btrfs_root_item *root_item = &root->root_item;
3577 struct cache_tree corrupt_blocks;
3578 struct orphan_data_extent *orphan;
3579 struct orphan_data_extent *tmp;
3580 enum btrfs_tree_block_status status;
3581 struct node_refs nrefs;
3584 * Reuse the corrupt_block cache tree to record corrupted tree block
3586 * Unlike the usage in extent tree check, here we do it in a per
3587 * fs/subvol tree base.
3589 cache_tree_init(&corrupt_blocks);
3590 root->fs_info->corrupt_blocks = &corrupt_blocks;
3592 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3593 rec = get_root_rec(root_cache, root->root_key.objectid);
3594 BUG_ON(IS_ERR(rec));
3595 if (btrfs_root_refs(root_item) > 0)
3596 rec->found_root_item = 1;
3599 btrfs_init_path(&path);
3600 memset(&root_node, 0, sizeof(root_node));
3601 cache_tree_init(&root_node.root_cache);
3602 cache_tree_init(&root_node.inode_cache);
3603 memset(&nrefs, 0, sizeof(nrefs));
3605 /* Move the orphan extent record to corresponding inode_record */
3606 list_for_each_entry_safe(orphan, tmp,
3607 &root->orphan_data_extents, list) {
3608 struct inode_record *inode;
3610 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3612 BUG_ON(IS_ERR(inode));
3613 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3614 list_move(&orphan->list, &inode->orphan_extents);
/* Start the walk with the root block as the active shared node. */
3617 level = btrfs_header_level(root->node);
3618 memset(wc->nodes, 0, sizeof(wc->nodes));
3619 wc->nodes[level] = &root_node;
3620 wc->active_node = level;
3621 wc->root_level = level;
3623 /* We may not have checked the root block, lets do that now */
3624 if (btrfs_is_leaf(root->node))
3625 status = btrfs_check_leaf(root, NULL, root->node);
3627 status = btrfs_check_node(root, NULL, root->node);
3628 if (status != BTRFS_TREE_BLOCK_CLEAN)
/*
 * Live root, or no drop in progress: begin at slot 0 of the root node,
 * taking an extra reference on it for the path.
 */
3631 if (btrfs_root_refs(root_item) > 0 ||
3632 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3633 path.nodes[level] = root->node;
3634 extent_buffer_get(root->node);
3635 path.slots[level] = 0;
3637 struct btrfs_key key;
3638 struct btrfs_disk_key found_key;
3640 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3641 level = root_item->drop_level;
3642 path.lowest_level = level;
/* drop_level comes from disk; reject values the tree cannot have. */
3643 if (level > btrfs_header_level(root->node) ||
3644 level >= BTRFS_MAX_LEVEL) {
3645 error("ignoring invalid drop level: %u", level);
3648 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3651 btrfs_node_key(path.nodes[level], &found_key,
3653 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3654 sizeof(found_key)));
3658 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3664 wret = walk_up_tree(root, &path, wc, &level);
3671 btrfs_release_path(&path);
3673 if (!cache_tree_empty(&corrupt_blocks)) {
3674 struct cache_extent *cache;
3675 struct btrfs_corrupt_block *corrupt;
3677 printf("The following tree block(s) is corrupted in tree %llu:\n",
3678 root->root_key.objectid);
3679 cache = first_cache_extent(&corrupt_blocks);
3681 corrupt = container_of(cache,
3682 struct btrfs_corrupt_block,
3684 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3685 cache->start, corrupt->level,
3686 corrupt->key.objectid, corrupt->key.type,
3687 corrupt->key.offset);
3688 cache = next_cache_extent(cache);
3691 printf("Try to repair the btree for root %llu\n",
3692 root->root_key.objectid);
3693 ret = repair_btree(root, &corrupt_blocks);
3695 fprintf(stderr, "Failed to repair btree: %s\n",
3698 printf("Btree for root %llu is fixed\n",
3699 root->root_key.objectid);
3703 err = merge_root_recs(root, &root_node.root_cache, root_cache);
/* Mark the inode record still in progress as fully checked. */
3707 if (root_node.current) {
3708 root_node.current->checked = 1;
3709 maybe_free_inode_rec(&root_node.inode_cache,
3713 err = check_inode_recs(root, &root_node.inode_cache);
/* fs_info must not keep pointing at the on-stack cache tree. */
3717 free_corrupt_blocks_tree(&corrupt_blocks);
3718 root->fs_info->corrupt_blocks = NULL;
3719 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Tell whether @objectid names a tree that is checked as an fs root: the
 * two relocation trees are tested explicitly, everything else is decided
 * by is_fstree().
 * NOTE(review): presumably the relocation trees yield a non-zero result
 * -- confirm.
 */
3723 static int fs_root_objectid(u64 objectid)
3725 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3726 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3728 return is_fstree(objectid);
/*
 * Check every fs/subvolume tree listed in the tree root.
 *
 * Iterates over the items of the tree root: ROOT_ITEMs accepted by
 * fs_root_objectid() are read and passed to check_fs_root(), while
 * ROOT_REF / ROOT_BACKREF items are passed to process_root_ref().  When
 * the tree root node changes underneath the scan, or check_fs_root()
 * returns -EAGAIN, the collected root records are dropped and the path
 * released.
 * NOTE(review): presumably the scan is then restarted -- confirm.
 */
3731 static int check_fs_roots(struct btrfs_root *root,
3732 struct cache_tree *root_cache)
3734 struct btrfs_path path;
3735 struct btrfs_key key;
3736 struct walk_control wc;
3737 struct extent_buffer *leaf, *tree_node;
3738 struct btrfs_root *tmp_root;
3739 struct btrfs_root *tree_root = root->fs_info->tree_root;
3743 if (ctx.progress_enabled) {
3744 ctx.tp = TASK_FS_ROOTS;
3745 task_start(ctx.info);
3749 * Just in case we made any changes to the extent tree that weren't
3750 * reflected into the free space cache yet.
3753 reset_cached_block_groups(root->fs_info);
3754 memset(&wc, 0, sizeof(wc));
3755 cache_tree_init(&wc.shared);
3756 btrfs_init_path(&path);
3761 key.type = BTRFS_ROOT_ITEM_KEY;
3762 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the tree root node so a change during the scan can be detected. */
3767 tree_node = tree_root->node;
3769 if (tree_node != tree_root->node) {
3770 free_root_recs_tree(root_cache);
3771 btrfs_release_path(&path);
3774 leaf = path.nodes[0];
3775 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3776 ret = btrfs_next_leaf(tree_root, &path);
3782 leaf = path.nodes[0];
3784 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3785 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3786 fs_root_objectid(key.objectid)) {
/* Reloc roots are read uncached and freed again below after the check. */
3787 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3788 tmp_root = btrfs_read_fs_root_no_cache(
3789 root->fs_info, &key);
3791 key.offset = (u64)-1;
3792 tmp_root = btrfs_read_fs_root(
3793 root->fs_info, &key);
3795 if (IS_ERR(tmp_root)) {
3799 ret = check_fs_root(tmp_root, root_cache, &wc);
3800 if (ret == -EAGAIN) {
3801 free_root_recs_tree(root_cache);
3802 btrfs_release_path(&path);
3807 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3808 btrfs_free_fs_root(tmp_root);
3809 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3810 key.type == BTRFS_ROOT_BACKREF_KEY) {
3811 process_root_ref(leaf, path.slots[0], &key,
3818 btrfs_release_path(&path);
3820 free_extent_cache_tree(&wc.shared);
3821 if (!cache_tree_empty(&wc.shared))
3822 fprintf(stderr, "warning line %d\n", __LINE__);
3824 task_stop(ctx.info);
/* Error bits returned by find_dir_item(); each one is a distinct bit. */
3829 #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
3830 #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
3831 #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
3834 * Find the DIR_ITEM/DIR_INDEX for the given key and check that it matches the
3835 * specified INODE_REF/INODE_EXTREF.
3837 * @root: the root of the fs/file tree
3838 * @ref_key: the key of the INODE_REF/INODE_EXTREF
3839 * @key: the key of the DIR_ITEM/DIR_INDEX
3840 * @index: the index in the INODE_REF/INODE_EXTREF, used to
3841 * distinguish root_dir from a normal dir/file
3842 * @name: the name in the INODE_REF/INODE_EXTREF
3843 * @namelen: the length of name in the INODE_REF/INODE_EXTREF
3844 * @mode: the st_mode of INODE_ITEM
3846 * Return 0 if no error occurred.
3847 * Return ROOT_DIR_ERROR if a DIR_ITEM/DIR_INDEX was found for root_dir.
3848 * Return DIR_ITEM_MISSING if no DIR_ITEM/DIR_INDEX was found for a normal dir/file.
3850 * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
3851 * do not match for a normal dir/file.
/*
 * Look up @key (a DIR_ITEM or DIR_INDEX key) in @root and compare the
 * directory entries stored in that item against the inode ref described
 * by @ref_key, @name/@namelen and @mode.
 *
 * An entry matches when its location is the INODE_ITEM of
 * @ref_key->objectid, its file type equals imode_to_type(@mode) and its
 * name equals @name.  On-disk names longer than BTRFS_NAME_LEN are
 * compared after trimming, with a warning.
 */
3853 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
3854 struct btrfs_key *key, u64 index, char *name,
3855 u32 namelen, u32 mode)
3857 struct btrfs_path path;
3858 struct extent_buffer *node;
3859 struct btrfs_dir_item *di;
3860 struct btrfs_key location;
3861 char namebuf[BTRFS_NAME_LEN] = {0};
3871 btrfs_init_path(&path);
3872 ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
3874 ret = DIR_ITEM_MISSING;
3878 /* Process root dir and goto out*/
3881 ret = ROOT_DIR_ERROR;
3883 "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have %s",
3885 ref_key->type == BTRFS_INODE_REF_KEY ?
3887 ref_key->objectid, ref_key->offset,
3888 key->type == BTRFS_DIR_ITEM_KEY ?
3889 "DIR_ITEM" : "DIR_INDEX");
3897 /* Process normal file/dir */
3899 ret = DIR_ITEM_MISSING;
3901 "root %llu INODE %s[%llu %llu] doesn't have related %s[%llu %llu] namelen %u filename %s filetype %d",
3903 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3904 ref_key->objectid, ref_key->offset,
3905 key->type == BTRFS_DIR_ITEM_KEY ?
3906 "DIR_ITEM" : "DIR_INDEX",
3907 key->objectid, key->offset, namelen, name,
3908 imode_to_type(mode));
3912 /* Check whether inode_id/filetype/name match */
3913 node = path.nodes[0];
3914 slot = path.slots[0];
3915 di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
3916 total = btrfs_item_size_nr(node, slot);
/* One item may pack several dir entries back to back; walk them all. */
3917 while (cur < total) {
/* Assume a mismatch until an entry passes every comparison below. */
3918 ret = DIR_ITEM_MISMATCH;
3919 name_len = btrfs_dir_name_len(node, di);
3920 data_len = btrfs_dir_data_len(node, di);
3922 btrfs_dir_item_key_to_cpu(node, di, &location);
3923 if (location.objectid != ref_key->objectid ||
3924 location.type != BTRFS_INODE_ITEM_KEY ||
3925 location.offset != 0)
3928 filetype = btrfs_dir_type(node, di);
3929 if (imode_to_type(mode) != filetype)
3932 if (name_len <= BTRFS_NAME_LEN) {
3935 len = BTRFS_NAME_LEN;
3936 warning("root %llu %s[%llu %llu] name too long %u, trimmed",
3938 key->type == BTRFS_DIR_ITEM_KEY ?
3939 "DIR_ITEM" : "DIR_INDEX",
3940 key->objectid, key->offset, name_len);
/* The name is read from directly behind the fixed-size dir_item struct. */
3942 read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
3943 if (len != namelen || strncmp(namebuf, name, len))
/* Advance by header + name + data, using the untrimmed on-disk name length. */
3949 len = sizeof(*di) + name_len + data_len;
3950 di = (struct btrfs_dir_item *)((char *)di + len);
3953 if (ret == DIR_ITEM_MISMATCH)
3955 "root %llu INODE %s[%llu %llu] and %s[%llu %llu] mismatch namelen %u filename %s filetype %d",
3957 ref_key->type == BTRFS_INODE_REF_KEY ? "REF" : "EXTREF",
3958 ref_key->objectid, ref_key->offset,
3959 key->type == BTRFS_DIR_ITEM_KEY ?
3960 "DIR_ITEM" : "DIR_INDEX",
3961 key->objectid, key->offset, namelen, name,
3962 imode_to_type(mode));
3964 btrfs_release_path(&path);
/*
 * Cross-check every backref of extent record @rec.
 *
 * Per backref the visible checks are: present in the extent tree, tree
 * backrefs actually referenced, data backrefs with matching found/wanted
 * counts, matching disk bytenr and matching byte length.  The summed
 * reference count is finally compared with @rec->refs.  Problems are
 * printed to stderr.
 *
 * NOTE(review): @print_errs presumably gates the messages, and the
 * return value is presumably non-zero when any check failed (the caller
 * in maybe_free_extent_rec() treats 0 as "all good") -- confirm.
 */
3968 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3970 struct list_head *cur = rec->backrefs.next;
3971 struct extent_backref *back;
3972 struct tree_backref *tback;
3973 struct data_backref *dback;
3977 while(cur != &rec->backrefs) {
3978 back = to_extent_backref(cur);
3980 if (!back->found_extent_tree) {
3984 if (back->is_data) {
3985 dback = to_data_backref(back);
3986 fprintf(stderr, "Backref %llu %s %llu"
3987 " owner %llu offset %llu num_refs %lu"
3988 " not found in extent tree\n",
3989 (unsigned long long)rec->start,
3990 back->full_backref ?
3992 back->full_backref ?
3993 (unsigned long long)dback->parent:
3994 (unsigned long long)dback->root,
3995 (unsigned long long)dback->owner,
3996 (unsigned long long)dback->offset,
3997 (unsigned long)dback->num_refs);
3999 tback = to_tree_backref(back);
4000 fprintf(stderr, "Backref %llu parent %llu"
4001 " root %llu not found in extent tree\n",
4002 (unsigned long long)rec->start,
4003 (unsigned long long)tback->parent,
4004 (unsigned long long)tback->root);
/* A tree backref must have been seen from the referencing block. */
4007 if (!back->is_data && !back->found_ref) {
4011 tback = to_tree_backref(back);
4012 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
4013 (unsigned long long)rec->start,
4014 back->full_backref ? "parent" : "root",
4015 back->full_backref ?
4016 (unsigned long long)tback->parent :
4017 (unsigned long long)tback->root, back);
4019 if (back->is_data) {
4020 dback = to_data_backref(back);
4021 if (dback->found_ref != dback->num_refs) {
4025 fprintf(stderr, "Incorrect local backref count"
4026 " on %llu %s %llu owner %llu"
4027 " offset %llu found %u wanted %u back %p\n",
4028 (unsigned long long)rec->start,
4029 back->full_backref ?
4031 back->full_backref ?
4032 (unsigned long long)dback->parent:
4033 (unsigned long long)dback->root,
4034 (unsigned long long)dback->owner,
4035 (unsigned long long)dback->offset,
4036 dback->found_ref, dback->num_refs, back);
4038 if (dback->disk_bytenr != rec->start) {
4042 fprintf(stderr, "Backref disk bytenr does not"
4043 " match extent record, bytenr=%llu, "
4044 "ref bytenr=%llu\n",
4045 (unsigned long long)rec->start,
4046 (unsigned long long)dback->disk_bytenr);
4049 if (dback->bytes != rec->nr) {
4053 fprintf(stderr, "Backref bytes do not match "
4054 "extent backref, bytenr=%llu, ref "
4055 "bytes=%llu, backref bytes=%llu\n",
4056 (unsigned long long)rec->start,
4057 (unsigned long long)rec->nr,
4058 (unsigned long long)dback->bytes);
/* Accumulate the global reference count over all backrefs. */
4061 if (!back->is_data) {
4064 dback = to_data_backref(back);
4065 found += dback->found_ref;
4068 if (found != rec->refs) {
4072 fprintf(stderr, "Incorrect global backref count "
4073 "on %llu found %llu wanted %llu\n",
4074 (unsigned long long)rec->start,
4075 (unsigned long long)found,
4076 (unsigned long long)rec->refs);
/*
 * Walk the backref list of @rec from the head until the list is empty.
 *
 * Every pass takes the first list node of rec->backrefs and converts it to
 * its struct extent_backref with to_extent_backref().
 * NOTE(review): the statements that unlink and release each entry, and the
 * value returned, sit on lines that are not visible in this excerpt -
 * confirm them against the full file.
 */
4082 static int free_all_extent_backrefs(struct extent_record *rec)
4084 struct extent_backref *back;
4085 struct list_head *cur;
4086 while (!list_empty(&rec->backrefs)) {
4087 cur = rec->backrefs.next;
4088 back = to_extent_backref(cur);
/*
 * Dispose of the extent records stored in @extent_cache.
 *
 * The visible steps fetch the first cache extent, recover the enclosing
 * struct extent_record with container_of(), remove the extent from the tree
 * and hand the record to free_all_extent_backrefs().
 * NOTE(review): the surrounding loop, its termination test and the release
 * of the record itself are not visible in this excerpt - verify.
 */
4095 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4096 struct cache_tree *extent_cache)
4098 struct cache_extent *cache;
4099 struct extent_record *rec;
4102 cache = first_cache_extent(extent_cache);
4105 rec = container_of(cache, struct extent_record, cache);
4106 remove_cache_extent(extent_cache, cache);
4107 free_all_extent_backrefs(rec);
/*
 * Drop @rec from @extent_cache once nothing is left to verify on it.
 *
 * The record is released only when all of the following hold:
 *  - content_checked and owner_ref_checked are set,
 *  - extent_item_refs equals refs and refs is greater than zero,
 *  - num_duplicates is zero,
 *  - all_backpointers_checked(rec, 0) returns zero,
 *  - none of bad_full_backref, crossing_stripes, wrong_chunk_type is set.
 * In that case the record is removed from the cache tree, its backrefs are
 * released and it is unlinked from whatever list rec->list is on.
 * NOTE(review): the release of @rec itself and the return value are on
 * lines not visible here - confirm.
 */
4112 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4113 struct extent_record *rec)
4115 if (rec->content_checked && rec->owner_ref_checked &&
4116 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4117 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4118 !rec->bad_full_backref && !rec->crossing_stripes &&
4119 !rec->wrong_chunk_type) {
4120 remove_cache_extent(extent_cache, &rec->cache);
4121 free_all_extent_backrefs(rec);
4122 list_del_init(&rec->list);
/*
 * Check that tree block @buf is really referenced by the root recorded as
 * its header owner.
 *
 * @root: any root of the filesystem; only root->fs_info is used here
 * @rec:  extent record describing @buf
 * @buf:  the tree block under inspection
 *
 * First pass: scan the backrefs of @rec and compare the header owner of
 * @buf with the root of each tree backref (the found_ref and full_backref
 * tests presumably skip entries; their bodies are not visible here).
 * Second pass: read the fs root named by the header owner and search it for
 * the first key of @buf with lowest_level set one above the level of @buf,
 * then compare the block pointer found in the parent slot with buf->start.
 *
 * Returns 0 when the local flag found is set and 1 otherwise, as shown by
 * the final return statement; earlier exits are not visible in this
 * excerpt.
 */
4128 static int check_owner_ref(struct btrfs_root *root,
4129 struct extent_record *rec,
4130 struct extent_buffer *buf)
4132 struct extent_backref *node;
4133 struct tree_backref *back;
4134 struct btrfs_root *ref_root;
4135 struct btrfs_key key;
4136 struct btrfs_path path;
4137 struct extent_buffer *parent;
4142 list_for_each_entry(node, &rec->backrefs, list) {
4145 if (!node->found_ref)
4147 if (node->full_backref)
4149 back = to_tree_backref(node);
4150 if (btrfs_header_owner(buf) == back->root)
4153 BUG_ON(rec->is_root);
4155 /* try to find the block by search corresponding fs tree */
4156 key.objectid = btrfs_header_owner(buf);
4157 key.type = BTRFS_ROOT_ITEM_KEY;
4158 key.offset = (u64)-1;
4160 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4161 if (IS_ERR(ref_root))
4164 level = btrfs_header_level(buf);
/* The search key is the first key stored in the block itself. */
4166 btrfs_item_key_to_cpu(buf, &key, 0);
4168 btrfs_node_key_to_cpu(buf, &key, 0);
4170 btrfs_init_path(&path);
/* Stop the descent one level above @buf so the parent slot can be read. */
4171 path.lowest_level = level + 1;
4172 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4176 parent = path.nodes[level + 1];
4177 if (parent && buf->start == btrfs_node_blockptr(parent,
4178 path.slots[level + 1]))
4181 btrfs_release_path(&path);
4182 return found ? 0 : 1;
/*
 * Inspect the backrefs of @rec for a reference from the extent tree.
 *
 * The list is walked by hand starting at rec->backrefs.next; each node is
 * viewed as a tree backref and its root is compared against
 * BTRFS_EXTENT_TREE_OBJECTID, with a separate test on full_backref.
 * NOTE(review): the list advance, the handling of data backrefs and every
 * return statement are on lines not visible in this excerpt, so the exact
 * return convention must be confirmed in the full file.
 */
4185 static int is_extent_tree_record(struct extent_record *rec)
4187 struct list_head *cur = rec->backrefs.next;
4188 struct extent_backref *node;
4189 struct tree_backref *back;
4192 while(cur != &rec->backrefs) {
4193 node = to_extent_backref(cur);
4197 back = to_tree_backref(node);
4198 if (node->full_backref)
4200 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Register an unreadable block as a corrupt extent record.
 *
 * The range given by start and len is looked up in @extent_cache and mapped
 * to its extent record. The record is tested with is_extent_tree_record();
 * when execution reaches the end, the parent key of the record is converted
 * to CPU byte order and passed to btrfs_add_corrupt_extent_record(), whose
 * result is returned.
 * NOTE(review): the remaining parameters, the miss handling after the
 * lookup and the early return after the extent tree test are not visible in
 * this excerpt.
 */
4207 static int record_bad_block_io(struct btrfs_fs_info *info,
4208 struct cache_tree *extent_cache,
4211 struct extent_record *rec;
4212 struct cache_extent *cache;
4213 struct btrfs_key key;
4215 cache = lookup_cache_extent(extent_cache, start, len);
4219 rec = container_of(cache, struct extent_record, cache);
4220 if (!is_extent_tree_record(rec))
4223 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4224 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Exchange the two adjacent entries at @slot and @slot + 1 of @buf.
 *
 * Non-zero header level: both struct btrfs_key_ptr entries are read out and
 * written back crosswise; node key 0 is then handed to
 * btrfs_fixup_low_keys() one level above @buf (the guard around that call
 * is not visible in this excerpt).
 * Level zero: the payload of both items is copied into temporary malloc()
 * buffers and written back crosswise, the item offset and size fields are
 * swapped, and both keys are rewritten through btrfs_set_item_key_unsafe()
 * with path->slots[0] pointed at each slot in turn.
 *
 * NOTE(review): each crosswise write passes the size of the destination
 * item while its source buffer was allocated with the size of the other
 * item; if the two sizes differ this looks like a read past the smaller
 * buffer - confirm.
 * NOTE(review): allocation failure handling, the release of the temporary
 * buffers and the return value are on lines not visible here.
 */
4227 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4228 struct extent_buffer *buf, int slot)
4230 if (btrfs_header_level(buf)) {
4231 struct btrfs_key_ptr ptr1, ptr2;
4233 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4234 sizeof(struct btrfs_key_ptr));
4235 read_extent_buffer(buf, &ptr2,
4236 btrfs_node_key_ptr_offset(slot + 1),
4237 sizeof(struct btrfs_key_ptr));
4238 write_extent_buffer(buf, &ptr1,
4239 btrfs_node_key_ptr_offset(slot + 1),
4240 sizeof(struct btrfs_key_ptr));
4241 write_extent_buffer(buf, &ptr2,
4242 btrfs_node_key_ptr_offset(slot),
4243 sizeof(struct btrfs_key_ptr));
4245 struct btrfs_disk_key key;
4246 btrfs_node_key(buf, &key, 0);
4247 btrfs_fixup_low_keys(root, path, &key,
4248 btrfs_header_level(buf) + 1);
4251 struct btrfs_item *item1, *item2;
4252 struct btrfs_key k1, k2;
4253 char *item1_data, *item2_data;
4254 u32 item1_offset, item2_offset, item1_size, item2_size;
4256 item1 = btrfs_item_nr(slot);
4257 item2 = btrfs_item_nr(slot + 1);
4258 btrfs_item_key_to_cpu(buf, &k1, slot);
4259 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4260 item1_offset = btrfs_item_offset(buf, item1);
4261 item2_offset = btrfs_item_offset(buf, item2);
4262 item1_size = btrfs_item_size(buf, item1);
4263 item2_size = btrfs_item_size(buf, item2);
4265 item1_data = malloc(item1_size);
4268 item2_data = malloc(item2_size);
4274 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4275 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4277 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4278 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4282 btrfs_set_item_offset(buf, item1, item2_offset);
4283 btrfs_set_item_offset(buf, item2, item1_offset);
4284 btrfs_set_item_size(buf, item1, item2_size);
4285 btrfs_set_item_size(buf, item2, item1_size);
/* Rewrite the keys so that slot holds the old key of slot + 1 and back. */
4287 path->slots[0] = slot;
4288 btrfs_set_item_key_unsafe(root, path, &k2);
4289 path->slots[0] = slot + 1;
4290 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair out-of-order keys in the block at path->nodes[path->lowest_level].
 *
 * Every adjacent pair of entries is read (node keys or item keys; the test
 * selecting between the two forms is not visible here) and compared with
 * btrfs_comp_cpu_keys(). A pair that does not compare as strictly ascending
 * is presumably the one passed to swap_values(); the buffer is marked dirty
 * afterwards.
 * NOTE(review): the statement guarded by the comparison, the handling of
 * the swap_values() result and the return value are on lines not visible in
 * this excerpt. @trans is not referenced in the visible lines.
 */
4295 static int fix_key_order(struct btrfs_trans_handle *trans,
4296 struct btrfs_root *root,
4297 struct btrfs_path *path)
4299 struct extent_buffer *buf;
4300 struct btrfs_key k1, k2;
4302 int level = path->lowest_level;
4305 buf = path->nodes[level];
4306 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4308 btrfs_node_key_to_cpu(buf, &k1, i);
4309 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4311 btrfs_item_key_to_cpu(buf, &k1, i);
4312 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4314 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4316 ret = swap_values(root, path, buf, i);
4319 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item header at @slot from leaf @buf.
 *
 * Only a fixed set of key types is accepted: DIR_INDEX, EXTENT_ITEM,
 * METADATA_ITEM, TREE_BLOCK_REF and EXTENT_DATA_REF; the action taken for
 * any other type is on a line not visible here. For an accepted key a
 * message is printed, the item headers after @slot are moved down by one
 * struct btrfs_item, the item count in the header is decremented and the
 * buffer is marked dirty. Item key 0 is passed to btrfs_fixup_low_keys() at
 * level 1 under a condition that is not visible in this excerpt.
 * Only the item header array is moved by the visible code; the item data
 * area is left as it is.
 * NOTE(review): the return values are not visible here. @trans is not
 * referenced in the visible lines.
 */
4325 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4326 struct btrfs_root *root,
4327 struct btrfs_path *path,
4328 struct extent_buffer *buf, int slot)
4330 struct btrfs_key key;
4331 int nritems = btrfs_header_nritems(buf);
4333 btrfs_item_key_to_cpu(buf, &key, slot);
4335 /* These are all the keys we can deal with missing. */
4336 if (key.type != BTRFS_DIR_INDEX_KEY &&
4337 key.type != BTRFS_EXTENT_ITEM_KEY &&
4338 key.type != BTRFS_METADATA_ITEM_KEY &&
4339 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4340 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4343 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4344 (unsigned long long)key.objectid, key.type,
4345 (unsigned long long)key.offset, slot, buf->start);
4346 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4347 btrfs_item_nr_offset(slot + 1),
4348 sizeof(struct btrfs_item) *
4349 (nritems - slot - 1));
4350 btrfs_set_header_nritems(buf, nritems - 1);
4352 struct btrfs_disk_key disk_key;
4354 btrfs_item_key(buf, &disk_key, 0);
4355 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4357 btrfs_mark_buffer_dirty(buf);
/*
 * Repair item data offsets in the leaf at path->nodes[0].
 *
 * Expected layout enforced by the visible checks: the data of item 0 ends
 * at BTRFS_LEAF_DATA_SIZE(root) and the data of every later item ends where
 * the data of the previous item starts.
 *  - An item ending beyond its limit is passed to delete_bogus_item(); one
 *    of the two messages (off the end of the leaf / items overlap) is
 *    printed on a path whose condition is not visible here.
 *  - An item ending short of its limit is moved up by the size of the gap:
 *    its data is shifted with memmove_extent_buffer(), the item offset is
 *    updated and the buffer is marked dirty.
 * The function asserts with BUG_ON() that path->lowest_level is zero.
 * NOTE(review): loop control after a deletion, the new offset argument and
 * the return statements are on lines not visible in this excerpt.
 */
4361 static int fix_item_offset(struct btrfs_trans_handle *trans,
4362 struct btrfs_root *root,
4363 struct btrfs_path *path)
4365 struct extent_buffer *buf;
4369 /* We should only get this for leaves */
4370 BUG_ON(path->lowest_level);
4371 buf = path->nodes[0];
4373 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4374 unsigned int shift = 0, offset;
4376 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4377 BTRFS_LEAF_DATA_SIZE(root)) {
4378 if (btrfs_item_end_nr(buf, i) >
4379 BTRFS_LEAF_DATA_SIZE(root)) {
4380 ret = delete_bogus_item(trans, root, path,
4384 fprintf(stderr, "item is off the end of the "
4385 "leaf, can't fix\n");
/* Gap between the end of the first item and the end of the leaf data. */
4389 shift = BTRFS_LEAF_DATA_SIZE(root) -
4390 btrfs_item_end_nr(buf, i);
4391 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4392 btrfs_item_offset_nr(buf, i - 1)) {
4393 if (btrfs_item_end_nr(buf, i) >
4394 btrfs_item_offset_nr(buf, i - 1)) {
4395 ret = delete_bogus_item(trans, root, path,
4399 fprintf(stderr, "items overlap, can't fix\n");
/* Gap between this item and the start of the previous item data. */
4403 shift = btrfs_item_offset_nr(buf, i - 1) -
4404 btrfs_item_end_nr(buf, i);
4409 printf("Shifting item nr %d by %u bytes in block %llu\n",
4410 i, shift, (unsigned long long)buf->start);
4411 offset = btrfs_item_offset_nr(buf, i);
4412 memmove_extent_buffer(buf,
4413 btrfs_leaf_data(buf) + offset + shift,
4414 btrfs_leaf_data(buf) + offset,
4415 btrfs_item_size_nr(buf, i));
4416 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4418 btrfs_mark_buffer_dirty(buf);
4422 * We may have moved things, in which case we want to exit so we don't
4423 * write those changes out. Once we have proper abort functionality in
4424 * progs this can be changed to something nicer.
4431 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4432 * then just return -EIO.
/*
 * Parameters and flow, as far as the visible lines show:
 *
 * @root:   used only for root->fs_info
 * @buf:    the tree block that failed its check
 * @status: the failure reported for @buf; only
 *          BTRFS_TREE_BLOCK_BAD_KEY_ORDER and
 *          BTRFS_TREE_BLOCK_INVALID_OFFSETS are handled
 *
 * All roots referencing buf->start are collected with
 * btrfs_find_all_roots(). For each of them the fs root is read, a
 * transaction is started, and btrfs_search_slot() is run for the first key
 * of @buf with lowest_level set to the level of @buf, skip_check_block set
 * and the cow argument set to 1. Depending on @status either
 * fix_key_order() or fix_item_offset() is applied, and the transaction is
 * committed.
 * NOTE(review): the error branches, the release of the ulist and the return
 * statements are on lines not visible in this excerpt.
 */
4434 static int try_to_fix_bad_block(struct btrfs_root *root,
4435 struct extent_buffer *buf,
4436 enum btrfs_tree_block_status status)
4438 struct btrfs_trans_handle *trans;
4439 struct ulist *roots;
4440 struct ulist_node *node;
4441 struct btrfs_root *search_root;
4442 struct btrfs_path path;
4443 struct ulist_iterator iter;
4444 struct btrfs_key root_key, key;
4447 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4448 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4451 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
4455 btrfs_init_path(&path);
4456 ULIST_ITER_INIT(&iter);
4457 while ((node = ulist_next(roots, &iter))) {
4458 root_key.objectid = node->val;
4459 root_key.type = BTRFS_ROOT_ITEM_KEY;
4460 root_key.offset = (u64)-1;
4462 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4469 trans = btrfs_start_transaction(search_root, 0);
4470 if (IS_ERR(trans)) {
4471 ret = PTR_ERR(trans);
4475 path.lowest_level = btrfs_header_level(buf);
/* The block is known to be bad, so the search must not re-check it. */
4476 path.skip_check_block = 1;
4477 if (path.lowest_level)
4478 btrfs_node_key_to_cpu(buf, &key, 0);
4480 btrfs_item_key_to_cpu(buf, &key, 0);
4481 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
4484 btrfs_commit_transaction(trans, search_root);
4487 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4488 ret = fix_key_order(trans, search_root, &path);
4489 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4490 ret = fix_item_offset(trans, search_root, &path);
4492 btrfs_commit_transaction(trans, search_root);
4495 btrfs_release_path(&path);
4496 btrfs_commit_transaction(trans, search_root);
4499 btrfs_release_path(&path);
/*
 * Validate tree block @buf and update its extent record.
 *
 * @root:         root passed on to the leaf, node and owner checks
 * @extent_cache: cache holding the extent record covering @buf
 * @buf:          tree block to validate
 * @flags:        block flags; BTRFS_BLOCK_FLAG_FULL_BACKREF marks the owner
 *                reference as checked without calling check_owner_ref()
 *
 * The record is looked up by buf->start and buf->len and receives the
 * header generation, the objectid of the first key (when the block has
 * items) and the level. The block is then checked with btrfs_check_leaf()
 * or btrfs_check_node() against the parent key stored in the record. When
 * the result is not BTRFS_TREE_BLOCK_CLEAN, try_to_fix_bad_block() is
 * called and a bad block message is printed if the status is still not
 * clean. A clean block gets content_checked set, its owner reference
 * evaluated, and the record is offered to maybe_free_extent_rec().
 * NOTE(review): the conditions guarding the repair attempt and every return
 * statement are on lines not visible in this excerpt.
 */
4503 static int check_block(struct btrfs_root *root,
4504 struct cache_tree *extent_cache,
4505 struct extent_buffer *buf, u64 flags)
4507 struct extent_record *rec;
4508 struct cache_extent *cache;
4509 struct btrfs_key key;
4510 enum btrfs_tree_block_status status;
4514 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4517 rec = container_of(cache, struct extent_record, cache);
4518 rec->generation = btrfs_header_generation(buf);
4520 level = btrfs_header_level(buf);
4521 if (btrfs_header_nritems(buf) > 0) {
4524 btrfs_item_key_to_cpu(buf, &key, 0);
4526 btrfs_node_key_to_cpu(buf, &key, 0);
4528 rec->info_objectid = key.objectid;
4530 rec->info_level = level;
4532 if (btrfs_is_leaf(buf))
4533 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4535 status = btrfs_check_node(root, &rec->parent_key, buf);
4537 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4539 status = try_to_fix_bad_block(root, buf, status);
4540 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4542 fprintf(stderr, "bad block %llu\n",
4543 (unsigned long long)buf->start);
4546 * Signal to callers we need to start the scan over
4547 * again since we'll have cowed blocks.
4552 rec->content_checked = 1;
4553 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4554 rec->owner_ref_checked = 1;
4556 ret = check_owner_ref(root, rec, buf);
4558 rec->owner_ref_checked = 1;
4562 maybe_free_extent_rec(extent_cache, rec);
/*
 * Search the backrefs of @rec for a tree backref matching @parent or @root.
 *
 * The list is walked by hand from rec->backrefs.next. Two kinds of match
 * are visible: entries are compared on back->parent against @parent in one
 * branch and on back->root against @root in the other, with the
 * full_backref bit of the node tested in both branches.
 * NOTE(review): the test that selects between the two branches, the skip of
 * data backrefs, the list advance and the return statements are on lines
 * not visible in this excerpt.
 */
4566 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4567 u64 parent, u64 root)
4569 struct list_head *cur = rec->backrefs.next;
4570 struct extent_backref *node;
4571 struct tree_backref *back;
4573 while(cur != &rec->backrefs) {
4574 node = to_extent_backref(cur);
4578 back = to_tree_backref(node);
4580 if (!node->full_backref)
4582 if (parent == back->parent)
4585 if (node->full_backref)
4587 if (back->root == root)
/*
 * Allocate a tree backref and append it to the backref list of @rec.
 *
 * The embedded node is zeroed first. One branch stores @parent and sets
 * full_backref to 1, the other clears full_backref to 0 (and presumably
 * stores @root; that assignment is not visible here).
 * NOTE(review): the allocation failure check, the branch condition and the
 * return statement are on lines not visible in this excerpt.
 */
4594 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4595 u64 parent, u64 root)
4597 struct tree_backref *ref = malloc(sizeof(*ref));
4601 memset(&ref->node, 0, sizeof(ref->node));
4603 ref->parent = parent;
4604 ref->node.full_backref = 1;
4607 ref->node.full_backref = 0;
4609 list_add_tail(&ref->node.list, &rec->backrefs);
/*
 * Search the backrefs of @rec for a matching data backref.
 *
 * One branch compares back->parent with @parent. The other branch requires
 * root, owner and offset to match; inside it, when found_ref is non-zero
 * and the node already has found_ref set, an entry whose bytes or
 * disk_bytenr differ from @bytes or @disk_bytenr is treated specially (the
 * action is not visible here, presumably the entry is passed over).
 * NOTE(review): the found_ref parameter declaration, the branch selection,
 * the list advance and the return statements are on lines not visible in
 * this excerpt.
 */
4614 static struct data_backref *find_data_backref(struct extent_record *rec,
4615 u64 parent, u64 root,
4616 u64 owner, u64 offset,
4618 u64 disk_bytenr, u64 bytes)
4620 struct list_head *cur = rec->backrefs.next;
4621 struct extent_backref *node;
4622 struct data_backref *back;
4624 while(cur != &rec->backrefs) {
4625 node = to_extent_backref(cur);
4629 back = to_data_backref(node);
4631 if (!node->full_backref)
4633 if (parent == back->parent)
4636 if (node->full_backref)
4638 if (back->root == root && back->owner == owner &&
4639 back->offset == offset) {
4640 if (found_ref && node->found_ref &&
4641 (back->bytes != bytes ||
4642 back->disk_bytenr != disk_bytenr))
/*
 * Allocate a data backref and append it to the backref list of @rec.
 *
 * The embedded node is zeroed and flagged as data. One branch stores
 * @parent and sets full_backref to 1; the other stores @offset and clears
 * full_backref to 0. The bytes field is set from max_size, and
 * rec->max_size is raised when max_size is larger.
 * NOTE(review): the max_size parameter declaration, the allocation failure
 * check, the remaining field assignments and the return statement are on
 * lines not visible in this excerpt.
 */
4651 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4652 u64 parent, u64 root,
4653 u64 owner, u64 offset,
4656 struct data_backref *ref = malloc(sizeof(*ref));
4660 memset(&ref->node, 0, sizeof(ref->node));
4661 ref->node.is_data = 1;
4664 ref->parent = parent;
4667 ref->node.full_backref = 1;
4671 ref->offset = offset;
4672 ref->node.full_backref = 0;
4674 ref->bytes = max_size;
4677 list_add_tail(&ref->node.list, &rec->backrefs);
4678 if (max_size > rec->max_size)
4679 rec->max_size = max_size;
4683 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the block group found for rec->start does
 * not fit the kind of extent:
 *  - a record without the metadata bit needs BTRFS_BLOCK_GROUP_DATA,
 *  - a metadata record needs SYSTEM or METADATA,
 *  - when backrefs exist, the first one decides which of the two: a data
 *    backref on a tree block is always wrong, a tree backref whose root is
 *    BTRFS_CHUNK_TREE_OBJECTID requires SYSTEM, anything else METADATA.
 * The block group is looked up through the file-level global_info.
 * NOTE(review): the handling of a failed block group lookup and the early
 * returns are on lines not visible in this excerpt.
 */
4684 static void check_extent_type(struct extent_record *rec)
4686 struct btrfs_block_group_cache *bg_cache;
4688 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4692 /* data extent, check chunk directly*/
4693 if (!rec->metadata) {
4694 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4695 rec->wrong_chunk_type = 1;
4699 /* metadata extent, check the obvious case first */
4700 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4701 BTRFS_BLOCK_GROUP_METADATA))) {
4702 rec->wrong_chunk_type = 1;
4707 * Check SYSTEM extent, as it's also marked as metadata, we can only
4708 * make sure it's a SYSTEM extent by its backref
4710 if (!list_empty(&rec->backrefs)) {
4711 struct extent_backref *node;
4712 struct tree_backref *tback;
4715 node = to_extent_backref(rec->backrefs.next);
4716 if (node->is_data) {
4717 /* tree block shouldn't have data backref */
4718 rec->wrong_chunk_type = 1;
4721 tback = container_of(node, struct tree_backref, node);
4723 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4724 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4726 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4727 if (!(bg_cache->flags & bg_type))
4728 rec->wrong_chunk_type = 1;
4733 * Allocate a new extent record, fill default values from @tmpl and insert into
4734 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4735 * the cache, otherwise it fails.
/*
 * Field handling, as shown by the visible assignments:
 *  - copied from @tmpl: start, max_size, found_rec, content_checked,
 *    owner_ref_checked, metadata, is_root, refs, extent_item_refs,
 *    parent_generation and parent_key;
 *  - nr is the larger of tmpl->nr and tmpl->max_size, while the cache
 *    extent size is tmpl->nr;
 *  - num_duplicates, bad_full_backref, crossing_stripes and
 *    wrong_chunk_type start at zero, flag_block_full_backref at FLAG_UNSET;
 *  - the backrefs, dups and list heads are initialised empty.
 * After insertion the file-level bytes_used counter grows by rec->nr, the
 * stripe crossing check is evaluated with the tree root nodesize and
 * check_extent_type() is run on the new record.
 * NOTE(review): the allocation and insertion failure handling, the
 * condition guarding the stripe check and the return value are on lines not
 * visible in this excerpt.
 */
4737 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4738 struct extent_record *tmpl)
4740 struct extent_record *rec;
4743 rec = malloc(sizeof(*rec));
4746 rec->start = tmpl->start;
4747 rec->max_size = tmpl->max_size;
4748 rec->nr = max(tmpl->nr, tmpl->max_size);
4749 rec->found_rec = tmpl->found_rec;
4750 rec->content_checked = tmpl->content_checked;
4751 rec->owner_ref_checked = tmpl->owner_ref_checked;
4752 rec->num_duplicates = 0;
4753 rec->metadata = tmpl->metadata;
4754 rec->flag_block_full_backref = FLAG_UNSET;
4755 rec->bad_full_backref = 0;
4756 rec->crossing_stripes = 0;
4757 rec->wrong_chunk_type = 0;
4758 rec->is_root = tmpl->is_root;
4759 rec->refs = tmpl->refs;
4760 rec->extent_item_refs = tmpl->extent_item_refs;
4761 rec->parent_generation = tmpl->parent_generation;
4762 INIT_LIST_HEAD(&rec->backrefs);
4763 INIT_LIST_HEAD(&rec->dups);
4764 INIT_LIST_HEAD(&rec->list);
4765 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4766 rec->cache.start = tmpl->start;
4767 rec->cache.size = tmpl->nr;
4768 ret = insert_cache_extent(extent_cache, &rec->cache);
4773 bytes_used += rec->nr;
4776 rec->crossing_stripes = check_crossing_stripes(global_info,
4777 rec->start, global_info->tree_root->nodesize);
4778 check_extent_type(rec);
4783 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4785 * - refs - if found, increase refs
4786 * - is_root - if found, set
4787 * - content_checked - if found, set
4788 * - owner_ref_checked - if found, set
4790 * If not found, create a new one, initialize and insert.
/*
 * Flow of the visible code:
 *
 * The range tmpl->start / tmpl->nr is looked up in @extent_cache.
 * On a hit the existing record is updated in place:
 *  - nr may be reset to the larger of tmpl->nr and tmpl->max_size;
 *  - when tmpl->found_rec is set and either the start differs or the record
 *    was already found, a duplicate entry is allocated from @tmpl, queued
 *    on rec->dups, counted in num_duplicates, and the record is put on the
 *    file-level duplicate_extents list if it is not on a list yet;
 *  - a non-zero tmpl->extent_item_refs replaces the stored value unless a
 *    duplicate was handled, with a message printed when a value was already
 *    present;
 *  - content_checked, owner_ref_checked, parent_generation and max_size are
 *    only ever raised or set, and parent_key is overwritten;
 *  - stripe crossing and chunk type are re-evaluated and the record is
 *    offered to maybe_free_extent_rec().
 * On a miss the work is delegated to add_extent_rec_nolookup().
 * NOTE(review): several guards, the allocation failure check and the return
 * statements are on lines not visible in this excerpt.
 */
4792 static int add_extent_rec(struct cache_tree *extent_cache,
4793 struct extent_record *tmpl)
4795 struct extent_record *rec;
4796 struct cache_extent *cache;
4800 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4802 rec = container_of(cache, struct extent_record, cache);
4806 rec->nr = max(tmpl->nr, tmpl->max_size);
4809 * We need to make sure to reset nr to whatever the extent
4810 * record says was the real size, this way we can compare it to
4813 if (tmpl->found_rec) {
4814 if (tmpl->start != rec->start || rec->found_rec) {
4815 struct extent_record *tmp;
4818 if (list_empty(&rec->list))
4819 list_add_tail(&rec->list,
4820 &duplicate_extents);
4823 * We have to do this song and dance in case we
4824 * find an extent record that falls inside of
4825 * our current extent record but does not have
4826 * the same objectid.
4828 tmp = malloc(sizeof(*tmp));
4831 tmp->start = tmpl->start;
4832 tmp->max_size = tmpl->max_size;
4835 tmp->metadata = tmpl->metadata;
4836 tmp->extent_item_refs = tmpl->extent_item_refs;
4837 INIT_LIST_HEAD(&tmp->list);
4838 list_add_tail(&tmp->list, &rec->dups);
4839 rec->num_duplicates++;
4846 if (tmpl->extent_item_refs && !dup) {
4847 if (rec->extent_item_refs) {
4848 fprintf(stderr, "block %llu rec "
4849 "extent_item_refs %llu, passed %llu\n",
4850 (unsigned long long)tmpl->start,
4851 (unsigned long long)
4852 rec->extent_item_refs,
4853 (unsigned long long)tmpl->extent_item_refs);
4855 rec->extent_item_refs = tmpl->extent_item_refs;
4859 if (tmpl->content_checked)
4860 rec->content_checked = 1;
4861 if (tmpl->owner_ref_checked)
4862 rec->owner_ref_checked = 1;
4863 memcpy(&rec->parent_key, &tmpl->parent_key,
4864 sizeof(tmpl->parent_key));
4865 if (tmpl->parent_generation)
4866 rec->parent_generation = tmpl->parent_generation;
4867 if (rec->max_size < tmpl->max_size)
4868 rec->max_size = tmpl->max_size;
4871 * A metadata extent can't cross stripe_len boundary, otherwise
4872 * kernel scrub won't be able to handle it.
4873 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4877 rec->crossing_stripes = check_crossing_stripes(
4878 global_info, rec->start,
4879 global_info->tree_root->nodesize);
4880 check_extent_type(rec);
4881 maybe_free_extent_rec(extent_cache, rec);
4885 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref for the extent at @bytenr.
 *
 * @extent_cache: cache of extent records
 * @bytenr:       start of the referenced tree block
 * @parent:       parent block used to find or create the backref
 * @root:         root objectid used to find or create the backref
 * @found_ref:    presumably selects which of the two flags below is set;
 *                the selecting test is not visible here - confirm
 *
 * When no record covers @bytenr, a zeroed template with start set to
 * @bytenr is inserted through add_extent_rec_nolookup() and the lookup is
 * repeated. A record whose start differs from @bytenr is rejected. The
 * backref is then located with find_tree_backref() or created with
 * alloc_tree_backref(). Setting found_ref or found_extent_tree on a backref
 * that already has the flag prints an already-exists message first. The
 * record is finally run through check_extent_type() and
 * maybe_free_extent_rec().
 * NOTE(review): the error paths and return statements are on lines not
 * visible in this excerpt.
 */
4890 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4891 u64 parent, u64 root, int found_ref)
4893 struct extent_record *rec;
4894 struct tree_backref *back;
4895 struct cache_extent *cache;
4898 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4900 struct extent_record tmpl;
4902 memset(&tmpl, 0, sizeof(tmpl));
4903 tmpl.start = bytenr;
4907 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4911 /* really a bug in cache_extent implement now */
4912 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4917 rec = container_of(cache, struct extent_record, cache);
4918 if (rec->start != bytenr) {
4920 * Several cause, from unaligned bytenr to over lapping extents
4925 back = find_tree_backref(rec, parent, root);
4927 back = alloc_tree_backref(rec, parent, root);
4933 if (back->node.found_ref) {
4934 fprintf(stderr, "Extent back ref already exists "
4935 "for %llu parent %llu root %llu \n",
4936 (unsigned long long)bytenr,
4937 (unsigned long long)parent,
4938 (unsigned long long)root);
4940 back->node.found_ref = 1;
4942 if (back->node.found_extent_tree) {
4943 fprintf(stderr, "Extent back ref already exists "
4944 "for %llu parent %llu root %llu \n",
4945 (unsigned long long)bytenr,
4946 (unsigned long long)parent,
4947 (unsigned long long)root);
4949 back->node.found_extent_tree = 1;
4951 check_extent_type(rec);
4952 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for the extent at @bytenr.
 *
 * @extent_cache: cache of extent records
 * @bytenr:       start of the referenced data extent
 * @parent, @root, @owner, @offset: identity of the backref
 * @num_refs:     reference count carried by the backref
 * @found_ref:    passed to find_data_backref(); presumably also selects
 *                between the two update paths below - confirm
 * @max_size:     size associated with this reference
 *
 * When no record covers @bytenr a template record is inserted through
 * add_extent_rec_nolookup() and the lookup is repeated. rec->max_size is
 * raised to @max_size when smaller. The backref is located with
 * find_data_backref() or created with alloc_data_backref().
 * One update path asserts with BUG_ON() that @num_refs is 1 and that an
 * already found backref has bytes equal to @max_size, then bumps the found
 * counters, stores bytes and disk_bytenr and marks the record content and
 * owner reference as checked. The other path prints an already-exists
 * message when found_extent_tree was set, then stores @num_refs and sets
 * found_extent_tree. The record is finally offered to
 * maybe_free_extent_rec().
 * NOTE(review): the error paths and return statements are on lines not
 * visible in this excerpt.
 */
4956 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4957 u64 parent, u64 root, u64 owner, u64 offset,
4958 u32 num_refs, int found_ref, u64 max_size)
4960 struct extent_record *rec;
4961 struct data_backref *back;
4962 struct cache_extent *cache;
4965 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4967 struct extent_record tmpl;
4969 memset(&tmpl, 0, sizeof(tmpl));
4970 tmpl.start = bytenr;
4972 tmpl.max_size = max_size;
4974 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
4978 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4983 rec = container_of(cache, struct extent_record, cache);
4984 if (rec->max_size < max_size)
4985 rec->max_size = max_size;
4988 * If found_ref is set then max_size is the real size and must match the
4989 * existing refs. So if we have already found a ref then we need to
4990 * make sure that this ref matches the existing one, otherwise we need
4991 * to add a new backref so we can notice that the backrefs don't match
4992 * and we need to figure out who is telling the truth. This is to
4993 * account for that awful fsync bug I introduced where we'd end up with
4994 * a btrfs_file_extent_item that would have its length include multiple
4995 * prealloc extents or point inside of a prealloc extent.
4997 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
5000 back = alloc_data_backref(rec, parent, root, owner, offset,
5006 BUG_ON(num_refs != 1);
5007 if (back->node.found_ref)
5008 BUG_ON(back->bytes != max_size);
5009 back->node.found_ref = 1;
5010 back->found_ref += 1;
5011 back->bytes = max_size;
5012 back->disk_bytenr = bytenr;
5014 rec->content_checked = 1;
5015 rec->owner_ref_checked = 1;
5017 if (back->node.found_extent_tree) {
5018 fprintf(stderr, "Extent back ref already exists "
5019 "for %llu parent %llu root %llu "
5020 "owner %llu offset %llu num_refs %lu\n",
5021 (unsigned long long)bytenr,
5022 (unsigned long long)parent,
5023 (unsigned long long)root,
5024 (unsigned long long)owner,
5025 (unsigned long long)offset,
5026 (unsigned long)num_refs);
5028 back->num_refs = num_refs;
5029 back->node.found_extent_tree = 1;
5031 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue the range given by @bytenr and @size for later processing.
 *
 * The range is first added to @seen, then to @pending. The result of the
 * second insertion is ignored.
 * NOTE(review): the test on the first result (presumably an early return
 * when the range was already seen) and the return value are on lines not
 * visible in this excerpt.
 */
5035 static int add_pending(struct cache_tree *pending,
5036 struct cache_tree *seen, u64 bytenr, u32 size)
5039 ret = add_cache_extent(seen, bytenr, size);
5042 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits with the next block ranges to process and count them.
 *
 * Sources are consulted in this order in the visible code:
 *  1. the first extent of @reada, stored alone in bits[0];
 *  2. @nodes, searched from @last minus 32768 when @last is above 32768,
 *     with a second search from 0 (presumably a fallback when the first
 *     one finds nothing);
 *  3. @pending, searched from 0.
 * Consecutive extents are copied into @bits while entries remain and fewer
 * than @bits_nr slots are used. When more than 8 slots are still free,
 * extents from @pending that start within 32768 bytes after the end of the
 * previously taken range are appended as well.
 * NOTE(review): @last is stored in an unsigned long local; where that type
 * is narrower than u64 the value would be truncated - confirm intent.
 * NOTE(review): the last parameter, the branch conditions and the return
 * statements are on lines not visible in this excerpt.
 */
5046 static int pick_next_pending(struct cache_tree *pending,
5047 struct cache_tree *reada,
5048 struct cache_tree *nodes,
5049 u64 last, struct block_info *bits, int bits_nr,
5052 unsigned long node_start = last;
5053 struct cache_extent *cache;
5056 cache = search_cache_extent(reada, 0);
5058 bits[0].start = cache->start;
5059 bits[0].size = cache->size;
5064 if (node_start > 32768)
5065 node_start -= 32768;
5067 cache = search_cache_extent(nodes, node_start);
5069 cache = search_cache_extent(nodes, 0);
5072 cache = search_cache_extent(pending, 0);
5077 bits[ret].start = cache->start;
5078 bits[ret].size = cache->size;
5079 cache = next_cache_extent(cache);
5081 } while (cache && ret < bits_nr);
5087 bits[ret].start = cache->start;
5088 bits[ret].size = cache->size;
5089 cache = next_cache_extent(cache);
5091 } while (cache && ret < bits_nr);
5093 if (bits_nr - ret > 8) {
5094 u64 lookup = bits[0].start + bits[0].size;
5095 struct cache_extent *next;
5096 next = search_cache_extent(pending, lookup);
5098 if (next->start - lookup > 32768)
5100 bits[ret].start = next->start;
5101 bits[ret].size = next->size;
5102 lookup = next->start + next->size;
5106 next = next_cache_extent(next);
/*
 * Per-extent destructor used for the chunk cache tree.
 *
 * Recovers the struct chunk_record enclosing @cache and unlinks both of its
 * list heads (list and dextents).
 * NOTE(review): the release of the record memory is on a line not visible
 * in this excerpt.
 */
5114 static void free_chunk_record(struct cache_extent *cache)
5116 struct chunk_record *rec;
5118 rec = container_of(cache, struct chunk_record, cache);
5119 list_del_init(&rec->list);
5120 list_del_init(&rec->dextents);
/*
 * Release every entry of @chunk_cache by handing the tree to
 * cache_tree_free_extents() with free_chunk_record() as the callback.
 */
5124 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5126 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * Per-node destructor for the device record tree.
 *
 * Recovers the struct device_record enclosing rb-tree @node.
 * NOTE(review): the release of the record is on a line not visible in this
 * excerpt.
 */
5129 static void free_device_record(struct rb_node *node)
5131 struct device_record *rec;
5133 rec = container_of(node, struct device_record, node);
/*
 * Macro invocation taking the name device_cache and free_device_record.
 * NOTE(review): presumably expands to the function that frees a whole
 * device record tree using free_device_record() per node - confirm against
 * the macro definition, which is outside this file excerpt.
 */
5137 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Add @bg_rec to @tree.
 *
 * The record is inserted into the cache tree of @tree by its embedded cache
 * extent and appended to the block_groups list of @tree.
 * NOTE(review): the check of the insertion result and the return value are
 * on lines not visible in this excerpt.
 */
5139 int insert_block_group_record(struct block_group_tree *tree,
5140 struct block_group_record *bg_rec)
5144 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5148 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * Per-extent destructor used for the block group tree.
 *
 * Recovers the struct block_group_record enclosing @cache and unlinks it
 * from its list.
 * NOTE(review): the release of the record memory is on a line not visible
 * in this excerpt.
 */
5152 static void free_block_group_record(struct cache_extent *cache)
5154 struct block_group_record *rec;
5156 rec = container_of(cache, struct block_group_record, cache);
5157 list_del_init(&rec->list);
/*
 * Release every entry of the cache tree inside @tree by handing it to
 * cache_tree_free_extents() with free_block_group_record() as the callback.
 */
5161 void free_block_group_tree(struct block_group_tree *tree)
5163 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Add @de_rec to @tree.
 *
 * The record is inserted with insert_cache_extent2() rather than the plain
 * insert used elsewhere in this file, for the reason given in the comment
 * inside the function. It is then appended to both orphan lists of @tree,
 * no_chunk_orphans and no_device_orphans.
 * NOTE(review): the check of the insertion result and the return value are
 * on lines not visible in this excerpt.
 */
5166 int insert_device_extent_record(struct device_extent_tree *tree,
5167 struct device_extent_record *de_rec)
5172 * Device extent is a bit different from the other extents, because
5173 * the extents which belong to the different devices may have the
5174 * same start and size, so we need use the special extent cache
5175 * search/insert functions.
5177 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5181 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5182 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * Per-extent destructor used for the device extent tree.
 *
 * Recovers the struct device_extent_record enclosing @cache and unlinks it
 * from the chunk list and the device list when it is still on them.
 * NOTE(review): the release of the record memory is on a line not visible
 * in this excerpt.
 */
5186 static void free_device_extent_record(struct cache_extent *cache)
5188 struct device_extent_record *rec;
5190 rec = container_of(cache, struct device_extent_record, cache);
5191 if (!list_empty(&rec->chunk_list))
5192 list_del_init(&rec->chunk_list);
5193 if (!list_empty(&rec->device_list))
5194 list_del_init(&rec->device_list);
/*
 * Release every entry of the cache tree inside @tree by handing it to
 * cache_tree_free_extents() with free_device_extent_record() as the
 * callback.
 */
5198 void free_device_extent_tree(struct device_extent_tree *tree)
5200 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5203 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item into a backref (compiled only when
 * BTRFS_COMPAT_EXTENT_TREE_V0 is defined).
 *
 * The item key at @slot supplies the extent start (objectid) and the parent
 * (offset). A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID goes to
 * add_tree_backref(); any other ref goes to add_data_backref() with root,
 * owner and offset zero, the v0 ref count, found_ref zero and max_size
 * zero.
 * NOTE(review): the trailing arguments of the add_tree_backref() call and
 * the return value are on lines not visible in this excerpt.
 */
5204 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5205 struct extent_buffer *leaf, int slot)
5207 struct btrfs_extent_ref_v0 *ref0;
5208 struct btrfs_key key;
5211 btrfs_item_key_to_cpu(leaf, &key, slot);
5212 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5213 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5214 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
5217 ret = add_data_backref(extent_cache, key.objectid, key.offset,
5218 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk record from the chunk item at the given slot of
 * @leaf.
 *
 * The record is zero-allocated with room for all stripes
 * (btrfs_chunk_record_size(num_stripes)). The cache extent spans the key
 * offset for the chunk length; generation comes from the leaf header; the
 * key triple is copied; the remaining fields and every stripe (devid,
 * offset, device uuid) are read from the chunk item.
 * On allocation failure a message is printed to stderr.
 * NOTE(review): the slot parameter declaration, what follows the failure
 * message and the return statement are on lines not visible in this
 * excerpt.
 */
5224 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5225 struct btrfs_key *key,
5228 struct btrfs_chunk *ptr;
5229 struct chunk_record *rec;
5232 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5233 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5235 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5237 fprintf(stderr, "memory allocation failed\n");
5241 INIT_LIST_HEAD(&rec->list);
5242 INIT_LIST_HEAD(&rec->dextents);
5245 rec->cache.start = key->offset;
5246 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5248 rec->generation = btrfs_header_generation(leaf);
5250 rec->objectid = key->objectid;
5251 rec->type = key->type;
5252 rec->offset = key->offset;
5254 rec->length = rec->cache.size;
5255 rec->owner = btrfs_chunk_owner(leaf, ptr);
5256 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5257 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5258 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5259 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5260 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5261 rec->num_stripes = num_stripes;
5262 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5264 for (i = 0; i < rec->num_stripes; ++i) {
5265 rec->stripes[i].devid =
5266 btrfs_stripe_devid_nr(leaf, ptr, i);
5267 rec->stripes[i].offset =
5268 btrfs_stripe_offset_nr(leaf, ptr, i);
5269 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5270 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate a chunk item and add it to @chunk_cache.
 *
 * The chunk item at the given slot of @eb is checked with
 * btrfs_check_chunk_valid(); an invalid chunk is reported through error()
 * with the wording ignore it. A valid chunk is converted with
 * btrfs_new_chunk_record() and inserted into the cache; a message saying
 * the chunk existed is printed on a path whose condition is not visible
 * here, presumably a failed insertion.
 * NOTE(review): the slot parameter declaration, the tests on both results
 * and the return statements are on lines not visible in this excerpt.
 */
5277 static int process_chunk_item(struct cache_tree *chunk_cache,
5278 struct btrfs_key *key, struct extent_buffer *eb,
5281 struct chunk_record *rec;
5282 struct btrfs_chunk *chunk;
5285 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5287 * Do extra check for this chunk item,
5289 * It's still possible one can craft a leaf with CHUNK_ITEM, with
5290 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5291 * and owner<->key_type check.
5293 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5296 error("chunk(%llu, %llu) is not valid, ignore it",
5297 key->offset, btrfs_chunk_length(eb, chunk));
5300 rec = btrfs_new_chunk_record(eb, key, slot);
5301 ret = insert_cache_extent(chunk_cache, &rec->cache);
5303 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5304 rec->offset, rec->length);
/*
 * Build a device record from the dev item at @slot of @eb and insert it
 * into the rb-tree @dev_cache.
 *
 * The record comes from malloc(), so only the fields assigned here are
 * initialised by the visible code. devid is first set from key->offset and
 * later overwritten with the id stored in the item. Generation comes from
 * the leaf header; the key triple, total bytes and bytes used are copied.
 * Insertion uses rb_insert() with device_record_compare; a message saying
 * the device existed is printed on a path whose condition is not visible
 * here, presumably a failed insertion.
 * NOTE(review): what follows the allocation failure message, the test on
 * the insertion result and the return value are on lines not visible in
 * this excerpt.
 */
5311 static int process_device_item(struct rb_root *dev_cache,
5312 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5314 struct btrfs_dev_item *ptr;
5315 struct device_record *rec;
5318 ptr = btrfs_item_ptr(eb,
5319 slot, struct btrfs_dev_item);
5321 rec = malloc(sizeof(*rec));
5323 fprintf(stderr, "memory allocation failed\n");
5327 rec->devid = key->offset;
5328 rec->generation = btrfs_header_generation(eb);
5330 rec->objectid = key->objectid;
5331 rec->type = key->type;
5332 rec->offset = key->offset;
5334 rec->devid = btrfs_device_id(eb, ptr);
5335 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5336 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5338 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5340 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block group record from the block group item at the
 * given slot of @leaf.
 *
 * The record is zero-allocated. The cache extent starts at key->objectid
 * and spans key->offset; generation comes from the leaf header; the key
 * triple is copied; flags are read from the item; the list head is
 * initialised empty.
 * NOTE(review): the slot parameter declaration, what follows the allocation
 * failure message and the return statement are on lines not visible in this
 * excerpt.
 */
5347 struct block_group_record *
5348 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5351 struct btrfs_block_group_item *ptr;
5352 struct block_group_record *rec;
5354 rec = calloc(1, sizeof(*rec));
5356 fprintf(stderr, "memory allocation failed\n");
5360 rec->cache.start = key->objectid;
5361 rec->cache.size = key->offset;
5363 rec->generation = btrfs_header_generation(leaf);
5365 rec->objectid = key->objectid;
5366 rec->type = key->type;
5367 rec->offset = key->offset;
5369 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5370 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5372 INIT_LIST_HEAD(&rec->list);
/*
 * Convert the block group item at @slot of @eb into a record and insert it
 * into @block_group_cache.
 *
 * A message saying the block group existed is printed on a path whose
 * condition is not visible here, presumably a failed insertion.
 * NOTE(review): the test on the insertion result and the return value are
 * on lines not visible in this excerpt.
 */
5377 static int process_block_group_item(struct block_group_tree *block_group_cache,
5378 struct btrfs_key *key,
5379 struct extent_buffer *eb, int slot)
5381 struct block_group_record *rec;
5384 rec = btrfs_new_block_group_record(eb, key, slot);
5385 ret = insert_block_group_record(block_group_cache, rec);
5387 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5388 rec->objectid, rec->offset);
/*
 * Build an in-memory device extent record from the dev extent item at @slot
 * of @leaf.
 *
 * The record is zero-allocated. The cache extent carries key->objectid as
 * its objectid, key->offset as its start and the extent length as its size.
 * Generation comes from the leaf header; the key triple is copied; the
 * chunk objectid, chunk offset and length are read from the item; both list
 * heads are initialised empty.
 * NOTE(review): what follows the allocation failure message and the return
 * statement are on lines not visible in this excerpt.
 */
5395 struct device_extent_record *
5396 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5397 struct btrfs_key *key, int slot)
5399 struct device_extent_record *rec;
5400 struct btrfs_dev_extent *ptr;
5402 rec = calloc(1, sizeof(*rec));
5404 fprintf(stderr, "memory allocation failed\n");
5408 rec->cache.objectid = key->objectid;
5409 rec->cache.start = key->offset;
5411 rec->generation = btrfs_header_generation(leaf);
5413 rec->objectid = key->objectid;
5414 rec->type = key->type;
5415 rec->offset = key->offset;
5417 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5418 rec->chunk_objecteid =
5419 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5421 btrfs_dev_extent_chunk_offset(leaf, ptr);
5422 rec->length = btrfs_dev_extent_length(leaf, ptr);
5423 rec->cache.size = rec->length;
5425 INIT_LIST_HEAD(&rec->chunk_list);
5426 INIT_LIST_HEAD(&rec->device_list);
/*
 * Convert a dev extent item of @eb into a record and insert it into
 * @dev_extent_cache.
 *
 * A message saying the device extent existed is printed on a path whose
 * condition is not visible here, presumably a failed insertion.
 * NOTE(review): the return type line, the slot parameter declaration, the
 * test on the insertion result and the return value are on lines not
 * visible in this excerpt.
 */
5432 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5433 struct btrfs_key *key, struct extent_buffer *eb,
5436 struct device_extent_record *rec;
5439 rec = btrfs_new_device_extent_record(eb, key, slot);
5440 ret = insert_device_extent_record(dev_extent_cache, rec);
5443 "Device extent[%llu, %llu, %llu] existed.\n",
5444 rec->objectid, rec->offset, rec->length);
/*
 * Feed one EXTENT_ITEM / METADATA_ITEM from the extent tree into
 * @extent_cache, together with all of its inline backrefs.
 *
 * Visible steps:
 *  1. Read the key at @slot.  For METADATA_ITEM_KEY the extent length is
 *     root->nodesize, otherwise it is key.offset.
 *  2. Reject a bytenr that is not aligned to root->sectorsize
 *     ("ignoring invalid extent").
 *  3. If the item is smaller than struct btrfs_extent_item, treat it as a
 *     v0 extent item (only when BTRFS_COMPAT_EXTENT_TREE_V0 is defined):
 *     take the ref count from the v0 layout, add the extent record and
 *     return the result of add_extent_rec().
 *  4. Otherwise read the ref count and the TREE_BLOCK flag, reject a
 *     metadata extent whose length differs from nodesize and a data extent
 *     whose length is not sectorsize aligned, then add the extent record.
 *  5. Walk the inline refs that follow the extent item up to the end of the
 *     item and register each one with add_tree_backref() or
 *     add_data_backref() depending on its type.  An unknown type prints
 *     "corrupt extent record".
 *
 * NOTE(review): the loop header, the switch statement, several call
 * arguments, the assignments to the metadata flag and all return
 * statements are missing from this listing, so the exit codes and what
 * happens after each error message cannot be stated from here.
 */
5451 static int process_extent_item(struct btrfs_root *root,
5452 struct cache_tree *extent_cache,
5453 struct extent_buffer *eb, int slot)
5455 struct btrfs_extent_item *ei;
5456 struct btrfs_extent_inline_ref *iref;
5457 struct btrfs_extent_data_ref *dref;
5458 struct btrfs_shared_data_ref *sref;
5459 struct btrfs_key key;
5460 struct extent_record tmpl;
5465 u32 item_size = btrfs_item_size_nr(eb, slot);
5471 btrfs_item_key_to_cpu(eb, &key, slot);
5473 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5475 num_bytes = root->nodesize;
5477 num_bytes = key.offset;
5480 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5481 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5482 key.objectid, root->sectorsize);
/* An item shorter than the current extent item struct is handled as v0. */
5485 if (item_size < sizeof(*ei)) {
5486 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5487 struct btrfs_extent_item_v0 *ei0;
5488 BUG_ON(item_size != sizeof(*ei0));
5489 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5490 refs = btrfs_extent_refs_v0(eb, ei0);
5494 memset(&tmpl, 0, sizeof(tmpl));
5495 tmpl.start = key.objectid;
5496 tmpl.nr = num_bytes;
5497 tmpl.extent_item_refs = refs;
5498 tmpl.metadata = metadata;
5500 tmpl.max_size = num_bytes;
5502 return add_extent_rec(extent_cache, &tmpl);
5505 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5506 refs = btrfs_extent_refs(eb, ei);
5507 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5511 if (metadata && num_bytes != root->nodesize) {
5512 error("ignore invalid metadata extent, length %llu does not equal to %u",
5513 num_bytes, root->nodesize);
5516 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5517 error("ignore invalid data extent, length %llu is not aligned to %u",
5518 num_bytes, root->sectorsize);
5522 memset(&tmpl, 0, sizeof(tmpl));
5523 tmpl.start = key.objectid;
5524 tmpl.nr = num_bytes;
5525 tmpl.extent_item_refs = refs;
5526 tmpl.metadata = metadata;
5528 tmpl.max_size = num_bytes;
/* NOTE(review): the result of this add_extent_rec() call is not captured on this line. */
5529 add_extent_rec(extent_cache, &tmpl);
/*
 * Inline refs start right after the extent item.  For a tree block stored
 * under an EXTENT_ITEM key a btrfs_tree_block_info is skipped first.
 */
5531 ptr = (unsigned long)(ei + 1);
5532 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5533 key.type == BTRFS_EXTENT_ITEM_KEY)
5534 ptr += sizeof(struct btrfs_tree_block_info);
5536 end = (unsigned long)ei + item_size;
5538 iref = (struct btrfs_extent_inline_ref *)ptr;
5539 type = btrfs_extent_inline_ref_type(eb, iref);
5540 offset = btrfs_extent_inline_ref_offset(eb, iref);
5542 case BTRFS_TREE_BLOCK_REF_KEY:
5543 ret = add_tree_backref(extent_cache, key.objectid,
5546 error("add_tree_backref failed: %s",
5549 case BTRFS_SHARED_BLOCK_REF_KEY:
5550 ret = add_tree_backref(extent_cache, key.objectid,
5553 error("add_tree_backref failed: %s",
5556 case BTRFS_EXTENT_DATA_REF_KEY:
/* The data ref payload overlays the offset field of the inline ref. */
5557 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5558 add_data_backref(extent_cache, key.objectid, 0,
5559 btrfs_extent_data_ref_root(eb, dref),
5560 btrfs_extent_data_ref_objectid(eb,
5562 btrfs_extent_data_ref_offset(eb, dref),
5563 btrfs_extent_data_ref_count(eb, dref),
5566 case BTRFS_SHARED_DATA_REF_KEY:
/* The shared data ref count is stored immediately after the inline ref. */
5567 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5568 add_data_backref(extent_cache, key.objectid, offset,
5570 btrfs_shared_data_ref_count(eb, sref),
5574 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5575 key.objectid, key.type, num_bytes);
5578 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check that the range [@offset, @offset + @bytes) of block group @cache is
 * described by exactly one free space entry, then remove that entry.
 *
 * Before the lookup, the range is adjusted around superblock copies: for
 * each of the BTRFS_SUPER_MIRROR_MAX superblock offsets, btrfs_rmap_block()
 * is asked for logical addresses within the block group (logical[], with
 * stripe_len bytes each).  A stripe that does not overlap the range is
 * ignored.  A stripe at the start or end trims the range.  A stripe in the
 * middle makes the function recurse on the left part and continue with the
 * right part.
 *
 * After trimming, btrfs_find_free_space() must return an entry whose offset
 * and byte count both match.  Each mismatch prints its own diagnostic.  A
 * matching entry is removed with unlink_free_space(), so entries still left
 * in the free space ctl afterwards were never matched by a checked range.
 *
 * NOTE(review): the loop over logical[], the early exits and all return
 * statements are missing from this listing, so the return values are not
 * documented here.
 */
5585 static int check_cache_range(struct btrfs_root *root,
5586 struct btrfs_block_group_cache *cache,
5587 u64 offset, u64 bytes)
5589 struct btrfs_free_space *entry;
5595 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5596 bytenr = btrfs_sb_offset(i);
5597 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5598 cache->key.objectid, bytenr, 0,
5599 &logical, &nr, &stripe_len);
/* The next two tests detect a stripe entirely before or after the range. */
5604 if (logical[nr] + stripe_len <= offset)
5606 if (offset + bytes <= logical[nr])
5608 if (logical[nr] == offset) {
5609 if (stripe_len >= bytes) {
5613 bytes -= stripe_len;
5614 offset += stripe_len;
5615 } else if (logical[nr] < offset) {
5616 if (logical[nr] + stripe_len >=
5621 bytes = (offset + bytes) -
5622 (logical[nr] + stripe_len);
5623 offset = logical[nr] + stripe_len;
5626 * Could be tricky, the super may land in the
5627 * middle of the area we're checking. First
5628 * check the easiest case, it's at the end.
5630 if (logical[nr] + stripe_len >=
5632 bytes = logical[nr] - offset;
5636 /* Check the left side */
5637 ret = check_cache_range(root, cache,
5639 logical[nr] - offset);
5645 /* Now we continue with the right side */
5646 bytes = (offset + bytes) -
5647 (logical[nr] + stripe_len);
5648 offset = logical[nr] + stripe_len;
5655 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5657 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5658 offset, offset+bytes);
5662 if (entry->offset != offset) {
5663 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5668 if (entry->bytes != bytes) {
5669 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5670 bytes, entry->bytes, offset);
5674 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check the loaded free space entries of block group @cache against
 * the extent tree.
 *
 * The extent tree is walked from max(block group start,
 * BTRFS_SUPER_INFO_OFFSET) to the end of the block group, with `last`
 * tracking the end of the most recent allocated extent.  Every gap between
 * `last` and the next EXTENT_ITEM / METADATA_ITEM, and the tail between
 * `last` and the end of the block group, is passed to check_cache_range(),
 * which expects a matching free space entry and removes it.  An extent
 * length is key.offset for EXTENT_ITEM_KEY and root->nodesize otherwise.
 *
 * Finally the free space rb-tree is tested for leftover entries, which
 * triggers the "There are still entries left in the space ..." message.
 *
 * Note that @root is replaced by fs_info->extent_root on entry.
 *
 * NOTE(review): the loop construct, slot advancement, error branches and
 * return statements are missing from this listing.
 */
5679 static int verify_space_cache(struct btrfs_root *root,
5680 struct btrfs_block_group_cache *cache)
5682 struct btrfs_path path;
5683 struct extent_buffer *leaf;
5684 struct btrfs_key key;
5688 root = root->fs_info->extent_root;
5690 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5692 btrfs_init_path(&path);
5693 key.objectid = last;
5695 key.type = BTRFS_EXTENT_ITEM_KEY;
5696 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5701 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5702 ret = btrfs_next_leaf(root, &path);
5710 leaf = path.nodes[0];
5711 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Keys at or beyond the end of this block group end the walk. */
5712 if (key.objectid >= cache->key.offset + cache->key.objectid)
5714 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5715 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Extent starts exactly at `last`: no free gap, just advance past it. */
5720 if (last == key.objectid) {
5721 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5722 last = key.objectid + key.offset;
5724 last = key.objectid + root->nodesize;
/* Otherwise [last, key.objectid) should be free space. */
5729 ret = check_cache_range(root, cache, last,
5730 key.objectid - last);
5733 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5734 last = key.objectid + key.offset;
5736 last = key.objectid + root->nodesize;
/* Free space between the last extent and the end of the block group. */
5740 if (last < cache->key.objectid + cache->key.offset)
5741 ret = check_cache_range(root, cache, last,
5742 cache->key.objectid +
5743 cache->key.offset - last);
5746 btrfs_release_path(&path);
5749 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5750 fprintf(stderr, "There are still entries left in the space "
/*
 * Verify the free space information of every block group.
 *
 * If the superblock cache generation is not -1 and differs from the
 * superblock generation, a message says the space cache will be
 * invalidated (what follows that message is not visible here).
 *
 * Otherwise block groups are visited in address order, starting just past
 * the primary superblock, via btrfs_lookup_first_block_group().  For each
 * one the free space ctl is created if missing, and
 * btrfs_remove_free_space_cache() is also called (presumably in the
 * else-branch to drop stale entries -- the branch structure is not
 * visible).  Free space is then loaded from the free space tree when the
 * FREE_SPACE_TREE compat_ro bit is set (with super stripes excluded around
 * the load), or from the v1 space cache otherwise, and checked with
 * verify_space_cache().
 *
 * When ctx.progress_enabled is set, the task position is set to
 * TASK_FREE_SPACE and the progress task is started.  task_stop() is called
 * near the end.
 *
 * Returns -EINVAL if the local error counter is non-zero, 0 otherwise.
 * NOTE(review): the lines that update that counter are missing from this
 * listing.
 */
5758 static int check_space_cache(struct btrfs_root *root)
5760 struct btrfs_block_group_cache *cache;
5761 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5765 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5766 btrfs_super_generation(root->fs_info->super_copy) !=
5767 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5768 printf("cache and super generation don't match, space cache "
5769 "will be invalidated\n");
5773 if (ctx.progress_enabled) {
5774 ctx.tp = TASK_FREE_SPACE;
5775 task_start(ctx.info);
5779 cache = btrfs_lookup_first_block_group(root->fs_info, start);
/* Next lookup starts right after the block group just found. */
5783 start = cache->key.objectid + cache->key.offset;
5784 if (!cache->free_space_ctl) {
5785 if (btrfs_init_free_space_ctl(cache,
5786 root->sectorsize)) {
5791 btrfs_remove_free_space_cache(cache);
5794 if (btrfs_fs_compat_ro(root->fs_info,
5795 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5796 ret = exclude_super_stripes(root, cache);
5798 fprintf(stderr, "could not exclude super stripes: %s\n",
5803 ret = load_free_space_tree(root->fs_info, cache);
5804 free_excluded_extents(root, cache);
5806 fprintf(stderr, "could not load free space tree: %s\n",
5813 ret = load_free_space_cache(root->fs_info, cache);
5818 ret = verify_space_cache(root, cache);
5820 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5821 cache->key.objectid);
5826 task_stop(ctx.info);
5828 return error ? -EINVAL : 0;
/*
 * Read the data covered by one csum item and compare it with the stored
 * checksums.
 *
 * @bytenr / @num_bytes  data range covered by the csum item
 * @leaf_offset          offset of the csum item payload inside @eb
 * @eb                   leaf holding the csum item
 *
 * @num_bytes is tested for being a multiple of root->sectorsize and a
 * buffer of @num_bytes is allocated with malloc().  Data is read with
 * read_extent_data() in as large pieces as it returns.  For every
 * sectorsize chunk read, a checksum is computed with btrfs_csum_data() /
 * btrfs_csum_final() and compared with the csum_size bytes stored at
 * leaf_offset + (chunk index * csum_size).
 *
 * On a mismatch a "mirror ... csum ... expected csum ..." line is printed
 * and btrfs_num_copies() is consulted.  The comment below says another
 * mirror is tried, but the retry code itself is not visible in this
 * listing.
 *
 * NOTE(review): allocation failure handling, the free of the buffer and
 * the return statements are missing from this listing.
 */
5831 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5832 u64 num_bytes, unsigned long leaf_offset,
5833 struct extent_buffer *eb) {
5836 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5838 unsigned long csum_offset;
5842 u64 data_checked = 0;
5848 if (num_bytes % root->sectorsize)
5851 data = malloc(num_bytes);
5855 while (offset < num_bytes) {
5858 read_len = num_bytes - offset;
5859 /* read as much space once a time */
5860 ret = read_extent_data(root, data + offset,
5861 bytenr + offset, &read_len, mirror);
5865 /* verify every 4k data's checksum */
5866 while (data_checked < read_len) {
5868 tmp = offset + data_checked;
5870 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5871 csum, root->sectorsize);
5872 btrfs_csum_final(csum, (u8 *)&csum);
/* One csum_size slot per sectorsize chunk, indexed from the item start. */
5874 csum_offset = leaf_offset +
5875 tmp / root->sectorsize * csum_size;
5876 read_extent_buffer(eb, (char *)&csum_expected,
5877 csum_offset, csum_size);
5878 /* try another mirror */
5879 if (csum != csum_expected) {
5880 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5881 mirror, bytenr + tmp,
5882 csum, csum_expected);
5883 num_copies = btrfs_num_copies(
5884 &root->fs_info->mapping_tree,
5886 if (mirror < num_copies - 1) {
5891 data_checked += root->sectorsize;
/*
 * Check that the byte range starting at @bytenr (length num_bytes) is
 * covered by EXTENT_ITEM records in the extent tree.
 *
 * The extent tree is searched with key (bytenr, EXTENT_ITEM_KEY, -1).  The
 * position is then moved back, first one step and then over any items
 * whose type sorts above EXTENT_ITEM_KEY, before walking forward.  During
 * the walk, items of other types are filtered by the key.type test, and
 * each overlapping extent item shrinks the wanted range:
 *   - extent starts at bytenr: the range loses its front
 *   - extent starts before bytenr: the range restarts at the extent end
 *   - extent lies inside the range: recurse on the right-hand remainder
 *     and keep only the left-hand part, [bytenr, key.objectid)
 *
 * If bytes are still uncovered at the end and no error was recorded,
 * "There are no extents for csum range" is printed.
 *
 * NOTE(review): the second parameter line, the loop construct, slot
 * movement, branch exits and the return statements are missing from this
 * listing, so the return values are not documented here.
 */
5900 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5903 struct btrfs_path path;
5904 struct extent_buffer *leaf;
5905 struct btrfs_key key;
5908 btrfs_init_path(&path);
5909 key.objectid = bytenr;
5910 key.type = BTRFS_EXTENT_ITEM_KEY;
5911 key.offset = (u64)-1;
5914 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
5917 fprintf(stderr, "Error looking up extent record %d\n", ret);
5918 btrfs_release_path(&path);
5921 if (path.slots[0] > 0) {
5924 ret = btrfs_prev_leaf(root, &path);
5927 } else if (ret > 0) {
5934 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
5937 * Block group items come before extent items if they have the same
5938 * bytenr, so walk back one more just in case. Dear future traveller,
5939 * first congrats on mastering time travel. Now if it's not too much
5940 * trouble could you go back to 2006 and tell Chris to make the
5941 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5942 * EXTENT_ITEM_KEY please?
5944 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5945 if (path.slots[0] > 0) {
5948 ret = btrfs_prev_leaf(root, &path);
5951 } else if (ret > 0) {
5956 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
5960 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
5961 ret = btrfs_next_leaf(root, &path);
5963 fprintf(stderr, "Error going to next leaf "
5965 btrfs_release_path(&path);
5971 leaf = path.nodes[0];
5972 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
5973 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5977 if (key.objectid + key.offset < bytenr) {
5981 if (key.objectid > bytenr + num_bytes)
5984 if (key.objectid == bytenr) {
5985 if (key.offset >= num_bytes) {
5989 num_bytes -= key.offset;
5990 bytenr += key.offset;
5991 } else if (key.objectid < bytenr) {
5992 if (key.objectid + key.offset >= bytenr + num_bytes) {
5996 num_bytes = (bytenr + num_bytes) -
5997 (key.objectid + key.offset);
5998 bytenr = key.objectid + key.offset;
6000 if (key.objectid + key.offset < bytenr + num_bytes) {
6001 u64 new_start = key.objectid + key.offset;
6002 u64 new_bytes = bytenr + num_bytes - new_start;
6005 * Weird case, the extent is in the middle of
6006 * our range, we'll have to search one side
6007 * and then the other. Not sure if this happens
6008 * in real life, but no harm in coding it up
6009 * anyway just in case.
6011 btrfs_release_path(&path);
6012 ret = check_extent_exists(root, new_start,
6015 fprintf(stderr, "Right section didn't "
6019 num_bytes = key.objectid - bytenr;
6022 num_bytes = key.objectid - bytenr;
6029 if (num_bytes && !ret) {
6030 fprintf(stderr, "There are no extents for csum range "
6031 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6035 btrfs_release_path(&path);
/*
 * Walk the csum tree and make sure every checksummed range has an extent
 * record.  Optionally the data itself is verified as well.
 *
 * @root is replaced by fs_info->csum_root.  If its root node is not
 * uptodate, "No valid csum tree found" is printed.
 *
 * For every EXTENT_CSUM item, the number of data bytes it covers is
 * (item size / csum size) * sectorsize.  When the global check_data_csum
 * is set, check_extent_csums() reads the data and compares checksums.
 * Otherwise that step is skipped via the skip_csum_check label.
 *
 * Adjacent csum items are coalesced into one run starting at `offset` and
 * `num_bytes` long.  When the next item does not start at
 * offset + num_bytes, the finished run is passed to check_extent_exists()
 * and a failure prints "Csum exists for ... but there is no extent
 * record".
 *
 * NOTE(review): the loop construct, the label itself, the resets of
 * num_bytes, the error accounting and the return statements are missing
 * from this listing.
 */
6039 static int check_csums(struct btrfs_root *root)
6041 struct btrfs_path path;
6042 struct extent_buffer *leaf;
6043 struct btrfs_key key;
6044 u64 offset = 0, num_bytes = 0;
6045 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6049 unsigned long leaf_offset;
6051 root = root->fs_info->csum_root;
6052 if (!extent_buffer_uptodate(root->node)) {
6053 fprintf(stderr, "No valid csum tree found\n");
6057 btrfs_init_path(&path);
6058 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6059 key.type = BTRFS_EXTENT_CSUM_KEY;
6061 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6063 fprintf(stderr, "Error searching csum tree %d\n", ret);
6064 btrfs_release_path(&path);
6068 if (ret > 0 && path.slots[0])
6073 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
6074 ret = btrfs_next_leaf(root, &path);
6076 fprintf(stderr, "Error going to next leaf "
6083 leaf = path.nodes[0];
6085 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
6086 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum_size bytes of item payload covers one sectorsize of data. */
6091 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
6092 csum_size) * root->sectorsize;
6093 if (!check_data_csum)
6094 goto skip_csum_check;
6095 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
6096 ret = check_extent_csums(root, key.offset, data_len,
6102 offset = key.offset;
/* A gap in the csum coverage closes the current run. */
6103 } else if (key.offset != offset + num_bytes) {
6104 ret = check_extent_exists(root, offset, num_bytes);
6106 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6107 "there is no extent record\n",
6108 offset, offset+num_bytes);
6111 offset = key.offset;
6114 num_bytes += data_len;
6118 btrfs_release_path(&path);
/*
 * Compare @key with @drop_key field by field, in the order objectid, type,
 * offset.  A later field is only examined when all earlier ones are equal.
 *
 * Every visible test is a strict "less than", so presumably the function
 * reports (non-zero) that @key sorts before @drop_key.  The return
 * statements are missing from this listing -- TODO confirm against the
 * full source.
 */
6122 static int is_dropped_key(struct btrfs_key *key,
6123 struct btrfs_key *drop_key) {
6124 if (key->objectid < drop_key->objectid)
6126 else if (key->objectid == drop_key->objectid) {
6127 if (key->type < drop_key->type)
6129 else if (key->type == drop_key->type) {
6130 if (key->offset < drop_key->offset)
6138 * Here are the rules for FULL_BACKREF.
6140 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6141 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6142 * FULL_BACKREF set.
6143 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6144 * if it happened after the relocation occurred since we'll have dropped the
6145 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6146 * have no real way to know for sure.
6148 * We process the blocks one root at a time, and we start from the lowest root
6149 * objectid and go to the highest. So we can just lookup the owner backref for
6150 * the record and if we don't find it then we know it doesn't exist and we have
6151 * a FULL BACKREF.
6153 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6154 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6155 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Work out, without consulting the on-disk extent item, whether tree block
 * @buf should carry BTRFS_BLOCK_FLAG_FULL_BACKREF, and report the result
 * through the flags output pointer.
 *
 * The extent record for buf->start is looked up in @extent_cache.  The
 * visible tests then look at, in order:
 *   - ri->objectid below BTRFS_FIRST_FREE_OBJECTID
 *   - buf being the root node of @ri (buf->start == ri->bytenr)
 *   - BTRFS_HEADER_FLAG_RELOC set in the header of buf
 *   - the header owner being equal to ri->objectid
 *   - a tree backref for (parent 0, owner) existing on the record
 *
 * Two outcomes are visible.  In one, the flag is left alone and the record
 * is marked bad_full_backref if its cached flag_block_full_backref is set
 * to something other than 0.  In the other, the FULL_BACKREF bit is OR-ed
 * in and the record is marked bad_full_backref if the cached value is set
 * to something other than 1.
 *
 * NOTE(review): the jumps that connect each test to one of the two
 * outcomes, the flags parameter line and the return statements are
 * missing from this listing, so which test leads to which outcome is not
 * stated here.
 */
6157 static int calc_extent_flag(struct btrfs_root *root,
6158 struct cache_tree *extent_cache,
6159 struct extent_buffer *buf,
6160 struct root_item_record *ri,
6163 struct extent_record *rec;
6164 struct cache_extent *cache;
6165 struct tree_backref *tback;
6168 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6169 /* we have added this extent before */
6173 rec = container_of(cache, struct extent_record, cache);
6176 * Except file/reloc tree, we can not have
6179 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6184 if (buf->start == ri->bytenr)
6187 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6190 owner = btrfs_header_owner(buf);
6191 if (owner == ri->objectid)
6194 tback = find_tree_backref(rec, 0, owner);
/* Outcome without FULL_BACKREF: flag a record that was cached with it set. */
6199 if (rec->flag_block_full_backref != FLAG_UNSET &&
6200 rec->flag_block_full_backref != 0)
6201 rec->bad_full_backref = 1;
/* Outcome with FULL_BACKREF: flag a record that was cached with it clear. */
6204 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6205 if (rec->flag_block_full_backref != FLAG_UNSET &&
6206 rec->flag_block_full_backref != 1)
6207 rec->bad_full_backref = 1;
/*
 * Print a one-line diagnostic to stderr of the form
 * "Invalid key type(<type>) found in root(<root>)".
 *
 * @key_type is rendered by print_key_type() and @rootid by
 * print_objectid().
 */
6211 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6213 fprintf(stderr, "Invalid key type(");
6214 print_key_type(stderr, 0, key_type);
6215 fprintf(stderr, ") found in root(");
6216 print_objectid(stderr, rootid, 0);
6217 fprintf(stderr, ")\n");
6221 * Check if the key is valid with its extent buffer.
6223 * This is an early check in case an invalid key exists in an extent buffer.
6224 * This is not comprehensive yet, but should prevent a wrong key/item from
6225 * being passed further.
/*
 * Check that an item of type @key_type may appear in the tree @rootid.
 *
 * Visible rules:
 *   DEV_ITEM, CHUNK_ITEM                     only in the chunk tree
 *   EXTENT_ITEM, METADATA_ITEM,
 *   BLOCK_GROUP_ITEM                         only in the extent tree
 *   ROOT_ITEM                                only in the root tree
 *   DEV_EXTENT                               only in the dev tree
 * A violation is reported through report_mismatch_key_root().
 *
 * NOTE(review): the case label under "valid in csum and log tree" is
 * BTRFS_CSUM_TREE_OBJECTID, which by its name is a tree objectid rather
 * than a key type, while every other label here is a *_KEY constant.
 * Presumably BTRFS_EXTENT_CSUM_KEY was intended.  The second half of that
 * condition is also missing from this listing, so verify against the full
 * source before changing it.
 *
 * NOTE(review): the switch statement, the jumps to the error path and the
 * return statements are missing from this listing, so the return values
 * are not documented here.
 */
6227 static int check_type_with_root(u64 rootid, u8 key_type)
6230 /* Only valid in chunk tree */
6231 case BTRFS_DEV_ITEM_KEY:
6232 case BTRFS_CHUNK_ITEM_KEY:
6233 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6236 /* valid in csum and log tree */
6237 case BTRFS_CSUM_TREE_OBJECTID:
6238 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6242 case BTRFS_EXTENT_ITEM_KEY:
6243 case BTRFS_METADATA_ITEM_KEY:
6244 case BTRFS_BLOCK_GROUP_ITEM_KEY:
6245 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6248 case BTRFS_ROOT_ITEM_KEY:
6249 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6252 case BTRFS_DEV_EXTENT_KEY:
6253 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6259 report_mismatch_key_root(key_type, rootid);
/*
 * Process the next queued tree block: read it, validate it and record
 * everything it references in the checker's caches.
 *
 * Visible phases:
 *  1. pick_next_pending() fills @bits with upcoming blocks.  Each is added
 *     to @reada and handed to readahead_tree_block().  bits[0] becomes the
 *     block to process and is removed from @pending, @reada and @nodes.
 *  2. If @extent_cache already has a record for the block, its
 *     parent_generation is used for read_tree_block().  A block that is
 *     not uptodate is reported through record_bad_block_io().
 *  3. The FULL_BACKREF flag is determined.  Unless the global
 *     init_extent_tree is set, btrfs_lookup_extent_info() is consulted.
 *     calc_extent_flag() is used as well, and if it fails FULL_BACKREF is
 *     assumed.  The flag is then sanity-checked against the header owner,
 *     the RELOC header flag and ri->last_snapshot, adjusting the flag and
 *     setting rec->bad_full_backref on a contradiction, and finally stored
 *     in rec->flag_block_full_backref.
 *  4. check_block() validates the block.
 *  5. Leaf: every item is vetted with check_type_with_root() and then
 *     dispatched by key type to the matching process_*() / add_*_backref()
 *     helper.  Orphan items may be queued on delete_items.  For regular
 *     (non-inline, non-zero disk_bytenr) file extents,
 *     data_bytes_allocated and data_bytes_referenced are updated and a
 *     data backref is added.
 *     Node: for each child pointer not excluded by the drop-key test, an
 *     extent record and a tree backref are added and the child is queued
 *     on @nodes or @pending.
 *  6. Global byte counters and found_old_backref are updated and the
 *     buffer is released with free_extent_buffer().
 *
 * NOTE(review): many short lines (conditions, continue/goto statements,
 * some call arguments, two parameter lines and every return) are missing
 * from this listing.  Which branch guards which call, and the return
 * values, must be confirmed against the full source.
 */
6263 static int run_next_block(struct btrfs_root *root,
6264 struct block_info *bits,
6267 struct cache_tree *pending,
6268 struct cache_tree *seen,
6269 struct cache_tree *reada,
6270 struct cache_tree *nodes,
6271 struct cache_tree *extent_cache,
6272 struct cache_tree *chunk_cache,
6273 struct rb_root *dev_cache,
6274 struct block_group_tree *block_group_cache,
6275 struct device_extent_tree *dev_extent_cache,
6276 struct root_item_record *ri)
6278 struct extent_buffer *buf;
6279 struct extent_record *rec = NULL;
6290 struct btrfs_key key;
6291 struct cache_extent *cache;
6294 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6295 bits_nr, &reada_bits);
6300 for(i = 0; i < nritems; i++) {
6301 ret = add_cache_extent(reada, bits[i].start,
6306 /* fixme, get the parent transid */
6307 readahead_tree_block(root, bits[i].start,
/* The first picked block is the one processed in this call. */
6311 *last = bits[0].start;
6312 bytenr = bits[0].start;
6313 size = bits[0].size;
6315 cache = lookup_cache_extent(pending, bytenr, size);
6317 remove_cache_extent(pending, cache);
6320 cache = lookup_cache_extent(reada, bytenr, size);
6322 remove_cache_extent(reada, cache);
6325 cache = lookup_cache_extent(nodes, bytenr, size);
6327 remove_cache_extent(nodes, cache);
6330 cache = lookup_cache_extent(extent_cache, bytenr, size);
6332 rec = container_of(cache, struct extent_record, cache);
6333 gen = rec->parent_generation;
6336 /* fixme, get the real parent transid */
6337 buf = read_tree_block(root, bytenr, size, gen);
6338 if (!extent_buffer_uptodate(buf)) {
6339 record_bad_block_io(root->fs_info,
6340 extent_cache, bytenr, size);
6344 nritems = btrfs_header_nritems(buf);
6347 if (!init_extent_tree) {
6348 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6349 btrfs_header_level(buf), 1, NULL,
6352 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6354 fprintf(stderr, "Couldn't calc extent flags\n");
6355 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6360 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6362 fprintf(stderr, "Couldn't calc extent flags\n");
6363 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6367 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6369 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6370 ri->objectid == btrfs_header_owner(buf)) {
6372 * Ok we got to this block from it's original owner and
6373 * we have FULL_BACKREF set. Relocation can leave
6374 * converted blocks over so this is altogether possible,
6375 * however it's not possible if the generation > the
6376 * last snapshot, so check for this case.
6378 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6379 btrfs_header_generation(buf) > ri->last_snapshot) {
6380 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6381 rec->bad_full_backref = 1;
6386 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6387 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6388 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6389 rec->bad_full_backref = 1;
6393 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6394 rec->flag_block_full_backref = 1;
6398 rec->flag_block_full_backref = 0;
6400 owner = btrfs_header_owner(buf);
6403 ret = check_block(root, extent_cache, buf, flags);
6407 if (btrfs_is_leaf(buf)) {
6408 btree_space_waste += btrfs_leaf_free_space(root, buf);
6409 for (i = 0; i < nritems; i++) {
6410 struct btrfs_file_extent_item *fi;
6411 btrfs_item_key_to_cpu(buf, &key, i);
6413 * Check key type against the leaf owner.
6414 * Could filter quite a lot of early error if
6417 if (check_type_with_root(btrfs_header_owner(buf),
6419 fprintf(stderr, "ignoring invalid key\n");
6422 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6423 process_extent_item(root, extent_cache, buf,
6427 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6428 process_extent_item(root, extent_cache, buf,
6432 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6434 btrfs_item_size_nr(buf, i);
6437 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6438 process_chunk_item(chunk_cache, &key, buf, i);
6441 if (key.type == BTRFS_DEV_ITEM_KEY) {
6442 process_device_item(dev_cache, &key, buf, i);
6445 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6446 process_block_group_item(block_group_cache,
6450 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6451 process_device_extent_item(dev_extent_cache,
6456 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6457 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6458 process_extent_ref_v0(extent_cache, buf, i);
6465 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6466 ret = add_tree_backref(extent_cache,
6467 key.objectid, 0, key.offset, 0);
6469 error("add_tree_backref failed: %s",
6473 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6474 ret = add_tree_backref(extent_cache,
6475 key.objectid, key.offset, 0, 0);
6477 error("add_tree_backref failed: %s",
6481 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6482 struct btrfs_extent_data_ref *ref;
6483 ref = btrfs_item_ptr(buf, i,
6484 struct btrfs_extent_data_ref);
6485 add_data_backref(extent_cache,
6487 btrfs_extent_data_ref_root(buf, ref),
6488 btrfs_extent_data_ref_objectid(buf,
6490 btrfs_extent_data_ref_offset(buf, ref),
6491 btrfs_extent_data_ref_count(buf, ref),
6492 0, root->sectorsize);
6495 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6496 struct btrfs_shared_data_ref *ref;
6497 ref = btrfs_item_ptr(buf, i,
6498 struct btrfs_shared_data_ref);
6499 add_data_backref(extent_cache,
6500 key.objectid, key.offset, 0, 0, 0,
6501 btrfs_shared_data_ref_count(buf, ref),
6502 0, root->sectorsize);
6505 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6506 struct bad_item *bad;
6508 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6512 bad = malloc(sizeof(struct bad_item));
6515 INIT_LIST_HEAD(&bad->list);
6516 memcpy(&bad->key, &key,
6517 sizeof(struct btrfs_key));
6518 bad->root_id = owner;
6519 list_add_tail(&bad->list, &delete_items);
6522 if (key.type != BTRFS_EXTENT_DATA_KEY)
6524 fi = btrfs_item_ptr(buf, i,
6525 struct btrfs_file_extent_item);
6526 if (btrfs_file_extent_type(buf, fi) ==
6527 BTRFS_FILE_EXTENT_INLINE)
6529 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6532 data_bytes_allocated +=
6533 btrfs_file_extent_disk_num_bytes(buf, fi);
6534 if (data_bytes_allocated < root->sectorsize) {
6537 data_bytes_referenced +=
6538 btrfs_file_extent_num_bytes(buf, fi);
6539 add_data_backref(extent_cache,
6540 btrfs_file_extent_disk_bytenr(buf, fi),
6541 parent, owner, key.objectid, key.offset -
6542 btrfs_file_extent_offset(buf, fi), 1, 1,
6543 btrfs_file_extent_disk_num_bytes(buf, fi));
6547 struct btrfs_key first_key;
6549 first_key.objectid = 0;
6552 btrfs_item_key_to_cpu(buf, &first_key, 0);
6553 level = btrfs_header_level(buf);
6554 for (i = 0; i < nritems; i++) {
6555 struct extent_record tmpl;
6557 ptr = btrfs_node_blockptr(buf, i);
6558 size = root->nodesize;
6559 btrfs_node_key_to_cpu(buf, &key, i);
6561 if ((level == ri->drop_level)
6562 && is_dropped_key(&key, &ri->drop_key)) {
6567 memset(&tmpl, 0, sizeof(tmpl));
6568 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6569 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6574 tmpl.max_size = size;
6575 ret = add_extent_rec(extent_cache, &tmpl);
6579 ret = add_tree_backref(extent_cache, ptr, parent,
6582 error("add_tree_backref failed: %s",
6588 add_pending(nodes, seen, ptr, size);
6590 add_pending(pending, seen, ptr, size);
6593 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6594 nritems) * sizeof(struct btrfs_key_ptr);
6596 total_btree_bytes += buf->len;
6597 if (fs_root_objectid(btrfs_header_owner(buf)))
6598 total_fs_tree_bytes += buf->len;
6599 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6600 total_extent_tree_bytes += buf->len;
6601 if (!found_old_backref &&
6602 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6603 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6604 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6605 found_old_backref = 1;
6607 free_extent_buffer(buf);
/*
 * Queue the root node @buf of a tree for scanning and seed the extent
 * cache with it.
 *
 * The block is queued on @nodes when its header level is above 0 and on
 * @pending otherwise.  In both cases add_pending() is also given @seen.
 * An extent record starting at buf->start with max_size buf->len is added
 * to @extent_cache (other template fields are set on lines not visible
 * here).
 *
 * A tree backref is then added.  For the relocation tree, or a block whose
 * backref revision is older than BTRFS_MIXED_BACKREF_REV, the third
 * argument of add_tree_backref() is buf->start itself.  Otherwise the call
 * passes 0 followed by objectid.
 *
 * NOTE(review): the objectid parameter line, the remaining call arguments
 * and the return statement are missing from this listing.  The result of
 * add_extent_rec() is not captured on its visible line.
 */
6611 static int add_root_to_pending(struct extent_buffer *buf,
6612 struct cache_tree *extent_cache,
6613 struct cache_tree *pending,
6614 struct cache_tree *seen,
6615 struct cache_tree *nodes,
6618 struct extent_record tmpl;
6621 if (btrfs_header_level(buf) > 0)
6622 add_pending(nodes, seen, buf->start, buf->len);
6624 add_pending(pending, seen, buf->start, buf->len);
6626 memset(&tmpl, 0, sizeof(tmpl));
6627 tmpl.start = buf->start;
6632 tmpl.max_size = buf->len;
6633 add_extent_rec(extent_cache, &tmpl);
6635 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6636 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6637 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6640 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6645 /* as we fix the tree, we might be deleting blocks that
6646 * we're tracking for repair. This hook makes sure we
6647 * remove any backrefs for blocks as we are fixing them.
/*
 * Hook called when an extent reference is dropped, keeping the extent
 * cache at root->fs_info->fsck_extent_cache in step with the tree.
 *
 * The extent record covering [bytenr, bytenr + num_bytes) is looked up.
 * @owner at or above BTRFS_FIRST_FREE_OBJECTID marks the reference as a
 * data reference.  Anything lower is treated as a tree block reference.
 *
 * Data reference: the matching data_backref is located with
 * find_data_backref().  If it was found via a file extent (found_ref),
 * back->found_ref and rec->refs are reduced by refs_to_drop.  If it was
 * found in the extent tree, back->num_refs is reduced, and so is
 * rec->extent_item_refs when that is non-zero.  Counters that reach zero
 * clear the corresponding found_* bit.
 *
 * Tree reference: the matching tree_backref is located with
 * find_tree_backref().  Its found_ref / found_extent_tree bits are
 * cleared, and rec->extent_item_refs is decremented when non-zero.
 *
 * Finally maybe_free_extent_rec() is given the chance to discard the
 * record.
 *
 * NOTE(review): in both branches the backref is unlinked with list_del()
 * under the condition "!found_extent_tree && found_ref", i.e. while
 * found_ref is still set.  Whether that is the intended condition cannot
 * be judged from this listing.  The last parameter line, the NULL checks,
 * the code following list_del() and the return statements are missing.
 */
6649 static int free_extent_hook(struct btrfs_trans_handle *trans,
6650 struct btrfs_root *root,
6651 u64 bytenr, u64 num_bytes, u64 parent,
6652 u64 root_objectid, u64 owner, u64 offset,
6655 struct extent_record *rec;
6656 struct cache_extent *cache;
6658 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6660 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6661 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6665 rec = container_of(cache, struct extent_record, cache);
6667 struct data_backref *back;
6668 back = find_data_backref(rec, parent, root_objectid, owner,
6669 offset, 1, bytenr, num_bytes);
6672 if (back->node.found_ref) {
6673 back->found_ref -= refs_to_drop;
6675 rec->refs -= refs_to_drop;
6677 if (back->node.found_extent_tree) {
6678 back->num_refs -= refs_to_drop;
6679 if (rec->extent_item_refs)
6680 rec->extent_item_refs -= refs_to_drop;
6682 if (back->found_ref == 0)
6683 back->node.found_ref = 0;
6684 if (back->num_refs == 0)
6685 back->node.found_extent_tree = 0;
6687 if (!back->node.found_extent_tree && back->node.found_ref) {
6688 list_del(&back->node.list);
6692 struct tree_backref *back;
6693 back = find_tree_backref(rec, parent, root_objectid);
6696 if (back->node.found_ref) {
6699 back->node.found_ref = 0;
6701 if (back->node.found_extent_tree) {
6702 if (rec->extent_item_refs)
6703 rec->extent_item_refs--;
6704 back->node.found_extent_tree = 0;
6706 if (!back->node.found_extent_tree && back->node.found_ref) {
6707 list_del(&back->node.list);
6711 maybe_free_extent_rec(extent_cache, rec);
/*
 * Delete from the extent tree the extent item and backref items whose key
 * objectid equals @bytenr.
 *
 * The search starts at key (bytenr, <type>, -1) and works towards smaller
 * keys (the initial type is set on a line not visible here).  An item with
 * a different objectid ends the scan.  An item whose type is none of
 * EXTENT_ITEM, METADATA_ITEM, TREE_BLOCK_REF, EXTENT_DATA_REF,
 * EXTENT_REF_V0, SHARED_BLOCK_REF or SHARED_DATA_REF is left in place, and
 * the search key is moved just below it (offset - 1 for type 0, otherwise
 * type - 1 with offset -1).
 *
 * A matching item is announced with "repair deleting extent record" and
 * removed with btrfs_del_item().  For EXTENT_ITEM / METADATA_ITEM keys,
 * btrfs_update_block_group() is then called with the extent length, which
 * is found_key.offset for EXTENT_ITEM and root->nodesize for
 * METADATA_ITEM.
 *
 * NOTE(review): @new_len does not appear on any visible line.  The loop
 * construct, the remaining call arguments, the error handling and the
 * return statements are missing from this listing.
 */
6716 static int delete_extent_records(struct btrfs_trans_handle *trans,
6717 struct btrfs_root *root,
6718 struct btrfs_path *path,
6719 u64 bytenr, u64 new_len)
6721 struct btrfs_key key;
6722 struct btrfs_key found_key;
6723 struct extent_buffer *leaf;
6728 key.objectid = bytenr;
6730 key.offset = (u64)-1;
6733 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6740 if (path->slots[0] == 0)
6746 leaf = path->nodes[0];
6747 slot = path->slots[0];
6749 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6750 if (found_key.objectid != bytenr)
6753 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6754 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6755 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6756 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6757 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6758 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6759 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6760 btrfs_release_path(path);
/* Step the search key to just below the item that was skipped. */
6761 if (found_key.type == 0) {
6762 if (found_key.offset == 0)
6764 key.offset = found_key.offset - 1;
6765 key.type = found_key.type;
6767 key.type = found_key.type - 1;
6768 key.offset = (u64)-1;
6772 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6773 found_key.objectid, found_key.type, found_key.offset);
6775 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6778 btrfs_release_path(path);
6780 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6781 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6782 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6783 found_key.offset : root->nodesize;
6785 ret = btrfs_update_block_group(trans, root, bytenr,
6792 btrfs_release_path(path);
6797 * for a single backref, this will allocate a new extent
6798 * and add the backref to it.
/*
 * Write the extent item for @rec into the extent tree if needed, then add
 * the single backref @back to it.
 *
 * Visible steps:
 *  - For some records rec->max_size is raised to at least the extent root
 *    nodesize (the guarding condition is not visible here).
 *  - A new EXTENT_ITEM keyed (rec->start, EXTENT_ITEM_KEY, rec->max_size)
 *    is inserted with btrfs_insert_empty_item().  The item size is
 *    sizeof(struct btrfs_extent_item), and on one path it is extended by
 *    sizeof(struct btrfs_tree_block_info).  The guards around the
 *    insertion (presumably based on @allocated) and around the size
 *    extension are not visible.
 *  - The new item starts with refs = 0 and generation = rec->generation.
 *    Data extents get BTRFS_EXTENT_FLAG_DATA.  Tree blocks get a zeroed
 *    btrfs_tree_block_info carrying rec->info_level and a key built from
 *    rec->info_objectid, plus BTRFS_EXTENT_FLAG_TREE_BLOCK | @flags.
 *  - btrfs_update_block_group() accounts rec->max_size bytes at
 *    rec->start.
 *  - Data backref: btrfs_inc_extent_ref() is called once per
 *    dback->found_ref, and an "adding new data backref" line is printed.
 *    Tree backref: btrfs_inc_extent_ref() is called once with tback->root,
 *    and an "adding new tree backref" line is printed.  When
 *    back->full_backref is set, the parent comes from the backref.
 *
 * NOTE(review): the token "©_key" in the disk-key setter calls below
 * appears to be a mis-encoded "&copy_key" (copy_key is the local declared
 * just above them), and that declaration ends in a stray ";;".  Both need
 * fixing in the real source.
 *
 * NOTE(review): several conditions, call arguments, error checks and the
 * return statements are missing from this listing.
 */
6800 static int record_extent(struct btrfs_trans_handle *trans,
6801 struct btrfs_fs_info *info,
6802 struct btrfs_path *path,
6803 struct extent_record *rec,
6804 struct extent_backref *back,
6805 int allocated, u64 flags)
6808 struct btrfs_root *extent_root = info->extent_root;
6809 struct extent_buffer *leaf;
6810 struct btrfs_key ins_key;
6811 struct btrfs_extent_item *ei;
6812 struct data_backref *dback;
6813 struct btrfs_tree_block_info *bi;
6816 rec->max_size = max_t(u64, rec->max_size,
6817 info->extent_root->nodesize);
6820 u32 item_size = sizeof(*ei);
6823 item_size += sizeof(*bi);
6825 ins_key.objectid = rec->start;
6826 ins_key.offset = rec->max_size;
6827 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6829 ret = btrfs_insert_empty_item(trans, extent_root, path,
6830 &ins_key, item_size);
6834 leaf = path->nodes[0];
6835 ei = btrfs_item_ptr(leaf, path->slots[0],
6836 struct btrfs_extent_item);
/* The item starts with zero refs; the backref code further down calls btrfs_inc_extent_ref(). */
6838 btrfs_set_extent_refs(leaf, ei, 0);
6839 btrfs_set_extent_generation(leaf, ei, rec->generation);
6841 if (back->is_data) {
6842 btrfs_set_extent_flags(leaf, ei,
6843 BTRFS_EXTENT_FLAG_DATA);
6845 struct btrfs_disk_key copy_key;;
6847 bi = (struct btrfs_tree_block_info *)(ei + 1);
6848 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6851 btrfs_set_disk_key_objectid(©_key,
6852 rec->info_objectid);
6853 btrfs_set_disk_key_type(©_key, 0);
6854 btrfs_set_disk_key_offset(©_key, 0);
6856 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6857 btrfs_set_tree_block_key(leaf, bi, ©_key);
6859 btrfs_set_extent_flags(leaf, ei,
6860 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6863 btrfs_mark_buffer_dirty(leaf);
6864 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6865 rec->max_size, 1, 0);
6868 btrfs_release_path(path);
6871 if (back->is_data) {
6875 dback = to_data_backref(back);
6876 if (back->full_backref)
6877 parent = dback->parent;
6881 for (i = 0; i < dback->found_ref; i++) {
6882 /* if parent != 0, we're doing a full backref
6883 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6884 * just makes the backref allocator create a data
6887 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6888 rec->start, rec->max_size,
6892 BTRFS_FIRST_FREE_OBJECTID :
6898 fprintf(stderr, "adding new data backref"
6899 " on %llu %s %llu owner %llu"
6900 " offset %llu found %d\n",
6901 (unsigned long long)rec->start,
6902 back->full_backref ?
6904 back->full_backref ?
6905 (unsigned long long)parent :
6906 (unsigned long long)dback->root,
6907 (unsigned long long)dback->owner,
6908 (unsigned long long)dback->offset,
6912 struct tree_backref *tback;
6914 tback = to_tree_backref(back);
6915 if (back->full_backref)
6916 parent = tback->parent;
6920 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6921 rec->start, rec->max_size,
6922 parent, tback->root, 0, 0);
6923 fprintf(stderr, "adding new tree backref on "
6924 "start %llu len %llu parent %llu root %llu\n",
6925 rec->start, rec->max_size, parent, tback->root);
6928 btrfs_release_path(path);
/*
 * find_entry - search @entries for the extent_entry whose bytenr and bytes
 * both equal the requested values.
 *
 * @entries: list of struct extent_entry linked through their list member
 * @bytenr:  value compared against entry->bytenr
 * @bytes:   value compared against entry->bytes
 *
 * NOTE(review): the return statements are not visible in this listing.
 * verify_backrefs() tests the result against NULL, so presumably the
 * matching entry is returned, or NULL when nothing matches - confirm.
 */
6932 static struct extent_entry *find_entry(struct list_head *entries,
6933 u64 bytenr, u64 bytes)
6935 struct extent_entry *entry = NULL;
/* An entry only counts as a hit when both fields match. */
6937 list_for_each_entry(entry, entries, list) {
6938 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry - choose the entry of @entries that looks most
 * trustworthy, judged by comparing the count fields of the entries.
 *
 * @entries: list of struct extent_entry linked through their list member
 *
 * An entry whose broken value equals its count is treated as one that
 * cannot be trusted.  The local best is the current winner and prev is a
 * previously seen entry; equal counts mean the winner is not certain yet
 * and the scan has to go on.
 *
 * NOTE(review): several statements of this function (branch bodies and
 * the return) are elided from this listing.  Presumably best is returned
 * and is NULL when there is no clear winner - confirm.
 */
6945 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6947 struct extent_entry *entry, *best = NULL, *prev = NULL;
6949 list_for_each_entry(entry, entries, list) {
6951 * If there are as many broken entries as entries then we know
6952 * not to trust this particular entry.
6954 if (entry->broken == entry->count)
6958 * Special case, when there are only two entries and 'best' is
6968 * If our current entry == best then we can't be sure our best
6969 * is really the best, so we need to keep searching.
6971 if (best && best->count == entry->count) {
6977 /* Prev == entry, not good enough, have to keep searching */
6978 if (!prev->broken && prev->count == entry->count)
/* Of prev and entry, the one with the larger count becomes the candidate. */
6982 best = (prev->count > entry->count) ? prev : entry;
6983 else if (best->count < entry->count)
/*
 * repair_ref - rewrite the file extent item behind @dback so that its disk
 * bytenr and disk_num_bytes agree with @entry.
 *
 * @info:  fs_info used to read the root named by dback->root
 * @path:  scratch path for the tree searches, released before the final
 *         return
 * @dback: data backref whose file extent item is to be fixed
 * @entry: bytenr/bytes pair the file extent item is changed to
 *
 * Outline of the visible code:
 *   1. Read the root dback->root and search it, without a transaction, for
 *      (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset); items are
 *      examined, moving to the next leaf when the slot runs past the last
 *      item, until a file extent whose disk_bytenr and disk_num_bytes
 *      equal those of @dback is seen.
 *   2. Start a transaction and search for that key again, this time with
 *      the transaction handle, so the leaf can be modified.
 *   3. A compressed extent whose bytenr differs from entry->bytenr is only
 *      reported, not handled.
 *   4. Otherwise disk_bytenr is set to entry->bytenr.  When the old bytenr
 *      lies after or before the entry, the range is checked against the
 *      entry bounds and the file extent offset is written back as well.
 *   5. disk_num_bytes is set to entry->bytes, and so is ram_bytes when the
 *      extent is not compressed.  The leaf is marked dirty and the
 *      transaction committed.
 *
 * Returns ret if it is non-zero, otherwise the result of
 * btrfs_commit_transaction().  PTR_ERR(trans) is returned on the
 * transaction-start error path.
 *
 * NOTE(review): in the branch for dback->disk_bytenr > entry->bytenr,
 * off_diff is computed but no visible line applies it to offset before the
 * offset is written back; that statement may simply be elided from this
 * listing - confirm against the full file.
 */
6991 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6992 struct data_backref *dback, struct extent_entry *entry)
6994 struct btrfs_trans_handle *trans;
6995 struct btrfs_root *root;
6996 struct btrfs_file_extent_item *fi;
6997 struct extent_buffer *leaf;
6998 struct btrfs_key key;
/* Load the root that the backref names as owner of the file extent. */
7002 key.objectid = dback->root;
7003 key.type = BTRFS_ROOT_ITEM_KEY;
7004 key.offset = (u64)-1;
7005 root = btrfs_read_fs_root(info, &key);
7007 fprintf(stderr, "Couldn't find root for our ref\n");
7012 * The backref points to the original offset of the extent if it was
7013 * split, so we need to search down to the offset we have and then walk
7014 * forward until we find the backref we're looking for.
7016 key.objectid = dback->owner;
7017 key.type = BTRFS_EXTENT_DATA_KEY;
7018 key.offset = dback->offset;
7019 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7021 fprintf(stderr, "Error looking up ref %d\n", ret);
7026 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7027 ret = btrfs_next_leaf(root, path);
7029 fprintf(stderr, "Couldn't find our ref, next\n");
7033 leaf = path->nodes[0];
7034 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7035 if (key.objectid != dback->owner ||
7036 key.type != BTRFS_EXTENT_DATA_KEY) {
7037 fprintf(stderr, "Couldn't find our ref, search\n");
7040 fi = btrfs_item_ptr(leaf, path->slots[0],
7041 struct btrfs_file_extent_item);
7042 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7043 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7045 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7050 btrfs_release_path(path);
7052 trans = btrfs_start_transaction(root, 1);
7054 return PTR_ERR(trans);
7057 * Ok we have the key of the file extent we want to fix, now we can cow
7058 * down to the thing and fix it.
7060 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7062 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7063 key.objectid, key.type, key.offset, ret);
7067 fprintf(stderr, "Well that's odd, we just found this key "
7068 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7073 leaf = path->nodes[0];
7074 fi = btrfs_item_ptr(leaf, path->slots[0],
7075 struct btrfs_file_extent_item);
7077 if (btrfs_file_extent_compression(leaf, fi) &&
7078 dback->disk_bytenr != entry->bytenr) {
7079 fprintf(stderr, "Ref doesn't match the record start and is "
7080 "compressed, please take a btrfs-image of this file "
7081 "system and send it to a btrfs developer so they can "
7082 "complete this functionality for bytenr %Lu\n",
7083 dback->disk_bytenr);
7088 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7089 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7090 } else if (dback->disk_bytenr > entry->bytenr) {
7091 u64 off_diff, offset;
7093 off_diff = dback->disk_bytenr - entry->bytenr;
7094 offset = btrfs_file_extent_offset(leaf, fi);
7095 if (dback->disk_bytenr + offset +
7096 btrfs_file_extent_num_bytes(leaf, fi) >
7097 entry->bytenr + entry->bytes) {
7098 fprintf(stderr, "Ref is past the entry end, please "
7099 "take a btrfs-image of this file system and "
7100 "send it to a btrfs developer, ref %Lu\n",
7101 dback->disk_bytenr);
7106 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7107 btrfs_set_file_extent_offset(leaf, fi, offset);
7108 } else if (dback->disk_bytenr < entry->bytenr) {
7111 offset = btrfs_file_extent_offset(leaf, fi);
7112 if (dback->disk_bytenr + offset < entry->bytenr) {
7113 fprintf(stderr, "Ref is before the entry start, please"
7114 " take a btrfs-image of this file system and "
7115 "send it to a btrfs developer, ref %Lu\n",
7116 dback->disk_bytenr);
7121 offset += dback->disk_bytenr;
7122 offset -= entry->bytenr;
7123 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7124 btrfs_set_file_extent_offset(leaf, fi, offset);
7127 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7130 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7131 * only do this if we aren't using compression, otherwise it's a
7134 if (!btrfs_file_extent_compression(leaf, fi))
7135 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7137 printf("ram bytes may be wrong?\n");
7138 btrfs_mark_buffer_dirty(leaf);
7140 err = btrfs_commit_transaction(trans, root);
7141 btrfs_release_path(path);
7142 return ret ? ret : err;
/*
 * verify_backrefs - make the data backrefs of @rec agree on bytenr and
 * size, repairing file extents that disagree.
 *
 * @info: fs_info handed on to repair_ref()
 * @path: scratch path handed on to repair_ref()
 * @rec:  extent record whose backrefs are examined
 *
 * First pass: every data backref that is not a full backref and has
 * found_ref != 0 is grouped by (disk_bytenr, bytes); a zeroed extent_entry
 * is allocated and appended to a local list for a pair not seen before.
 * A backref that differs from (rec->start, rec->nr), or is marked broken,
 * is noted separately, because a single group alone does not prove that
 * everything agrees.
 *
 * If there is disagreement, find_most_right_entry() picks a winner.  The
 * extent record itself, looked up as (rec->start, rec->nr), acts as a
 * tie breaker: it may be added as one more entry before the pick is
 * retried.  A winner whose bytenr differs from rec->start is reported as
 * unsupported.
 *
 * Second pass: each considered backref that differs from the winner is
 * handed to repair_ref().  Finally the local entry list is emptied.
 *
 * NOTE(review): counters, branch bodies and return statements are elided
 * from this listing, so the exact return values cannot be stated here.
 */
7145 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7146 struct extent_record *rec)
7148 struct extent_backref *back;
7149 struct data_backref *dback;
7150 struct extent_entry *entry, *best = NULL;
7153 int broken_entries = 0;
7158 * Metadata is easy and the backrefs should always agree on bytenr and
7159 * size, if not we've got bigger issues.
7164 list_for_each_entry(back, &rec->backrefs, list) {
7165 if (back->full_backref || !back->is_data)
7168 dback = to_data_backref(back);
7171 * We only pay attention to backrefs that we found a real
7174 if (dback->found_ref == 0)
7178 * For now we only catch when the bytes don't match, not the
7179 * bytenr. We can easily do this at the same time, but I want
7180 * to have a fs image to test on before we just add repair
7181 * functionality willy-nilly so we know we won't screw up the
7185 entry = find_entry(&entries, dback->disk_bytenr,
7188 entry = malloc(sizeof(struct extent_entry));
7193 memset(entry, 0, sizeof(*entry));
7194 entry->bytenr = dback->disk_bytenr;
7195 entry->bytes = dback->bytes;
7196 list_add_tail(&entry->list, &entries);
7201 * If we only have on entry we may think the entries agree when
7202 * in reality they don't so we have to do some extra checking.
7204 if (dback->disk_bytenr != rec->start ||
7205 dback->bytes != rec->nr || back->broken)
7216 /* Yay all the backrefs agree, carry on good sir */
7217 if (nr_entries <= 1 && !mismatch)
7220 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7221 "%Lu\n", rec->start);
7224 * First we want to see if the backrefs can agree amongst themselves who
7225 * is right, so figure out which one of the entries has the highest
7228 best = find_most_right_entry(&entries);
7231 * Ok so we may have an even split between what the backrefs think, so
7232 * this is where we use the extent ref to see what it thinks.
7235 entry = find_entry(&entries, rec->start, rec->nr);
7236 if (!entry && (!broken_entries || !rec->found_rec)) {
7237 fprintf(stderr, "Backrefs don't agree with each other "
7238 "and extent record doesn't agree with anybody,"
7239 " so we can't fix bytenr %Lu bytes %Lu\n",
7240 rec->start, rec->nr);
7243 } else if (!entry) {
7245 * Ok our backrefs were broken, we'll assume this is the
7246 * correct value and add an entry for this range.
7248 entry = malloc(sizeof(struct extent_entry));
7253 memset(entry, 0, sizeof(*entry));
7254 entry->bytenr = rec->start;
7255 entry->bytes = rec->nr;
7256 list_add_tail(&entry->list, &entries);
7260 best = find_most_right_entry(&entries);
7262 fprintf(stderr, "Backrefs and extent record evenly "
7263 "split on who is right, this is going to "
7264 "require user input to fix bytenr %Lu bytes "
7265 "%Lu\n", rec->start, rec->nr);
7272 * I don't think this can happen currently as we'll abort() if we catch
7273 * this case higher up, but in case somebody removes that we still can't
7274 * deal with it properly here yet, so just bail out of that's the case.
7276 if (best->bytenr != rec->start) {
7277 fprintf(stderr, "Extent start and backref starts don't match, "
7278 "please use btrfs-image on this file system and send "
7279 "it to a btrfs developer so they can make fsck fix "
7280 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7281 rec->start, rec->nr);
7287 * Ok great we all agreed on an extent record, let's go find the real
7288 * references and fix up the ones that don't match.
7290 list_for_each_entry(back, &rec->backrefs, list) {
7291 if (back->full_backref || !back->is_data)
7294 dback = to_data_backref(back);
7297 * Still ignoring backrefs that don't have a real ref attached
7300 if (dback->found_ref == 0)
7303 if (dback->bytes == best->bytes &&
7304 dback->disk_bytenr == best->bytenr)
7307 ret = repair_ref(info, path, dback, best);
7313 * Ok we messed with the actual refs, which means we need to drop our
7314 * entire cache and go back and rescan. I know this is a huge pain and
7315 * adds a lot of extra work, but it's the only way to be safe. Once all
7316 * the backrefs agree we may not need to do anything to the extent
7321 while (!list_empty(&entries)) {
7322 entry = list_entry(entries.next, struct extent_entry, list);
7323 list_del_init(&entry->list);
/*
 * process_duplicates - swap @rec for the duplicate that reflects a real
 * extent item and merge overlapping cached records into it.
 *
 * @root:         not referenced in the visible lines
 * @extent_cache: cache tree holding the extent records
 * @rec:          record taken from the duplicate_extents list
 *
 * Nothing is merged when @rec has found_rec set or has more than one
 * duplicate.  Otherwise @rec is removed from the cache and the first
 * record on rec->dups, called good below, is reset (empty lists, check
 * flags cleared, no duplicates), inherits the refs and backrefs of @rec
 * and gets its cache range set to start good->start, size good->nr.
 * Cached records found by the lookup at good->start are then folded in:
 *   - one with found_rec set or with duplicates of its own is moved onto
 *     good->dups together with its duplicates, and good is queued on
 *     duplicate_extents if it is not on a list yet;
 *   - any other one just hands over its refs and backrefs.
 * Each folded record is removed from the cache.  At the end good is
 * inserted into the cache.
 *
 * The last return yields 0 when good has duplicates left to delete and 1
 * otherwise.  NOTE(review): loop structure and other return paths are
 * elided from this listing.
 */
7329 static int process_duplicates(struct btrfs_root *root,
7330 struct cache_tree *extent_cache,
7331 struct extent_record *rec)
7333 struct extent_record *good, *tmp;
7334 struct cache_extent *cache;
7338 * If we found a extent record for this extent then return, or if we
7339 * have more than one duplicate we are likely going to need to delete
7342 if (rec->found_rec || rec->num_duplicates > 1)
7345 /* Shouldn't happen but just in case */
7346 BUG_ON(!rec->num_duplicates);
7349 * So this happens if we end up with a backref that doesn't match the
7350 * actual extent entry. So either the backref is bad or the extent
7351 * entry is bad. Either way we want to have the extent_record actually
7352 * reflect what we found in the extent_tree, so we need to take the
7353 * duplicate out and use that as the extent_record since the only way we
7354 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7356 remove_cache_extent(extent_cache, &rec->cache);
7358 good = to_extent_record(rec->dups.next);
7359 list_del_init(&good->list);
7360 INIT_LIST_HEAD(&good->backrefs);
7361 INIT_LIST_HEAD(&good->dups);
7362 good->cache.start = good->start;
7363 good->cache.size = good->nr;
7364 good->content_checked = 0;
7365 good->owner_ref_checked = 0;
7366 good->num_duplicates = 0;
7367 good->refs = rec->refs;
7368 list_splice_init(&rec->backrefs, &good->backrefs);
7370 cache = lookup_cache_extent(extent_cache, good->start,
7374 tmp = container_of(cache, struct extent_record, cache);
7377 * If we find another overlapping extent and it's found_rec is
7378 * set then it's a duplicate and we need to try and delete
7381 if (tmp->found_rec || tmp->num_duplicates > 0) {
7382 if (list_empty(&good->list))
7383 list_add_tail(&good->list,
7384 &duplicate_extents);
7385 good->num_duplicates += tmp->num_duplicates + 1;
7386 list_splice_init(&tmp->dups, &good->dups);
7387 list_del_init(&tmp->list);
7388 list_add_tail(&tmp->list, &good->dups);
7389 remove_cache_extent(extent_cache, &tmp->cache);
7394 * Ok we have another non extent item backed extent rec, so lets
7395 * just add it to this extent and carry on like we did above.
7397 good->refs += tmp->refs;
7398 list_splice_init(&tmp->backrefs, &good->backrefs);
7399 remove_cache_extent(extent_cache, &tmp->cache);
/* Put the merged record back into the cache in place of the old one. */
7402 ret = insert_cache_extent(extent_cache, &good->cache);
7405 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - delete the extent items of duplicate extent
 * records hanging off @rec.
 *
 * @root: any root of the filesystem; it is replaced by
 *        root->fs_info->extent_root before the transaction is started
 * @rec:  record whose dups list holds the duplicate records
 *
 * The first loop looks for the record, called good, that covers all the
 * duplicates, and reports overlaps that are not fully contained in each
 * other.  Records are then collected on a local delete_list (the
 * conditions selecting them are partly elided from this listing).  Inside
 * one transaction, each listed record with found_rec set has its item
 * (objectid = start, BTRFS_EXTENT_ITEM_KEY, offset = nr) searched and
 * deleted; a metadata record at this point is reported as unexpected.
 * Both the local list and rec->dups are emptied before returning.
 *
 * Returns ret when it is non-zero, otherwise nr_del.  rec->num_duplicates
 * is reset to 0 only when both ret and nr_del are zero.
 */
7408 static int delete_duplicate_records(struct btrfs_root *root,
7409 struct extent_record *rec)
7411 struct btrfs_trans_handle *trans;
7412 LIST_HEAD(delete_list);
7413 struct btrfs_path path;
7414 struct extent_record *tmp, *good, *n;
7417 struct btrfs_key key;
7419 btrfs_init_path(&path);
7422 /* Find the record that covers all of the duplicates. */
7423 list_for_each_entry(tmp, &rec->dups, list) {
7424 if (good->start < tmp->start)
7426 if (good->nr > tmp->nr)
7429 if (tmp->start + tmp->nr < good->start + good->nr) {
7430 fprintf(stderr, "Ok we have overlapping extents that "
7431 "aren't completely covered by each other, this "
7432 "is going to require more careful thought. "
7433 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7434 tmp->start, tmp->nr, good->start, good->nr);
7441 list_add_tail(&rec->list, &delete_list);
7443 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7446 list_move_tail(&tmp->list, &delete_list);
/* From here on all tree operations go through the extent root. */
7449 root = root->fs_info->extent_root;
7450 trans = btrfs_start_transaction(root, 1);
7451 if (IS_ERR(trans)) {
7452 ret = PTR_ERR(trans);
7456 list_for_each_entry(tmp, &delete_list, list) {
7457 if (tmp->found_rec == 0)
/* Key of the extent item that backs this record. */
7459 key.objectid = tmp->start;
7460 key.type = BTRFS_EXTENT_ITEM_KEY;
7461 key.offset = tmp->nr;
7463 /* Shouldn't happen but just in case */
7464 if (tmp->metadata) {
7465 fprintf(stderr, "Well this shouldn't happen, extent "
7466 "record overlaps but is metadata? "
7467 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7471 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
7477 ret = btrfs_del_item(trans, root, &path);
7480 btrfs_release_path(&path);
7483 err = btrfs_commit_transaction(trans, root);
/* Unlink whatever is left on the local delete list. */
7487 while (!list_empty(&delete_list)) {
7488 tmp = to_extent_record(delete_list.next);
7489 list_del_init(&tmp->list);
7495 while (!list_empty(&rec->dups)) {
7496 tmp = to_extent_record(rec->dups.next);
7497 list_del_init(&tmp->list);
7501 btrfs_release_path(&path);
7503 if (!ret && !nr_del)
7504 rec->num_duplicates = 0;
7506 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - try to locate the file extents behind the data
 * backrefs of @rec that have not been found so far.
 *
 * @info:         fs_info used to read the roots
 * @path:         scratch path for the lookups, released after each search
 * @extent_cache: cache tree of extent records, consulted by bytenr
 * @rec:          extent record whose backrefs are walked
 *
 * For each data backref that is not a full backref and has found_ref == 0,
 * the root dback->root is read and searched for (dback->owner,
 * BTRFS_EXTENT_DATA_KEY, dback->offset).  When the file extent exists, its
 * disk_bytenr is looked up in @extent_cache; the comment in the loop
 * explains that a backref whose bytenr already has an extent record of its
 * own is left alone.  Otherwise the backref is marked found and takes over
 * the disk_bytenr and disk_num_bytes read from the file extent item.
 *
 * A root that fails to load with -ENOENT is treated as a bad ref and
 * skipped; any other root error is returned through PTR_ERR().
 *
 * NOTE(review): the remaining return statements are elided from this
 * listing.
 */
7509 static int find_possible_backrefs(struct btrfs_fs_info *info,
7510 struct btrfs_path *path,
7511 struct cache_tree *extent_cache,
7512 struct extent_record *rec)
7514 struct btrfs_root *root;
7515 struct extent_backref *back;
7516 struct data_backref *dback;
7517 struct cache_extent *cache;
7518 struct btrfs_file_extent_item *fi;
7519 struct btrfs_key key;
7523 list_for_each_entry(back, &rec->backrefs, list) {
7524 /* Don't care about full backrefs (poor unloved backrefs) */
7525 if (back->full_backref || !back->is_data)
7528 dback = to_data_backref(back);
7530 /* We found this one, we don't need to do a lookup */
7531 if (dback->found_ref)
7534 key.objectid = dback->root;
7535 key.type = BTRFS_ROOT_ITEM_KEY;
7536 key.offset = (u64)-1;
7538 root = btrfs_read_fs_root(info, &key);
7540 /* No root, definitely a bad ref, skip */
7541 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7543 /* Other err, exit */
7545 return PTR_ERR(root);
7547 key.objectid = dback->owner;
7548 key.type = BTRFS_EXTENT_DATA_KEY;
7549 key.offset = dback->offset;
7550 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7552 btrfs_release_path(path);
7555 /* Didn't find it, we can carry on */
7560 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7561 struct btrfs_file_extent_item);
7562 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7563 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7564 btrfs_release_path(path);
7565 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7567 struct extent_record *tmp;
7568 tmp = container_of(cache, struct extent_record, cache);
7571 * If we found an extent record for the bytenr for this
7572 * particular backref then we can't add it to our
7573 * current extent record. We only want to add backrefs
7574 * that don't have a corresponding extent item in the
7575 * extent tree since they likely belong to this record
7576 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the values read from the file extent item. */
7582 dback->found_ref += 1;
7583 dback->disk_bytenr = bytenr;
7584 dback->bytes = bytes;
7587 * Set this so the verify backref code knows not to trust the
7588 * values in this backref.
/*
 * record_orphan_data_extents - remember data backrefs of @rec whose file
 * extent cannot be found, so the file extents can be rebuilt later.
 *
 * @fs_info: fs_info used to read the destination roots
 * @rec:     extent record whose backrefs are walked
 *
 * Only data backrefs that are not full backrefs, have found_extent_tree
 * set and have found_ref == 0 are considered.  The root dback->root is
 * read (a missing or failing root is skipped) and searched for
 * (dback->owner, BTRFS_EXTENT_DATA_KEY, dback->offset).  For a backref
 * that is recorded, a struct orphan_data_extent is allocated and filled
 * with root, owner, offset and the cached start and size of @rec, and
 * linked with the orphan_data_extents list of the destination root.
 *
 * The last visible return is !recorded_data_ref: 0 once at least one
 * orphan was recorded, 1 otherwise.  The negative error return mentioned
 * in the original comment below is on a path elided from this listing.
 */
7597 * Record orphan data ref into corresponding root.
7599 * Return 0 if the extent item contains data ref and recorded.
7600 * Return 1 if the extent item contains no useful data ref
7601 * On that case, it may contains only shared_dataref or metadata backref
7602 * or the file extent exists(this should be handled by the extent bytenr
7604 * Return <0 if something goes wrong.
7606 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7607 struct extent_record *rec)
7609 struct btrfs_key key;
7610 struct btrfs_root *dest_root;
7611 struct extent_backref *back;
7612 struct data_backref *dback;
7613 struct orphan_data_extent *orphan;
7614 struct btrfs_path path;
7615 int recorded_data_ref = 0;
7620 btrfs_init_path(&path);
7621 list_for_each_entry(back, &rec->backrefs, list) {
7622 if (back->full_backref || !back->is_data ||
7623 !back->found_extent_tree)
7625 dback = to_data_backref(back);
7626 if (dback->found_ref)
7628 key.objectid = dback->root;
7629 key.type = BTRFS_ROOT_ITEM_KEY;
7630 key.offset = (u64)-1;
7632 dest_root = btrfs_read_fs_root(fs_info, &key);
7634 /* For non-exist root we just skip it */
7635 if (IS_ERR(dest_root) || !dest_root)
7638 key.objectid = dback->owner;
7639 key.type = BTRFS_EXTENT_DATA_KEY;
7640 key.offset = dback->offset;
7642 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
7643 btrfs_release_path(&path);
7645 * For ret < 0, it's OK since the fs-tree may be corrupted,
7646 * we need to record it for inode/file extent rebuild.
7647 * For ret > 0, we record it only for file extent rebuild.
7648 * For ret == 0, the file extent exists but only bytenr
7649 * mismatch, let the original bytenr fix routine to handle,
7655 orphan = malloc(sizeof(*orphan));
7660 INIT_LIST_HEAD(&orphan->list);
7661 orphan->root = dback->root;
7662 orphan->objectid = dback->owner;
7663 orphan->offset = dback->offset;
7664 orphan->disk_bytenr = rec->cache.start;
7665 orphan->disk_len = rec->cache.size;
/*
 * NOTE(review): the call below passes the list head of the root as the
 * first argument and the node of the new orphan as the second.  With the
 * usual list_add(new, head) argument order this looks swapped, which
 * would detach earlier orphans once a second one is added to the same
 * root - confirm against the list implementation used by this project.
 */
7666 list_add(&dest_root->orphan_data_extents, &orphan->list);
7667 recorded_data_ref = 1;
7670 btrfs_release_path(&path);
7672 return !recorded_data_ref;
/*
 * fixup_extent_refs - rebuild the extent tree entries of @rec from the
 * backrefs collected during the scan.
 *
 * @info:         fs_info of the filesystem being repaired
 * @extent_cache: cache tree of extent records, handed on to
 *                find_possible_backrefs()
 * @rec:          extent record to fix
 *
 * Steps in the order they appear:
 *   - when refs differs from extent_item_refs on a non-metadata record,
 *     find_possible_backrefs() is run to pick up stray backrefs;
 *   - verify_backrefs() makes the backrefs agree with each other;
 *   - a transaction is started on the extent root;
 *   - delete_extent_records() removes the existing records for rec->start
 *     and rec->max_size;
 *   - info->corrupt_blocks is consulted for the same range;
 *   - record_extent() is called for the backrefs that have found_ref set,
 *     after bad_full_backref has been cleared on @rec;
 *   - the transaction is committed and the path released.
 *
 * NOTE(review): error handling, the exact nesting of these steps and the
 * return statements are elided from this listing.
 */
7678 * when an incorrect extent item is found, this will delete
7679 * all of the existing entries for it and recreate them
7680 * based on what the tree scan found.
7682 static int fixup_extent_refs(struct btrfs_fs_info *info,
7683 struct cache_tree *extent_cache,
7684 struct extent_record *rec)
7686 struct btrfs_trans_handle *trans = NULL;
7688 struct btrfs_path path;
7689 struct list_head *cur = rec->backrefs.next;
7690 struct cache_extent *cache;
7691 struct extent_backref *back;
/* The full backref state of the record travels in flags to record_extent(). */
7695 if (rec->flag_block_full_backref)
7696 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7698 btrfs_init_path(&path);
7699 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7701 * Sometimes the backrefs themselves are so broken they don't
7702 * get attached to any meaningful rec, so first go back and
7703 * check any of our backrefs that we couldn't find and throw
7704 * them into the list if we find the backref so that
7705 * verify_backrefs can figure out what to do.
7707 ret = find_possible_backrefs(info, &path, extent_cache, rec);
7712 /* step one, make sure all of the backrefs agree */
7713 ret = verify_backrefs(info, &path, rec);
7717 trans = btrfs_start_transaction(info->extent_root, 1);
7718 if (IS_ERR(trans)) {
7719 ret = PTR_ERR(trans);
7723 /* step two, delete all the existing records */
7724 ret = delete_extent_records(trans, info->extent_root, &path,
7725 rec->start, rec->max_size);
7730 /* was this block corrupt? If so, don't add references to it */
7731 cache = lookup_cache_extent(info->corrupt_blocks,
7732 rec->start, rec->max_size);
7738 /* step three, recreate all the refs we did find */
7739 while(cur != &rec->backrefs) {
7740 back = to_extent_backref(cur);
7744 * if we didn't find any references, don't create a
7747 if (!back->found_ref)
7750 rec->bad_full_backref = 0;
7751 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
7759 int err = btrfs_commit_transaction(trans, info->extent_root);
7764 btrfs_release_path(&path);
/*
 * fixup_extent_flags - set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF in the
 * extent item of @rec so that it follows rec->flag_block_full_backref.
 *
 * @fs_info: fs_info whose extent_root is searched and modified
 * @rec:     extent record describing the item to update
 *
 * The item is located in the extent root inside a new transaction.  On
 * the paths where the search fails or the item is missing, the path is
 * released and the transaction committed (the return values of those
 * paths are elided from this listing).  On the main path the flags are
 * read, modified, written back, the leaf is marked dirty and the result
 * of btrfs_commit_transaction() is returned.  PTR_ERR(trans) is returned
 * on the transaction-start error path.
 */
7768 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7769 struct extent_record *rec)
7771 struct btrfs_trans_handle *trans;
7772 struct btrfs_root *root = fs_info->extent_root;
7773 struct btrfs_path path;
7774 struct btrfs_extent_item *ei;
7775 struct btrfs_key key;
/*
 * Metadata records are keyed by (start, METADATA_ITEM, info_level), all
 * other records by (start, EXTENT_ITEM, max_size).
 */
7779 key.objectid = rec->start;
7780 if (rec->metadata) {
7781 key.type = BTRFS_METADATA_ITEM_KEY;
7782 key.offset = rec->info_level;
7784 key.type = BTRFS_EXTENT_ITEM_KEY;
7785 key.offset = rec->max_size;
7788 trans = btrfs_start_transaction(root, 0);
7790 return PTR_ERR(trans);
7792 btrfs_init_path(&path);
7793 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
7795 btrfs_release_path(&path);
7796 btrfs_commit_transaction(trans, root);
7799 fprintf(stderr, "Didn't find extent for %llu\n",
7800 (unsigned long long)rec->start);
7801 btrfs_release_path(&path);
7802 btrfs_commit_transaction(trans, root);
7806 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
7807 struct btrfs_extent_item);
/* Read, modify and write back the flags field of the extent item. */
7808 flags = btrfs_extent_flags(path.nodes[0], ei);
7809 if (rec->flag_block_full_backref) {
7810 fprintf(stderr, "setting full backref on %llu\n",
7811 (unsigned long long)key.objectid);
7812 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7814 fprintf(stderr, "clearing full backref on %llu\n",
7815 (unsigned long long)key.objectid);
7816 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7818 btrfs_set_extent_flags(path.nodes[0], ei, flags);
7819 btrfs_mark_buffer_dirty(path.nodes[0]);
7820 btrfs_release_path(&path);
7821 return btrfs_commit_transaction(trans, root);
/*
 * prune_one_block - remove the pointer to a corrupt block from a node of
 * the extent tree.
 *
 * @trans:   running transaction
 * @info:    fs_info whose extent_root is searched
 * @corrupt: corrupt block record; supplies key, level and cache.start
 *
 * The search stops one level above the corrupt block, because
 * path.lowest_level is set to corrupt->level + 1.  If the slot the search
 * ended on does not point at corrupt->cache.start, every slot of that
 * node is scanned for the block pointer.  The pointer is then removed
 * with btrfs_del_ptr() at that level and slot.
 *
 * NOTE(review): the return statements and the handling of the case where
 * the block pointer is not found are elided from this listing.
 */
7824 /* right now we only prune from the extent allocation tree */
7825 static int prune_one_block(struct btrfs_trans_handle *trans,
7826 struct btrfs_fs_info *info,
7827 struct btrfs_corrupt_block *corrupt)
7830 struct btrfs_path path;
7831 struct extent_buffer *eb;
/* The node of interest sits one level above the corrupt block. */
7835 int level = corrupt->level + 1;
7837 btrfs_init_path(&path);
7839 /* we want to stop at the parent to our busted block */
7840 path.lowest_level = level;
7842 ret = btrfs_search_slot(trans, info->extent_root,
7843 &corrupt->key, &path, -1, 1);
7848 eb = path.nodes[level];
7855 * hopefully the search gave us the block we want to prune,
7856 * lets try that first
7858 slot = path.slots[level];
7859 found = btrfs_node_blockptr(eb, slot);
7860 if (found == corrupt->cache.start)
7863 nritems = btrfs_header_nritems(eb);
7865 /* the search failed, lets scan this node and hope we find it */
7866 for (slot = 0; slot < nritems; slot++) {
7867 found = btrfs_node_blockptr(eb, slot);
7868 if (found == corrupt->cache.start)
7872 * we couldn't find the bad block. TODO, search all the nodes for pointers
7875 if (eb == info->extent_root->node) {
7880 btrfs_release_path(&path);
7885 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7886 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7889 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - run prune_one_block() on entries of
 * info->corrupt_blocks and remove them from that cache.
 *
 * @info: fs_info holding the corrupt_blocks cache and the extent root
 *
 * A transaction on the extent root is started; PTR_ERR(trans) is returned
 * on its error path.  The last visible return hands back the result of
 * btrfs_commit_transaction().
 *
 * NOTE(review): the loop construct and the conditions around starting
 * and committing the transaction are elided from this listing.
 */
7893 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7895 struct btrfs_trans_handle *trans = NULL;
7896 struct cache_extent *cache;
7897 struct btrfs_corrupt_block *corrupt;
7900 cache = search_cache_extent(info->corrupt_blocks, 0);
7904 trans = btrfs_start_transaction(info->extent_root, 1);
7906 return PTR_ERR(trans);
7908 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* The result of prune_one_block() is ignored here. */
7909 prune_one_block(trans, info, corrupt);
7910 remove_cache_extent(info->corrupt_blocks, cache);
7913 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - clear EXTENT_DIRTY ranges from
 * fs_info->free_space_cache and walk the block groups.
 *
 * @fs_info: fs_info whose free_space_cache and block groups are visited
 *
 * The first part asks for the first EXTENT_DIRTY range starting at 0 and
 * clears the range that was found.  The second part looks up the first
 * block group at start and moves start to the end of that group
 * (key.objectid + key.offset).
 *
 * NOTE(review): the loop headers, the termination checks and whatever is
 * done to each block group are elided from this listing - confirm before
 * relying on this summary.
 */
7917 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7919 struct btrfs_block_group_cache *cache;
7924 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7925 &start, &end, EXTENT_DIRTY);
7928 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7934 cache = btrfs_lookup_first_block_group(fs_info, start);
7939 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - report, and in repair mode try to fix, the
 * inconsistencies of the extent records collected in @extent_cache.
 *
 * @root:         any root of the filesystem; root->fs_info is what is used
 * @extent_cache: cache tree of struct extent_record built by the scan
 *
 * Visible flow:
 *   - Repair preparation: the range of every extent record and of every
 *     corrupt block is marked dirty in fs_info->excluded_extents so that
 *     nothing is allocated from them, then corrupt blocks are pruned and
 *     the cached block groups reset.
 *   - In repair mode, records on the duplicate_extents list go through
 *     process_duplicates() and delete_duplicate_records().
 *   - Each cached record is then checked for: multiple extent items, ref
 *     count mismatch, backpointer mismatch, failed owner ref check, bad
 *     full backref, metadata crossing a stripe boundary and chunk type
 *     mismatch.  A message is printed per problem; in repair mode
 *     record_orphan_data_extents(), fixup_extent_refs() or
 *     fixup_extent_flags() are tried where shown.
 *   - The record is removed from the cache and its backrefs are freed;
 *     its range is cleared from excluded_extents when repairing without
 *     init_extent_tree and the record had no error or was fixed.
 *   - At the end, block accounting is fixed in a new transaction on the
 *     extent root, which is then committed.
 *
 * NOTE(review): many lines (loop headers, error bookkeeping, the repair
 * conditions and the returns) are elided from this listing, so the exact
 * conditions and return values cannot be stated from here.
 */
7943 static int check_extent_refs(struct btrfs_root *root,
7944 struct cache_tree *extent_cache)
7946 struct extent_record *rec;
7947 struct cache_extent *cache;
7956 * if we're doing a repair, we have to make sure
7957 * we don't allocate from the problem extents.
7958 * In the worst case, this will be all the
7961 cache = search_cache_extent(extent_cache, 0);
7963 rec = container_of(cache, struct extent_record, cache);
7964 set_extent_dirty(root->fs_info->excluded_extents,
7966 rec->start + rec->max_size - 1,
7968 cache = next_cache_extent(cache);
7971 /* pin down all the corrupted blocks too */
7972 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7974 set_extent_dirty(root->fs_info->excluded_extents,
7976 cache->start + cache->size - 1,
7978 cache = next_cache_extent(cache);
/* The result of prune_corrupt_blocks() is not checked here. */
7980 prune_corrupt_blocks(root->fs_info);
7981 reset_cached_block_groups(root->fs_info);
7984 reset_cached_block_groups(root->fs_info);
7987 * We need to delete any duplicate entries we find first otherwise we
7988 * could mess up the extent tree when we have backrefs that actually
7989 * belong to a different extent item and not the weird duplicate one.
7991 while (repair && !list_empty(&duplicate_extents)) {
7992 rec = to_extent_record(duplicate_extents.next);
7993 list_del_init(&rec->list);
7995 /* Sometimes we can find a backref before we find an actual
7996 * extent, so we need to process it a little bit to see if there
7997 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7998 * if this is a backref screwup. If we need to delete stuff
7999 * process_duplicates() will return 0, otherwise it will return
8002 if (process_duplicates(root, extent_cache, rec))
8004 ret = delete_duplicate_records(root, rec);
8008 * delete_duplicate_records will return the number of entries
8009 * deleted, so if it's greater than 0 then we know we actually
8010 * did something and we need to remove.
8024 cache = search_cache_extent(extent_cache, 0);
8027 rec = container_of(cache, struct extent_record, cache);
8028 if (rec->num_duplicates) {
8029 fprintf(stderr, "extent item %llu has multiple extent "
8030 "items\n", (unsigned long long)rec->start);
8035 if (rec->refs != rec->extent_item_refs) {
8036 fprintf(stderr, "ref mismatch on [%llu %llu] ",
8037 (unsigned long long)rec->start,
8038 (unsigned long long)rec->nr);
8039 fprintf(stderr, "extent item %llu, found %llu\n",
8040 (unsigned long long)rec->extent_item_refs,
8041 (unsigned long long)rec->refs);
8042 ret = record_orphan_data_extents(root->fs_info, rec);
8049 * we can't use the extent to repair file
8050 * extent, let the fallback method handle it.
8052 if (!fixed && repair) {
8053 ret = fixup_extent_refs(
8064 if (all_backpointers_checked(rec, 1)) {
8065 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8066 (unsigned long long)rec->start,
8067 (unsigned long long)rec->nr);
8069 if (!fixed && !recorded && repair) {
8070 ret = fixup_extent_refs(root->fs_info,
8079 if (!rec->owner_ref_checked) {
8080 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8081 (unsigned long long)rec->start,
8082 (unsigned long long)rec->nr);
8083 if (!fixed && !recorded && repair) {
8084 ret = fixup_extent_refs(root->fs_info,
8093 if (rec->bad_full_backref) {
8094 fprintf(stderr, "bad full backref, on [%llu]\n",
8095 (unsigned long long)rec->start);
8097 ret = fixup_extent_flags(root->fs_info, rec);
8106 * Although it's not a extent ref's problem, we reuse this
8107 * routine for error reporting.
8108 * No repair function yet.
8110 if (rec->crossing_stripes) {
8112 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8113 rec->start, rec->start + rec->max_size);
8118 if (rec->wrong_chunk_type) {
8120 "bad extent [%llu, %llu), type mismatch with chunk\n",
8121 rec->start, rec->start + rec->max_size);
8126 remove_cache_extent(extent_cache, cache);
8127 free_all_extent_backrefs(rec);
8128 if (!init_extent_tree && repair && (!cur_err || fixed))
8129 clear_extent_dirty(root->fs_info->excluded_extents,
8131 rec->start + rec->max_size - 1,
8137 if (ret && ret != -EAGAIN) {
8138 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8141 struct btrfs_trans_handle *trans;
8143 root = root->fs_info->extent_root;
8144 trans = btrfs_start_transaction(root, 1);
8145 if (IS_ERR(trans)) {
8146 ret = PTR_ERR(trans);
8150 btrfs_fix_block_accounting(trans, root);
8151 ret = btrfs_commit_transaction(trans, root);
8156 fprintf(stderr, "repaired damaged extent references\n");
/*
 * calc_stripe_length - derive a stripe length from a length, a stripe
 * count and the block group type.
 *
 * @type:        block group type flags; the BTRFS_BLOCK_GROUP_RAID* bits
 *               select the formula, tested in the order listed below
 * @length:      length to split up (check_chunk_refs() passes the chunk
 *               length)
 * @num_stripes: number of stripes
 *
 *   RAID0:  length / num_stripes
 *   RAID10: length * 2 / num_stripes
 *   RAID5:  length / (num_stripes - 1)
 *   RAID6:  length / (num_stripes - 2)
 *   other:  length
 *
 * NOTE(review): no guard against a zero divisor is visible here
 * (num_stripes of 0, of 1 with RAID5, of 2 with RAID6); presumably callers
 * only pass validated chunks - confirm.  The return statement is elided
 * from this listing; presumably stripe_size is returned.
 */
8162 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8166 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8167 stripe_size = length;
8168 stripe_size /= num_stripes;
8169 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8170 stripe_size = length * 2;
8171 stripe_size /= num_stripes;
8172 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8173 stripe_size = length;
8174 stripe_size /= (num_stripes - 1);
8175 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8176 stripe_size = length;
8177 stripe_size /= (num_stripes - 2);
8179 stripe_size = length;
/*
 * check_chunk_refs - cross-check one chunk record against the block group
 * cache and the device extent cache.
 *
 * @chunk_rec:         chunk to verify
 * @block_group_cache: block group records, looked up through its tree
 * @dev_extent_cache:  device extent records, looked up through its tree
 *
 * Block group: a record found for the chunk is compared field by field -
 * its offset against the chunk length, its objectid against the chunk
 * offset and its flags against the chunk type_flags (the flags comparison
 * is subject to a condition elided from this listing).  A mismatch or a
 * missing block group is reported with the messages below; the block
 * group record is unlinked from its list and stored in chunk_rec->bg_rec
 * (presumably on the matching path - the branch structure is elided).
 *
 * Device extents: for every stripe a device extent is looked up by devid,
 * offset and the length from calc_stripe_length(), and must agree on
 * objectid, offset, chunk_offset and length.  The record is moved onto
 * chunk_rec->dextents (again presumably on the matching path); mismatches
 * and missing device extents are reported.
 *
 * NOTE(review): the last parameter, the local declarations and the return
 * statements are elided from this listing; the return convention is the
 * one given in the original comment that follows.
 */
8185 * Check the chunk with its block group/dev list ref:
8186 * Return 0 if all refs seems valid.
8187 * Return 1 if part of refs seems valid, need later check for rebuild ref
8188 * like missing block group and needs to search extent tree to rebuild them.
8189 * Return -1 if essential refs are missing and unable to rebuild.
8191 static int check_chunk_refs(struct chunk_record *chunk_rec,
8192 struct block_group_tree *block_group_cache,
8193 struct device_extent_tree *dev_extent_cache,
8196 struct cache_extent *block_group_item;
8197 struct block_group_record *block_group_rec;
8198 struct cache_extent *dev_extent_item;
8199 struct device_extent_record *dev_extent_rec;
8203 int metadump_v2 = 0;
8207 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8210 if (block_group_item) {
8211 block_group_rec = container_of(block_group_item,
8212 struct block_group_record,
8214 if (chunk_rec->length != block_group_rec->offset ||
8215 chunk_rec->offset != block_group_rec->objectid ||
8217 chunk_rec->type_flags != block_group_rec->flags)) {
8220 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8221 chunk_rec->objectid,
8226 chunk_rec->type_flags,
8227 block_group_rec->objectid,
8228 block_group_rec->type,
8229 block_group_rec->offset,
8230 block_group_rec->offset,
8231 block_group_rec->objectid,
8232 block_group_rec->flags);
8235 list_del_init(&block_group_rec->list);
8236 chunk_rec->bg_rec = block_group_rec;
8241 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8242 chunk_rec->objectid,
8247 chunk_rec->type_flags);
/* Length each device extent of this chunk is expected to have. */
8254 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8255 chunk_rec->num_stripes);
8256 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8257 devid = chunk_rec->stripes[i].devid;
8258 offset = chunk_rec->stripes[i].offset;
8259 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8260 devid, offset, length);
8261 if (dev_extent_item) {
8262 dev_extent_rec = container_of(dev_extent_item,
8263 struct device_extent_record,
8265 if (dev_extent_rec->objectid != devid ||
8266 dev_extent_rec->offset != offset ||
8267 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8268 dev_extent_rec->length != length) {
8271 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8272 chunk_rec->objectid,
8275 chunk_rec->stripes[i].devid,
8276 chunk_rec->stripes[i].offset,
8277 dev_extent_rec->objectid,
8278 dev_extent_rec->offset,
8279 dev_extent_rec->length);
8282 list_move(&dev_extent_rec->chunk_list,
8283 &chunk_rec->dextents);
8288 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8289 chunk_rec->objectid,
8292 chunk_rec->stripes[i].devid,
8293 chunk_rec->stripes[i].offset);
/*
 * Walk every chunk record in chunk_cache and cross-check it with
 * check_chunk_refs() against block_group_cache and dev_extent_cache.
 * Depending on the result each chunk record is appended to one of the
 * caller-supplied lists; the good and rebuild insertions are skipped when
 * the corresponding list pointer is NULL.  Afterwards the block groups
 * left on block_group_cache->block_groups and the device extents left on
 * dev_extent_cache->no_chunk_orphans are reported as having no chunk.
 * NOTE(review): the return value, the use of silent inside this function
 * and the guard in front of the bad-list insertion are not visible in
 * this excerpt - confirm against the full function.
 */
8300 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8301 int check_chunks(struct cache_tree *chunk_cache,
8302 struct block_group_tree *block_group_cache,
8303 struct device_extent_tree *dev_extent_cache,
8304 struct list_head *good, struct list_head *bad,
8305 struct list_head *rebuild, int silent)
8307 struct cache_extent *chunk_item;
8308 struct chunk_record *chunk_rec;
8309 struct block_group_record *bg_rec;
8310 struct device_extent_record *dext_rec;
8314 chunk_item = first_cache_extent(chunk_cache);
8315 while (chunk_item) {
8316 chunk_rec = container_of(chunk_item, struct chunk_record,
8318 err = check_chunk_refs(chunk_rec, block_group_cache,
8319 dev_extent_cache, silent);
/* Sort the chunk by result: 0 goes to good, a positive value to rebuild. */
8322 if (err == 0 && good)
8323 list_add_tail(&chunk_rec->list, good);
8324 if (err > 0 && rebuild)
8325 list_add_tail(&chunk_rec->list, rebuild);
8327 list_add_tail(&chunk_rec->list, bad);
8328 chunk_item = next_cache_extent(chunk_item);
/* Anything still on these two lists was never claimed by a chunk. */
8331 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8334 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8342 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8346 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Add up the lengths of the device extent records found in dext_cache for
 * dev_rec->devid and compare the sum with dev_rec->byte_used.  The walk
 * starts at the entry returned by search_cache_extent2() for (devid, 0)
 * and advances with next_cache_extent(); every extent that is counted is
 * also unlinked from its device_list.  A mismatch is reported with the
 * message below.
 * NOTE(review): the loop header, the action taken when the objectid no
 * longer matches and the return values are not visible here - confirm.
 */
8357 static int check_device_used(struct device_record *dev_rec,
8358 struct device_extent_tree *dext_cache)
8360 struct cache_extent *cache;
8361 struct device_extent_record *dev_extent_rec;
8364 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8366 dev_extent_rec = container_of(cache,
8367 struct device_extent_record,
8369 if (dev_extent_rec->objectid != dev_rec->devid)
/* The extent belongs to this device: claim it and account its length. */
8372 list_del_init(&dev_extent_rec->device_list);
8373 total_byte += dev_extent_rec->length;
8374 cache = next_cache_extent(cache);
8377 if (total_byte != dev_rec->byte_used) {
8379 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8380 total_byte, dev_rec->byte_used, dev_rec->objectid,
8381 dev_rec->type, dev_rec->offset);
/*
 * Run check_device_used() on every device record stored in the dev_cache
 * rb-tree (iterated with rb_first()/rb_next()), then report each device
 * extent that is still linked on dev_extent_cache->no_device_orphans.
 * NOTE(review): how err is folded into the return value is not visible
 * in this excerpt - confirm.
 */
8388 /* check btrfs_dev_item -> btrfs_dev_extent */
8389 static int check_devices(struct rb_root *dev_cache,
8390 struct device_extent_tree *dev_extent_cache)
8392 struct rb_node *dev_node;
8393 struct device_record *dev_rec;
8394 struct device_extent_record *dext_rec;
8398 dev_node = rb_first(dev_cache);
8400 dev_rec = container_of(dev_node, struct device_record, node);
8401 err = check_device_used(dev_rec, dev_extent_cache);
8405 dev_node = rb_next(dev_node);
/* Device extents left on this list were not claimed by any device. */
8407 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8410 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8411 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record, fill it from the arguments and append it
 * to the tail of head.
 *
 * @head:          list that receives the new record
 * @objectid:      stored in the record as objectid
 * @bytenr:        stored in the record as bytenr
 * @last_snapshot: stored in the record as last_snapshot
 * @level:         stored in the record as level
 * @drop_level:    stored in the record as drop_level
 * @level_size:    stored in the record as level_size
 * @drop_key:      copied into the record drop_key field
 *
 * NOTE(review): callers in this file pass NULL for drop_key; the guard in
 * front of the memcpy and the handling of a failed malloc are not visible
 * in this excerpt - confirm both exist.
 */
8418 static int add_root_item_to_list(struct list_head *head,
8419 u64 objectid, u64 bytenr, u64 last_snapshot,
8420 u8 level, u8 drop_level,
8421 int level_size, struct btrfs_key *drop_key)
8424 struct root_item_record *ri_rec;
8425 ri_rec = malloc(sizeof(*ri_rec));
8428 ri_rec->bytenr = bytenr;
8429 ri_rec->objectid = objectid;
8430 ri_rec->level = level;
8431 ri_rec->level_size = level_size;
8432 ri_rec->drop_level = drop_level;
8433 ri_rec->last_snapshot = last_snapshot;
8435 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8436 list_add_tail(&ri_rec->list, head);
/*
 * Drain a list of root_item_record entries: while the list is not empty,
 * take the first entry and unlink it with list_del_init().
 * NOTE(review): the release of ri_rec itself is not visible in this
 * excerpt - confirm the record is freed after it is unlinked.
 */
8441 static void free_root_item_list(struct list_head *list)
8443 struct root_item_record *ri_rec;
8445 while (!list_empty(list)) {
8446 ri_rec = list_first_entry(list, struct root_item_record,
8448 list_del_init(&ri_rec->list);
/*
 * Process the root_item_record entries queued on list.  For each record
 * the tree block at rec->bytenr is read through the tree root, queued
 * with add_root_to_pending() and handed to run_next_block() together
 * with the record; the buffer is then released and the record unlinked
 * from the list.  A further run_next_block() call with a NULL record
 * follows the per-record loop.
 * NOTE(review): the conditions and loops around both run_next_block()
 * calls, the error paths and the return value are not visible in this
 * excerpt - confirm against the full function.
 */
8453 static int deal_root_from_list(struct list_head *list,
8454 struct btrfs_root *root,
8455 struct block_info *bits,
8457 struct cache_tree *pending,
8458 struct cache_tree *seen,
8459 struct cache_tree *reada,
8460 struct cache_tree *nodes,
8461 struct cache_tree *extent_cache,
8462 struct cache_tree *chunk_cache,
8463 struct rb_root *dev_cache,
8464 struct block_group_tree *block_group_cache,
8465 struct device_extent_tree *dev_extent_cache)
8470 while (!list_empty(list)) {
8471 struct root_item_record *rec;
8472 struct extent_buffer *buf;
8473 rec = list_entry(list->next,
8474 struct root_item_record, list);
/* Read the root node of this tree; a buffer that is not uptodate is dropped. */
8476 buf = read_tree_block(root->fs_info->tree_root,
8477 rec->bytenr, rec->level_size, 0);
8478 if (!extent_buffer_uptodate(buf)) {
8479 free_extent_buffer(buf);
8483 ret = add_root_to_pending(buf, extent_cache, pending,
8484 seen, nodes, rec->objectid);
8488 * To rebuild extent tree, we need deal with snapshot
8489 * one by one, otherwise we deal with node firstly which
8490 * can maximize readahead.
8493 ret = run_next_block(root, bits, bits_nr, &last,
8494 pending, seen, reada, nodes,
8495 extent_cache, chunk_cache,
8496 dev_cache, block_group_cache,
8497 dev_extent_cache, rec);
8501 free_extent_buffer(buf);
8502 list_del(&rec->list);
/* After the list is drained, blocks are processed without a root record. */
8508 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8509 reada, nodes, extent_cache, chunk_cache,
8510 dev_cache, block_group_cache,
8511 dev_extent_cache, NULL);
/*
 * Driver for the chunk and extent checks.  It initialises the local
 * caches and work lists, publishes some of them through root->fs_info,
 * queues the tree root, the chunk root and every ROOT_ITEM found in the
 * root tree, processes the queued roots with deal_root_from_list(), and
 * then runs check_chunks(), check_extent_refs() and check_devices().
 * All caches are released and the fs_info pointers reset before leaving.
 * NOTE(review): two cleanup sequences are visible at the end of the
 * function; the labels and branches selecting between them, the error
 * handling after each step and the return value are not visible in this
 * excerpt - confirm against the full function.
 */
8521 static int check_chunks_and_extents(struct btrfs_root *root)
8523 struct rb_root dev_cache;
8524 struct cache_tree chunk_cache;
8525 struct block_group_tree block_group_cache;
8526 struct device_extent_tree dev_extent_cache;
8527 struct cache_tree extent_cache;
8528 struct cache_tree seen;
8529 struct cache_tree pending;
8530 struct cache_tree reada;
8531 struct cache_tree nodes;
8532 struct extent_io_tree excluded_extents;
8533 struct cache_tree corrupt_blocks;
8534 struct btrfs_path path;
8535 struct btrfs_key key;
8536 struct btrfs_key found_key;
8538 struct block_info *bits;
8540 struct extent_buffer *leaf;
8542 struct btrfs_root_item ri;
8543 struct list_head dropping_trees;
8544 struct list_head normal_trees;
8545 struct btrfs_root *root1;
/* Start from empty caches and work lists. */
8550 dev_cache = RB_ROOT;
8551 cache_tree_init(&chunk_cache);
8552 block_group_tree_init(&block_group_cache);
8553 device_extent_tree_init(&dev_extent_cache);
8555 cache_tree_init(&extent_cache);
8556 cache_tree_init(&seen);
8557 cache_tree_init(&pending);
8558 cache_tree_init(&nodes);
8559 cache_tree_init(&reada);
8560 cache_tree_init(&corrupt_blocks);
8561 extent_io_tree_init(&excluded_extents);
8562 INIT_LIST_HEAD(&dropping_trees);
8563 INIT_LIST_HEAD(&normal_trees);
/*
 * Publish the local trees and the free-extent hook through fs_info for
 * the duration of the check; they are reset to NULL in the cleanup below.
 */
8566 root->fs_info->excluded_extents = &excluded_extents;
8567 root->fs_info->fsck_extent_cache = &extent_cache;
8568 root->fs_info->free_extent_hook = free_extent_hook;
8569 root->fs_info->corrupt_blocks = &corrupt_blocks;
8573 bits = malloc(bits_nr * sizeof(struct block_info));
8579 if (ctx.progress_enabled) {
8580 ctx.tp = TASK_EXTENTS;
8581 task_start(ctx.info);
/* Queue the tree root and then the chunk root as normal trees. */
8585 root1 = root->fs_info->tree_root;
8586 level = btrfs_header_level(root1->node);
8587 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8588 root1->node->start, 0, level, 0,
8589 root1->nodesize, NULL);
8592 root1 = root->fs_info->chunk_root;
8593 level = btrfs_header_level(root1->node);
8594 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8595 root1->node->start, 0, level, 0,
8596 root1->nodesize, NULL);
/* Scan the root tree for ROOT_ITEM keys and queue each root found. */
8599 btrfs_init_path(&path);
8602 key.type = BTRFS_ROOT_ITEM_KEY;
8603 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8608 leaf = path.nodes[0];
8609 slot = path.slots[0];
8610 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8611 ret = btrfs_next_leaf(root, &path);
8614 leaf = path.nodes[0];
8615 slot = path.slots[0];
8617 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8618 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
8619 unsigned long offset;
8622 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8623 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8624 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * A root whose drop_progress objectid is 0 is queued on normal_trees;
 * the other visible insertion puts the root on dropping_trees together
 * with the key converted from a disk key.
 */
8625 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8626 level = btrfs_root_level(&ri);
8627 level_size = root->nodesize;
8628 ret = add_root_item_to_list(&normal_trees,
8630 btrfs_root_bytenr(&ri),
8631 last_snapshot, level,
8632 0, level_size, NULL);
8636 level = btrfs_root_level(&ri);
8637 level_size = root->nodesize;
8638 objectid = found_key.objectid;
8639 btrfs_disk_key_to_cpu(&found_key,
8641 ret = add_root_item_to_list(&dropping_trees,
8643 btrfs_root_bytenr(&ri),
8644 last_snapshot, level,
8646 level_size, &found_key);
8653 btrfs_release_path(&path);
8656 * check_block can return -EAGAIN if it fixes something, please keep
8657 * this in mind when dealing with return values from these functions, if
8658 * we get -EAGAIN we want to fall through and restart the loop.
8660 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8661 &seen, &reada, &nodes, &extent_cache,
8662 &chunk_cache, &dev_cache, &block_group_cache,
8669 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8670 &pending, &seen, &reada, &nodes,
8671 &extent_cache, &chunk_cache, &dev_cache,
8672 &block_group_cache, &dev_extent_cache);
/* Cross-check chunks, then extent references, then devices. */
8679 ret = check_chunks(&chunk_cache, &block_group_cache,
8680 &dev_extent_cache, NULL, NULL, NULL, 0);
8687 ret = check_extent_refs(root, &extent_cache);
8694 ret = check_devices(&dev_cache, &dev_extent_cache);
8699 task_stop(ctx.info);
/* First visible cleanup sequence: detach from fs_info and free caches. */
8701 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8702 extent_io_tree_cleanup(&excluded_extents);
8703 root->fs_info->fsck_extent_cache = NULL;
8704 root->fs_info->free_extent_hook = NULL;
8705 root->fs_info->corrupt_blocks = NULL;
8706 root->fs_info->excluded_extents = NULL;
8709 free_chunk_cache_tree(&chunk_cache);
8710 free_device_cache_tree(&dev_cache);
8711 free_block_group_tree(&block_group_cache);
8712 free_device_extent_tree(&dev_extent_cache);
8713 free_extent_cache_tree(&seen);
8714 free_extent_cache_tree(&pending);
8715 free_extent_cache_tree(&reada);
8716 free_extent_cache_tree(&nodes);
/*
 * Second visible cleanup sequence: it additionally frees the extent
 * record cache and both root item lists and leaves the fs_info pointers
 * in place.
 */
8719 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8720 free_extent_cache_tree(&seen);
8721 free_extent_cache_tree(&pending);
8722 free_extent_cache_tree(&reada);
8723 free_extent_cache_tree(&nodes);
8724 free_chunk_cache_tree(&chunk_cache);
8725 free_block_group_tree(&block_group_cache);
8726 free_device_cache_tree(&dev_cache);
8727 free_device_extent_tree(&dev_extent_cache);
8728 free_extent_record_cache(root->fs_info, &extent_cache);
8729 free_root_item_list(&normal_trees);
8730 free_root_item_list(&dropping_trees);
8731 extent_io_tree_cleanup(&excluded_extents);
/*
 * Look up the extent tree item describing the tree block at bytenr and
 * verify it: the TREE_BLOCK flag, the generation against the header of
 * eb, the level, and the reference count for owners that are not fs
 * trees.  The inline refs are then scanned for a TREE_BLOCK_REF matching
 * root->objectid or owner, or for a SHARED_BLOCK_REF whose target passes
 * a recursive check.  A keyed TREE_BLOCK_REF item is searched as a
 * fallback, and BACKREF_MISSING is raised on the failure paths.
 * NOTE(review): the guards around the eb-dependent checks and around the
 * fallback search are not visible in this excerpt - confirm.
 */
8736 * Check backrefs of a tree block given by @bytenr or @eb.
8738 * @root: the root containing the @bytenr or @eb
8739 * @eb: tree block extent buffer, can be NULL
8740 * @bytenr: bytenr of the tree block to search
8741 * @level: tree level of the tree block
8742 * @owner: owner of the tree block
8744 * Return >0 for any error found and output error message
8745 * Return 0 for no error found
8747 static int check_tree_block_ref(struct btrfs_root *root,
8748 struct extent_buffer *eb, u64 bytenr,
8749 int level, u64 owner)
8751 struct btrfs_key key;
8752 struct btrfs_root *extent_root = root->fs_info->extent_root;
8753 struct btrfs_path path;
8754 struct btrfs_extent_item *ei;
8755 struct btrfs_extent_inline_ref *iref;
8756 struct extent_buffer *leaf;
8762 u32 nodesize = root->nodesize;
/*
 * Search with the METADATA_ITEM key type when the SKINNY_METADATA
 * incompat feature is set; the other visible assignment selects the
 * EXTENT_ITEM key type.  The search uses offset (u64)-1 and then steps
 * back with btrfs_previous_extent_item().
 */
8769 btrfs_init_path(&path);
8770 key.objectid = bytenr;
8771 if (btrfs_fs_incompat(root->fs_info,
8772 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8773 key.type = BTRFS_METADATA_ITEM_KEY;
8775 key.type = BTRFS_EXTENT_ITEM_KEY;
8776 key.offset = (u64)-1;
8778 /* Search for the backref in extent tree */
8779 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8781 err |= BACKREF_MISSING;
8784 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8786 err |= BACKREF_MISSING;
8790 leaf = path.nodes[0];
8791 slot = path.slots[0];
8792 btrfs_item_key_to_cpu(leaf, &key, slot);
8794 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * For a METADATA_ITEM the level is taken from key.offset and the inline
 * refs start right after the extent item.  The other visible path reads
 * the level from a btrfs_tree_block_info placed after the extent item
 * and starts the inline refs behind it.
 */
8796 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8797 skinny_level = (int)key.offset;
8798 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8800 struct btrfs_tree_block_info *info;
8802 info = (struct btrfs_tree_block_info *)(ei + 1);
8803 skinny_level = btrfs_tree_block_level(leaf, info);
8804 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8811 if (!(btrfs_extent_flags(leaf, ei) &
8812 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8814 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8815 key.objectid, nodesize,
8816 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8817 err = BACKREF_MISMATCH;
8819 header_gen = btrfs_header_generation(eb);
8820 extent_gen = btrfs_extent_generation(leaf, ei);
8821 if (header_gen != extent_gen) {
8823 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8824 key.objectid, nodesize, header_gen,
8826 err = BACKREF_MISMATCH;
8828 if (level != skinny_level) {
8830 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8831 key.objectid, nodesize, level, skinny_level);
8832 err = BACKREF_MISMATCH;
8834 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8836 "extent[%llu %u] is referred by other roots than %llu",
8837 key.objectid, nodesize, root->objectid);
8838 err = BACKREF_MISMATCH;
8843 * Iterate the extent/metadata item to find the exact backref
8845 item_size = btrfs_item_size_nr(leaf, slot);
8846 ptr = (unsigned long)iref;
8847 end = (unsigned long)ei + item_size;
8849 iref = (struct btrfs_extent_inline_ref *)ptr;
8850 type = btrfs_extent_inline_ref_type(leaf, iref);
8851 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8853 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8854 (offset == root->objectid || offset == owner)) {
8856 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8857 /* Check if the backref points to valid referencer */
8858 found_ref = !check_tree_block_ref(root, NULL, offset,
8864 ptr += btrfs_extent_inline_ref_size(type);
8868 * Inlined extent item doesn't have what we need, check
8869 * TREE_BLOCK_REF_KEY
8872 btrfs_release_path(&path);
8873 key.objectid = bytenr;
8874 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8875 key.offset = root->objectid;
8877 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8882 err |= BACKREF_MISSING;
8884 btrfs_release_path(&path);
8885 if (eb && (err & BACKREF_MISSING))
8886 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8887 bytenr, nodesize, owner, level);
/*
 * Verify one EXTENT_DATA item of eb at slot.  Inline extents and extents
 * with disk bytenr 0 are not checked further.  For the rest, the disk
 * and logical byte counts are tested for sectorsize alignment and added
 * to the global data_bytes_allocated / data_bytes_referenced counters,
 * the matching EXTENT_ITEM is looked up in the extent tree, its DATA
 * flag and generation are compared, and its inline refs are scanned for
 * a data backref owned by this root or a shared data ref that resolves.
 * A keyed EXTENT_DATA_REF item is searched as a fallback.
 * NOTE(review): several branch and loop lines are not visible in this
 * excerpt, including what is done with the fallback search result -
 * confirm against the full function.
 */
8892 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8894 * Return >0 any error found and output error message
8895 * Return 0 for no error found
8897 static int check_extent_data_item(struct btrfs_root *root,
8898 struct extent_buffer *eb, int slot)
8900 struct btrfs_file_extent_item *fi;
8901 struct btrfs_path path;
8902 struct btrfs_root *extent_root = root->fs_info->extent_root;
8903 struct btrfs_key fi_key;
8904 struct btrfs_key dbref_key;
8905 struct extent_buffer *leaf;
8906 struct btrfs_extent_item *ei;
8907 struct btrfs_extent_inline_ref *iref;
8908 struct btrfs_extent_data_ref *dref;
8910 u64 file_extent_gen;
8913 u64 extent_num_bytes;
8921 int found_dbackref = 0;
8925 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8926 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8927 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8929 /* Nothing to check for hole and inline data extents */
8930 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8931 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8934 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8935 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8936 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8938 /* Check unaligned disk_num_bytes and num_bytes */
8939 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8941 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8942 fi_key.objectid, fi_key.offset, disk_num_bytes,
8944 err |= BYTES_UNALIGNED;
8946 data_bytes_allocated += disk_num_bytes;
8948 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8950 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8951 fi_key.objectid, fi_key.offset, extent_num_bytes,
8953 err |= BYTES_UNALIGNED;
8955 data_bytes_referenced += extent_num_bytes;
8957 owner = btrfs_header_owner(eb);
8959 /* Check the extent item of the file extent in extent tree */
8960 btrfs_init_path(&path);
8961 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8962 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8963 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8965 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8967 err |= BACKREF_MISSING;
8971 leaf = path.nodes[0];
8972 slot = path.slots[0];
8973 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8975 extent_flags = btrfs_extent_flags(leaf, ei);
8976 extent_gen = btrfs_extent_generation(leaf, ei);
8978 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8980 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8981 disk_bytenr, disk_num_bytes,
8982 BTRFS_EXTENT_FLAG_DATA);
8983 err |= BACKREF_MISMATCH;
/* The extent item generation must not be newer than the file extent one. */
8986 if (file_extent_gen < extent_gen) {
8988 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8989 disk_bytenr, disk_num_bytes, file_extent_gen,
8991 err |= BACKREF_MISMATCH;
8994 /* Check data backref inside that extent item */
8995 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8996 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8997 ptr = (unsigned long)iref;
8998 end = (unsigned long)ei + item_size;
9000 iref = (struct btrfs_extent_inline_ref *)ptr;
9001 type = btrfs_extent_inline_ref_type(leaf, iref);
9002 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9004 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9005 ref_root = btrfs_extent_data_ref_root(leaf, dref);
9006 if (ref_root == owner || ref_root == root->objectid)
9008 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
9009 found_dbackref = !check_tree_block_ref(root, NULL,
9010 btrfs_extent_inline_ref_offset(leaf, iref),
9016 ptr += btrfs_extent_inline_ref_size(type);
9019 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
9020 if (!found_dbackref) {
9021 btrfs_release_path(&path);
9023 btrfs_init_path(&path);
9024 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9025 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
9026 dbref_key.offset = hash_extent_data_ref(root->objectid,
9027 fi_key.objectid, fi_key.offset);
9029 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9030 &dbref_key, &path, 0, 0);
9035 if (!found_dbackref)
9036 err |= BACKREF_MISSING;
9038 btrfs_release_path(&path);
9039 if (err & BACKREF_MISSING) {
9040 error("data extent[%llu %llu] backref lost",
9041 disk_bytenr, disk_num_bytes);
/*
 * Determine the level of the tree block at bytenr from two sources and
 * cross-check them: the extent tree item (key.offset for a
 * METADATA_ITEM; the other visible path reads a btrfs_tree_block_info)
 * and the header of the block itself, read with the generation recorded
 * in the extent item.  header_level is returned on the visible success
 * path.
 * NOTE(review): the error returns, including the one taken when the two
 * levels differ, are not visible in this excerpt - confirm.
 */
9047 * Get real tree block level for the case like shared block
9048 * Return >= 0 as tree level
9049 * Return <0 for error
9051 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9053 struct extent_buffer *eb;
9054 struct btrfs_path path;
9055 struct btrfs_key key;
9056 struct btrfs_extent_item *ei;
9059 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9064 /* Search extent tree for extent generation and level */
9065 key.objectid = bytenr;
9066 key.type = BTRFS_METADATA_ITEM_KEY;
9067 key.offset = (u64)-1;
9069 btrfs_init_path(&path);
9070 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9073 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9081 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9082 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9083 struct btrfs_extent_item);
/* Only extents flagged as tree blocks carry a level. */
9084 flags = btrfs_extent_flags(path.nodes[0], ei);
9085 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9090 /* Get transid for later read_tree_block() check */
9091 transid = btrfs_extent_generation(path.nodes[0], ei);
9093 /* Get backref level as one source */
9094 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9095 backref_level = key.offset;
9097 struct btrfs_tree_block_info *info;
9099 info = (struct btrfs_tree_block_info *)(ei + 1);
9100 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9102 btrfs_release_path(&path);
9104 /* Get level from tree block as an alternative source */
9105 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9106 if (!extent_buffer_uptodate(eb)) {
9107 free_extent_buffer(eb);
9110 header_level = btrfs_header_level(eb);
9111 free_extent_buffer(eb);
9113 if (header_level != backref_level)
9115 return header_level;
9118 btrfs_release_path(&path);
/*
 * Verify that the tree with id root_id really references the tree block
 * at bytenr.  The level is resolved with query_tree_block_level() for
 * the special case described below, the root is opened with
 * btrfs_read_fs_root(), the first key of the block is read, and the
 * tree is searched for that key with path.lowest_level set to level.
 * The node found at that level must have the expected bytenr and level.
 * Failures before the comparison raise REFERENCER_MISSING; comparison
 * failures raise REFERENCER_MISMATCH.
 * NOTE(review): the conditions choosing between the node-key and the
 * item-key read, and between the two lost-referencer messages, are not
 * visible in this excerpt - confirm.
 */
9123 * Check if a tree block backref is valid (points to a valid tree block)
9124 * if level == -1, level will be resolved
9125 * Return >0 for any error found and print error message
9127 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9128 u64 bytenr, int level)
9130 struct btrfs_root *root;
9131 struct btrfs_key key;
9132 struct btrfs_path path;
9133 struct extent_buffer *eb;
9134 struct extent_buffer *node;
9135 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9139 /* Query level for level == -1 special case */
9141 level = query_tree_block_level(fs_info, bytenr);
9143 err |= REFERENCER_MISSING;
9147 key.objectid = root_id;
9148 key.type = BTRFS_ROOT_ITEM_KEY;
9149 key.offset = (u64)-1;
9151 root = btrfs_read_fs_root(fs_info, &key);
9153 err |= REFERENCER_MISSING;
9157 /* Read out the tree block to get item/node key */
9158 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9159 if (!extent_buffer_uptodate(eb)) {
9160 err |= REFERENCER_MISSING;
9161 free_extent_buffer(eb);
9165 /* Empty tree, no need to check key */
9166 if (!btrfs_header_nritems(eb) && !level) {
9167 free_extent_buffer(eb);
9172 btrfs_node_key_to_cpu(eb, &key, 0);
9174 btrfs_item_key_to_cpu(eb, &key, 0);
9176 free_extent_buffer(eb);
9178 btrfs_init_path(&path);
9179 path.lowest_level = level;
9180 /* Search with the first key, to ensure we can reach it */
9181 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9183 err |= REFERENCER_MISSING;
/* The node reached at the requested level must be the block itself. */
9187 node = path.nodes[level];
9188 if (btrfs_header_bytenr(node) != bytenr) {
9190 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9191 bytenr, nodesize, bytenr,
9192 btrfs_header_bytenr(node));
9193 err |= REFERENCER_MISMATCH;
9195 if (btrfs_header_level(node) != level) {
9197 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9198 bytenr, nodesize, level,
9199 btrfs_header_level(node));
9200 err |= REFERENCER_MISMATCH;
9204 btrfs_release_path(&path);
9206 if (err & REFERENCER_MISSING) {
9208 error("extent [%llu %d] lost referencer (owner: %llu)",
9209 bytenr, nodesize, root_id);
9212 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9213 bytenr, nodesize, root_id, level);
/*
 * Verify a shared block backref: read the tree block at parent, require
 * its header level to be level + 1, and look for bytenr among its node
 * block pointers.  When no matching pointer is found an error is printed
 * and REFERENCER_MISSING is returned.
 * NOTE(review): the guard in front of the level query, the jumps taken
 * on the failed checks and the success return are not visible in this
 * excerpt - confirm.
 */
9220 * Check referencer for shared block backref
9221 * If level == -1, this function will resolve the level.
9223 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9224 u64 parent, u64 bytenr, int level)
9226 struct extent_buffer *eb;
9227 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9229 int found_parent = 0;
9232 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9233 if (!extent_buffer_uptodate(eb))
9237 level = query_tree_block_level(fs_info, bytenr);
/* A parent node sits exactly one level above the block it points to. */
9241 if (level + 1 != btrfs_header_level(eb))
9244 nr = btrfs_header_nritems(eb);
9245 for (i = 0; i < nr; i++) {
9246 if (bytenr == btrfs_node_blockptr(eb, i)) {
9252 free_extent_buffer(eb);
9253 if (!found_parent) {
9255 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9256 bytenr, nodesize, parent, level);
9257 return REFERENCER_MISSING;
/*
 * Verify a keyed or inline EXTENT_DATA_REF.  The extent tree is searched
 * for the EXTENT_ITEM of bytenr, then the tree root_id is opened and the
 * EXTENT_DATA items of inode objectid are walked with btrfs_next_item().
 * Items are compared on disk bytenr, disk num bytes and the difference
 * between the key offset and the file extent offset.  If the resulting
 * found_count differs from count, an error is printed and
 * REFERENCER_MISSING is returned.
 * NOTE(review): the guard around the extent tree lookup, the right-hand
 * side of the offset comparison, the counter update and the success
 * return are not visible in this excerpt - confirm.
 */
9263 * Check referencer for normal (inlined) data ref
9264 * If len == 0, it will be resolved by searching in extent tree
9266 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9267 u64 root_id, u64 objectid, u64 offset,
9268 u64 bytenr, u64 len, u32 count)
9270 struct btrfs_root *root;
9271 struct btrfs_root *extent_root = fs_info->extent_root;
9272 struct btrfs_key key;
9273 struct btrfs_path path;
9274 struct extent_buffer *leaf;
9275 struct btrfs_file_extent_item *fi;
9276 u32 found_count = 0;
9281 key.objectid = bytenr;
9282 key.type = BTRFS_EXTENT_ITEM_KEY;
9283 key.offset = (u64)-1;
9285 btrfs_init_path(&path);
9286 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9289 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9292 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9293 if (key.objectid != bytenr ||
9294 key.type != BTRFS_EXTENT_ITEM_KEY)
9297 btrfs_release_path(&path);
9299 key.objectid = root_id;
9300 key.type = BTRFS_ROOT_ITEM_KEY;
9301 key.offset = (u64)-1;
9302 btrfs_init_path(&path);
9304 root = btrfs_read_fs_root(fs_info, &key);
9308 key.objectid = objectid;
9309 key.type = BTRFS_EXTENT_DATA_KEY;
9311 * It can be nasty as data backref offset is
9312 * file offset - file extent offset, which is smaller or
9313 * equal to original backref offset. The only special case is
9314 * overflow. So we need to special check and do further search.
/* An offset with the top bit set is treated as wrapped: search from 0. */
9316 key.offset = offset & (1ULL << 63) ? 0 : offset;
9318 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9323 * Search afterwards to get correct one
9324 * NOTE: As we must do a comprehensive check on the data backref to
9325 * make sure the dref count also matches, we must iterate all file
9326 * extents for that inode.
9329 leaf = path.nodes[0];
9330 slot = path.slots[0];
9332 btrfs_item_key_to_cpu(leaf, &key, slot);
9333 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9335 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9337 * Except normal disk bytenr and disk num bytes, we still
9338 * need to do extra check on dbackref offset as
9339 * dbackref offset = file_offset - file_extent_offset
9341 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9342 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9343 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9347 ret = btrfs_next_item(root, &path);
9352 btrfs_release_path(&path);
9353 if (found_count != count) {
9355 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9356 bytenr, len, root_id, objectid, offset, count, found_count);
9357 return REFERENCER_MISSING;
/*
 * Verify a shared data backref: read the tree block at parent and scan
 * its items for an EXTENT_DATA item whose disk bytenr equals bytenr.
 * The scan tests each item for the EXTENT_DATA key type and for the
 * inline extent type.  When no match is found an error is printed and
 * REFERENCER_MISSING is returned.
 * NOTE(review): the actions taken on those two tests, the handling of a
 * block that is not uptodate and the success return are not visible in
 * this excerpt - confirm.
 */
9363 * Check if the referencer of a shared data backref exists
9365 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9366 u64 parent, u64 bytenr)
9368 struct extent_buffer *eb;
9369 struct btrfs_key key;
9370 struct btrfs_file_extent_item *fi;
9371 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9373 int found_parent = 0;
9376 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9377 if (!extent_buffer_uptodate(eb))
9380 nr = btrfs_header_nritems(eb);
9381 for (i = 0; i < nr; i++) {
9382 btrfs_item_key_to_cpu(eb, &key, i);
9383 if (key.type != BTRFS_EXTENT_DATA_KEY)
9386 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9387 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9390 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9397 free_extent_buffer(eb);
9398 if (!found_parent) {
9399 error("shared extent %llu referencer lost (parent: %llu)",
9401 return REFERENCER_MISSING;
/*
 * Check one extent tree item of eb at slot.  The extent size is added to
 * the global bytes_used counter (key.offset for an EXTENT_ITEM key,
 * nodesize on the other visible path), items smaller than a
 * btrfs_extent_item are rejected as the unsupported V0 format, metadata
 * extents are tested for crossing a stripe boundary, and every inline
 * backref is dispatched by type to the matching check_*_backref()
 * helper.  An unknown ref type raises UNKNOWN_TYPE.
 * NOTE(review): the loop around the inline refs, the way each helper
 * result is merged into err and the return value are not visible in
 * this excerpt - confirm.
 */
9407 * This function will check a given extent item, including its backref and
9408 * itself (like crossing stripe boundary and type)
9410 * Since we don't use extent_record anymore, introduce new error bit
9412 static int check_extent_item(struct btrfs_fs_info *fs_info,
9413 struct extent_buffer *eb, int slot)
9415 struct btrfs_extent_item *ei;
9416 struct btrfs_extent_inline_ref *iref;
9417 struct btrfs_extent_data_ref *dref;
9421 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9422 u32 item_size = btrfs_item_size_nr(eb, slot);
9427 struct btrfs_key key;
9431 btrfs_item_key_to_cpu(eb, &key, slot);
9432 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9433 bytes_used += key.offset;
9435 bytes_used += nodesize;
9437 if (item_size < sizeof(*ei)) {
9439 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9440 * old thing when on disk format is still un-determined.
9441 * No need to care about it anymore
9443 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9447 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9448 flags = btrfs_extent_flags(eb, ei);
9450 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9452 if (metadata && check_crossing_stripes(global_info, key.objectid,
9454 error("bad metadata [%llu, %llu) crossing stripe boundary",
9455 key.objectid, key.objectid + nodesize);
9456 err |= CROSSING_STRIPE_BOUNDARY;
9459 ptr = (unsigned long)(ei + 1);
9461 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9462 /* Old EXTENT_ITEM metadata */
9463 struct btrfs_tree_block_info *info;
9465 info = (struct btrfs_tree_block_info *)ptr;
9466 level = btrfs_tree_block_level(eb, info);
9467 ptr += sizeof(struct btrfs_tree_block_info);
9469 /* New METADATA_ITEM */
9472 end = (unsigned long)ei + item_size;
9475 err |= ITEM_SIZE_MISMATCH;
9479 /* Now check every backref in this extent item */
9481 iref = (struct btrfs_extent_inline_ref *)ptr;
9482 type = btrfs_extent_inline_ref_type(eb, iref);
9483 offset = btrfs_extent_inline_ref_offset(eb, iref);
9485 case BTRFS_TREE_BLOCK_REF_KEY:
9486 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9490 case BTRFS_SHARED_BLOCK_REF_KEY:
9491 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9495 case BTRFS_EXTENT_DATA_REF_KEY:
9496 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9497 ret = check_extent_data_backref(fs_info,
9498 btrfs_extent_data_ref_root(eb, dref),
9499 btrfs_extent_data_ref_objectid(eb, dref),
9500 btrfs_extent_data_ref_offset(eb, dref),
9501 key.objectid, key.offset,
9502 btrfs_extent_data_ref_count(eb, dref));
9505 case BTRFS_SHARED_DATA_REF_KEY:
9506 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9510 error("extent[%llu %d %llu] has unknown ref type: %d",
9511 key.objectid, key.type, key.offset, type);
9512 err |= UNKNOWN_TYPE;
9516 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify a DEV_EXTENT item of eb at slot against the chunk it names.
 * The chunk key is built from the chunk objectid and chunk offset stored
 * in the dev extent and looked up in the chunk tree.  The chunk length
 * is compared with the dev extent length, and the chunk stripes are
 * scanned for one whose devid and offset equal the dev extent key.
 * The failure path prints an error and returns REFERENCER_MISSING.
 * NOTE(review): the assignment of l, the jumps on the failed checks,
 * the condition guarding the error and the success return are not
 * visible in this excerpt - confirm.
 */
9525 * Check if a dev extent item is referred correctly by its chunk
9527 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9528 struct extent_buffer *eb, int slot)
9530 struct btrfs_root *chunk_root = fs_info->chunk_root;
9531 struct btrfs_dev_extent *ptr;
9532 struct btrfs_path path;
9533 struct btrfs_key chunk_key;
9534 struct btrfs_key devext_key;
9535 struct btrfs_chunk *chunk;
9536 struct extent_buffer *l;
9540 int found_chunk = 0;
9543 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9544 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9545 length = btrfs_dev_extent_length(eb, ptr);
9547 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9548 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9549 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9551 btrfs_init_path(&path);
9552 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9557 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9558 if (btrfs_chunk_length(l, chunk) != length)
9561 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9562 for (i = 0; i < num_stripes; i++) {
9563 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9564 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9566 if (devid == devext_key.objectid &&
9567 offset == devext_key.offset) {
9573 btrfs_release_path(&path);
9576 "device extent[%llu, %llu, %llu] did not find the related chunk",
9577 devext_key.objectid, devext_key.offset, length);
9578 return REFERENCER_MISSING;
/*
 * Verify the bytes_used field of a DEV_ITEM of eb at slot.  The device
 * tree is searched for DEV_EXTENT items of the device id, their lengths
 * are summed while walking forward with btrfs_next_item(), and the sum
 * is compared with the value stored in the dev item.  A failed lookup
 * returns REFERENCER_MISSING and a differing sum returns
 * ACCOUNTING_MISMATCH.
 * NOTE(review): the mismatch message prints BTRFS_ROOT_TREE_OBJECTID,
 * BTRFS_DEV_EXTENT_KEY and dev_id for the dev[...] triple instead of the
 * key fetched by btrfs_item_key_to_cpu() just above it - confirm this is
 * intended.  The search key offset, the loop header and the success
 * return are not visible in this excerpt.
 */
9584 * Check if the used space is correct with the dev item
9586 static int check_dev_item(struct btrfs_fs_info *fs_info,
9587 struct extent_buffer *eb, int slot)
9589 struct btrfs_root *dev_root = fs_info->dev_root;
9590 struct btrfs_dev_item *dev_item;
9591 struct btrfs_path path;
9592 struct btrfs_key key;
9593 struct btrfs_dev_extent *ptr;
9599 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9600 dev_id = btrfs_device_id(eb, dev_item);
9601 used = btrfs_device_bytes_used(eb, dev_item);
9603 key.objectid = dev_id;
9604 key.type = BTRFS_DEV_EXTENT_KEY;
9607 btrfs_init_path(&path);
9608 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9610 btrfs_item_key_to_cpu(eb, &key, slot);
9611 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9612 key.objectid, key.type, key.offset);
9613 btrfs_release_path(&path);
9614 return REFERENCER_MISSING;
9617 /* Iterate dev_extents to calculate the used space of a device */
9619 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9621 if (key.objectid > dev_id)
9623 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9626 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9627 struct btrfs_dev_extent);
9628 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9630 ret = btrfs_next_item(dev_root, &path);
9634 btrfs_release_path(&path);
9636 if (used != total) {
9637 btrfs_item_key_to_cpu(eb, &key, slot);
9639 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9640 total, used, BTRFS_ROOT_TREE_OBJECTID,
9641 BTRFS_DEV_EXTENT_KEY, dev_id);
9642 return ACCOUNTING_MISMATCH;
9648 * Check a block group item with its referencer (chunk) and its used space
9649 * with extent/metadata item
/*
 * Check one block group item against its chunk item and against the extent
 * items that lie inside the block group range.
 *
 * @fs_info: filesystem info; the chunk root and the extent root are used
 * @eb:      leaf that contains the block group item
 * @slot:    slot of the block group item inside @eb
 *
 * Step 1: look up the chunk item keyed (BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 * BTRFS_CHUNK_ITEM_KEY, bg_key.objectid). The visible error paths add
 * REFERENCER_MISSING when the chunk is not found and REFERENCER_MISMATCH
 * when its length does not match.
 *
 * Step 2: walk the extent tree starting at key (bg_key.objectid, 0, 0) and
 * account EXTENT_ITEM / METADATA_ITEM entries below
 * bg_key.objectid + bg_key.offset into total. For each accounted extent the
 * extent flags are checked against the block group flags: data extents need
 * BTRFS_BLOCK_GROUP_DATA, tree blocks need SYSTEM or METADATA; otherwise
 * CHUNK_TYPE_MISMATCH is added.
 *
 * Step 3: when total differs from the used value stored in the block group
 * item, ACCOUNTING_MISMATCH is added.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably the accumulated err bitmask is returned - confirm.
 * NOTE(review): the statement executed for BTRFS_METADATA_ITEM_KEY is on a
 * line not visible here; presumably nodesize is added instead of
 * extent_key.offset - confirm.
 */
9651 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9652 struct extent_buffer *eb, int slot)
9654 struct btrfs_root *extent_root = fs_info->extent_root;
9655 struct btrfs_root *chunk_root = fs_info->chunk_root;
9656 struct btrfs_block_group_item *bi;
9657 struct btrfs_block_group_item bg_item;
9658 struct btrfs_path path;
9659 struct btrfs_key bg_key;
9660 struct btrfs_key chunk_key;
9661 struct btrfs_key extent_key;
9662 struct btrfs_chunk *chunk;
9663 struct extent_buffer *leaf;
9664 struct btrfs_extent_item *ei;
9665 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the on-disk block group item into bg_item before reading fields */
9673 btrfs_item_key_to_cpu(eb, &bg_key, slot);
9674 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9675 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9676 used = btrfs_block_group_used(&bg_item);
9677 bg_flags = btrfs_block_group_flags(&bg_item);
/* The chunk item key offset is the block group start (bg_key.objectid) */
9679 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9680 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9681 chunk_key.offset = bg_key.objectid;
9683 btrfs_init_path(&path);
9684 /* Search for the referencer chunk */
9685 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9688 "block group[%llu %llu] did not find the related chunk item",
9689 bg_key.objectid, bg_key.offset);
9690 err |= REFERENCER_MISSING;
9692 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9693 struct btrfs_chunk);
9694 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9697 "block group[%llu %llu] related chunk item length does not match",
9698 bg_key.objectid, bg_key.offset);
9699 err |= REFERENCER_MISMATCH;
9702 btrfs_release_path(&path);
9704 /* Search from the block group bytenr */
9705 extent_key.objectid = bg_key.objectid;
9706 extent_key.type = 0;
9707 extent_key.offset = 0;
9709 btrfs_init_path(&path);
9710 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9714 /* Iterate extent tree to account used space */
9716 leaf = path.nodes[0];
9717 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Keys at or beyond the end of the block group are outside its range */
9718 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9721 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9722 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9724 if (extent_key.objectid < bg_key.objectid)
9727 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9730 total += extent_key.offset;
9732 ei = btrfs_item_ptr(leaf, path.slots[0],
9733 struct btrfs_extent_item);
9734 flags = btrfs_extent_flags(leaf, ei);
/* The extent type has to agree with the block group type bits */
9735 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9736 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9738 "bad extent[%llu, %llu) type mismatch with chunk",
9739 extent_key.objectid,
9740 extent_key.objectid + extent_key.offset);
9741 err |= CHUNK_TYPE_MISMATCH;
9743 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9744 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9745 BTRFS_BLOCK_GROUP_METADATA))) {
9747 "bad extent[%llu, %llu) type mismatch with chunk",
9748 extent_key.objectid,
9749 extent_key.objectid + nodesize);
9750 err |= CHUNK_TYPE_MISMATCH;
9754 ret = btrfs_next_item(extent_root, &path);
9760 btrfs_release_path(&path);
9762 if (total != used) {
9764 "block group[%llu %llu] used %llu but extent items used %llu",
9765 bg_key.objectid, bg_key.offset, used, total);
9766 err |= ACCOUNTING_MISMATCH;
9772 * Check a chunk item.
9773 * Including checking all referred dev_extents and block group
/*
 * Check one chunk item: its own fields, the block group item that mirrors
 * it, and the dev extent behind every stripe.
 *
 * @fs_info: filesystem info; the extent root and the device root are used
 * @eb:      leaf that contains the chunk item
 * @slot:    slot of the chunk item inside @eb
 *
 * Visible checks, each adding a bit to err:
 *  - chunk length not aligned to the sector size: BYTES_UNALIGNED
 *  - no bit of BTRFS_BLOCK_GROUP_TYPE_MASK set in the type: UNKNOWN_TYPE
 *  - more than one profile bit set (profile & (profile - 1)): UNKNOWN_TYPE
 *  - block group item keyed (chunk_key.offset, BLOCK_GROUP_ITEM, length) not
 *    found in the extent tree: REFERENCER_MISSING
 *  - block group flags differ from the chunk type: REFERENCER_MISSING
 *  - for a stripe, the dev extent keyed (devid, DEV_EXTENT, stripe offset)
 *    is searched and its chunk objectid, chunk offset and length are
 *    compared with the chunk; the failure path adds BACKREF_MISSING
 *
 * NOTE(review): the return statement and several branch/goto lines are not
 * visible in this excerpt; presumably the accumulated err bitmask is
 * returned - confirm.
 * NOTE(review): a flags mismatch is reported with REFERENCER_MISSING rather
 * than a mismatch code - confirm this is intended.
 * NOTE(review): the stripe error message prints chunk_key.objectid as the
 * chunk start while every other message here prints chunk_key.offset -
 * confirm which one is intended.
 */
9775 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9776 struct extent_buffer *eb, int slot)
9778 struct btrfs_root *extent_root = fs_info->extent_root;
9779 struct btrfs_root *dev_root = fs_info->dev_root;
9780 struct btrfs_path path;
9781 struct btrfs_key chunk_key;
9782 struct btrfs_key bg_key;
9783 struct btrfs_key devext_key;
9784 struct btrfs_chunk *chunk;
9785 struct extent_buffer *leaf;
9786 struct btrfs_block_group_item *bi;
9787 struct btrfs_block_group_item bg_item;
9788 struct btrfs_dev_extent *ptr;
9789 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
/* chunk_end is the exclusive end of the range starting at chunk_key.offset */
9801 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9802 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9803 length = btrfs_chunk_length(eb, chunk);
9804 chunk_end = chunk_key.offset + length;
9805 if (!IS_ALIGNED(length, sectorsize)) {
9806 error("chunk[%llu %llu) not aligned to %u",
9807 chunk_key.offset, chunk_end, sectorsize);
9808 err |= BYTES_UNALIGNED;
9812 type = btrfs_chunk_type(eb, chunk);
9813 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9814 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9815 error("chunk[%llu %llu) has no chunk type",
9816 chunk_key.offset, chunk_end);
9817 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is nonzero when more than one bit is set */
9819 if (profile && (profile & (profile - 1))) {
9820 error("chunk[%llu %llu) multiple profiles detected: %llx",
9821 chunk_key.offset, chunk_end, profile);
9822 err |= UNKNOWN_TYPE;
/* Key of the block group item that should mirror this chunk */
9825 bg_key.objectid = chunk_key.offset;
9826 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9827 bg_key.offset = length;
9829 btrfs_init_path(&path);
9830 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9833 "chunk[%llu %llu) did not find the related block group item",
9834 chunk_key.offset, chunk_end);
9835 err |= REFERENCER_MISSING;
9837 leaf = path.nodes[0];
9838 bi = btrfs_item_ptr(leaf, path.slots[0],
9839 struct btrfs_block_group_item);
9840 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9842 if (btrfs_block_group_flags(&bg_item) != type) {
9844 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9845 chunk_key.offset, chunk_end, type,
9846 btrfs_block_group_flags(&bg_item));
9847 err |= REFERENCER_MISSING;
9851 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
/* The path is released and re-initialised before each stripe lookup */
9852 for (i = 0; i < num_stripes; i++) {
9853 btrfs_release_path(&path);
9854 btrfs_init_path(&path);
9855 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9856 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9857 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9859 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9864 leaf = path.nodes[0];
9865 ptr = btrfs_item_ptr(leaf, path.slots[0],
9866 struct btrfs_dev_extent);
9867 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9868 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9869 if (objectid != chunk_key.objectid ||
9870 offset != chunk_key.offset ||
9871 btrfs_dev_extent_length(leaf, ptr) != length)
9875 err |= BACKREF_MISSING;
9877 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9878 chunk_key.objectid, chunk_end, i);
9881 btrfs_release_path(&path);
9887 * Main entry function to check known items and update related accounting info
/*
 * Walk the items of a leaf and dispatch each one to the checker for its key
 * type.
 *
 * @root: tree the leaf belongs to; root->fs_info is handed to the checkers
 * @eb:   leaf whose items are examined
 *
 * Dispatch table as visible in the switch:
 *   BTRFS_EXTENT_DATA_KEY                   -> check_extent_data_item()
 *   BTRFS_BLOCK_GROUP_ITEM_KEY              -> check_block_group_item()
 *   BTRFS_DEV_ITEM_KEY                      -> check_dev_item()
 *   BTRFS_CHUNK_ITEM_KEY                    -> check_chunk_item()
 *   BTRFS_DEV_EXTENT_KEY                    -> check_dev_extent_item()
 *   BTRFS_EXTENT_ITEM_KEY / METADATA_ITEM   -> check_extent_item()
 *   BTRFS_EXTENT_CSUM_KEY                   -> item size added to
 *                                              total_csum_bytes
 *   BTRFS_TREE_BLOCK_REF_KEY                -> check_tree_block_backref()
 *   BTRFS_EXTENT_DATA_REF_KEY               -> check_extent_data_backref()
 *   BTRFS_SHARED_BLOCK_REF_KEY              -> check_shared_block_backref()
 *   BTRFS_SHARED_DATA_REF_KEY               -> check_shared_data_backref()
 *
 * The slot is advanced until btrfs_header_nritems(eb) is reached.
 * NOTE(review): how the per-item results are combined and what is returned
 * is on lines not visible in this excerpt - confirm against the full file.
 */
9889 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9891 struct btrfs_fs_info *fs_info = root->fs_info;
9892 struct btrfs_key key;
9895 struct btrfs_extent_data_ref *dref;
9900 btrfs_item_key_to_cpu(eb, &key, slot);
9904 case BTRFS_EXTENT_DATA_KEY:
9905 ret = check_extent_data_item(root, eb, slot);
9908 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9909 ret = check_block_group_item(fs_info, eb, slot);
9912 case BTRFS_DEV_ITEM_KEY:
9913 ret = check_dev_item(fs_info, eb, slot);
9916 case BTRFS_CHUNK_ITEM_KEY:
9917 ret = check_chunk_item(fs_info, eb, slot);
9920 case BTRFS_DEV_EXTENT_KEY:
9921 ret = check_dev_extent_item(fs_info, eb, slot);
9924 case BTRFS_EXTENT_ITEM_KEY:
9925 case BTRFS_METADATA_ITEM_KEY:
9926 ret = check_extent_item(fs_info, eb, slot);
/* Checksum items are only accounted here, no checker is called */
9929 case BTRFS_EXTENT_CSUM_KEY:
9930 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9932 case BTRFS_TREE_BLOCK_REF_KEY:
9933 ret = check_tree_block_backref(fs_info, key.offset,
9937 case BTRFS_EXTENT_DATA_REF_KEY:
9938 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9939 ret = check_extent_data_backref(fs_info,
9940 btrfs_extent_data_ref_root(eb, dref),
9941 btrfs_extent_data_ref_objectid(eb, dref),
9942 btrfs_extent_data_ref_offset(eb, dref),
9944 btrfs_extent_data_ref_count(eb, dref));
9947 case BTRFS_SHARED_BLOCK_REF_KEY:
9948 ret = check_shared_block_backref(fs_info, key.offset,
9952 case BTRFS_SHARED_DATA_REF_KEY:
9953 ret = check_shared_data_backref(fs_info, key.offset,
9961 if (++slot < btrfs_header_nritems(eb))
9968 * Helper function for later fs/subvol tree check. To determine if a tree
9969 * block should be checked.
9970 * This function ensures that only the direct referencer with the lowest
9971 * rootid checks a fs/subvolume tree block.
9973 * Backref check at extent tree would detect errors like missing subvolume
9974 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root is the one that should check tree block @eb.
 *
 * @root: fs/subvolume root currently being traversed
 * @eb:   tree block under consideration
 *
 * The extent tree is searched with key (bytenr of @eb,
 * BTRFS_METADATA_ITEM_KEY, (u64)-1) and btrfs_previous_extent_item() is
 * used to step back to the extent item of that bytenr. The inline refs that
 * follow the extent item (after the btrfs_tree_block_info for non
 * METADATA_ITEM keys) are then scanned. When an inline
 * BTRFS_TREE_BLOCK_REF_KEY ref names a root id lower than root->objectid,
 * the path is released and the scan stops.
 *
 * NOTE(review): every return statement is on a line not visible in this
 * excerpt. The caller tests !should_check(...) to skip a block, so
 * presumably 0 is returned when a lower root id references the block and a
 * nonzero value otherwise, including on lookup failure - confirm.
 */
9976 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9978 struct btrfs_root *extent_root = root->fs_info->extent_root;
9979 struct btrfs_key key;
9980 struct btrfs_path path;
9981 struct extent_buffer *leaf;
9983 struct btrfs_extent_item *ei;
9989 struct btrfs_extent_inline_ref *iref;
9992 btrfs_init_path(&path);
9993 key.objectid = btrfs_header_bytenr(eb);
9994 key.type = BTRFS_METADATA_ITEM_KEY;
9995 key.offset = (u64)-1;
9998 * Any failure in backref resolving means we can't determine
9999 * whom the tree block belongs to.
10000 * So in that case, we need to check that tree block
10002 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10006 ret = btrfs_previous_extent_item(extent_root, &path,
10007 btrfs_header_bytenr(eb));
10011 leaf = path.nodes[0];
10012 slot = path.slots[0];
10013 btrfs_item_key_to_cpu(leaf, &key, slot);
10014 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10016 if (key.type == BTRFS_METADATA_ITEM_KEY) {
10017 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10019 struct btrfs_tree_block_info *info;
10021 info = (struct btrfs_tree_block_info *)(ei + 1);
10022 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10025 item_size = btrfs_item_size_nr(leaf, slot);
10026 ptr = (unsigned long)iref;
10027 end = (unsigned long)ei + item_size;
10028 while (ptr < end) {
10029 iref = (struct btrfs_extent_inline_ref *)ptr;
10030 type = btrfs_extent_inline_ref_type(leaf, iref);
10031 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10034 * We only check the tree block if current root is
10035 * the lowest referencer of it.
10037 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10038 offset < root->objectid) {
10039 btrfs_release_path(&path);
10043 ptr += btrfs_extent_inline_ref_size(type);
10046 * Normally we should also check keyed tree block ref, but that may be
10047 * very time consuming. Inlined ref should already make us skip a lot
10048 * of refs now. So skip search keyed tree block ref.
10052 btrfs_release_path(&path);
10057 * Traversal function for tree block. We will do:
10058 * 1) Skip shared fs/subvolume tree blocks
10059 * 2) Update related bytes accounting
10060 * 3) Pre-order traversal
/*
 * Recursively check a tree block and everything below it, parent first.
 *
 * @root: tree being traversed
 * @node: tree block to check; children are read with read_tree_block()
 *
 * Visible steps:
 *  - for fs trees (is_fstree(root->objectid)), should_check() decides
 *    whether this root handles the block at all
 *  - node->len is added to total_btree_bytes, and to total_fs_tree_bytes or
 *    total_extent_tree_bytes depending on the header owner
 *  - found_old_backref is set for a block owned by
 *    BTRFS_TREE_RELOC_OBJECTID with backref rev BTRFS_MIXED_BACKREF_REV and
 *    without BTRFS_HEADER_FLAG_RELOC
 *  - check_tree_block_ref() is run on the block itself; the visible message
 *    says the walk is continued when that check fails
 *  - btrfs_leaf_free_space() is added to btree_space_waste and
 *    check_leaf_items() is called (presumably only for level 0; the branch
 *    line is not visible - confirm)
 *  - for the child loop, the unused key pointer space is added to
 *    btree_space_waste, keys matched by is_dropped_key() at
 *    root_item.drop_level are tested, and each readable child is traversed
 *    recursively and then released with free_extent_buffer()
 *
 * NOTE(review): return statements and the combination of child results are
 * on lines not visible in this excerpt - confirm against the full file.
 */
10062 static int traverse_tree_block(struct btrfs_root *root,
10063 struct extent_buffer *node)
10065 struct extent_buffer *eb;
10066 struct btrfs_key key;
10067 struct btrfs_key drop_key;
10075 * Skip shared fs/subvolume tree block, in that case they will
10076 * be checked by referencer with lowest rootid
10078 if (is_fstree(root->objectid) && !should_check(root, node))
10081 /* Update bytes accounting */
10082 total_btree_bytes += node->len;
10083 if (fs_root_objectid(btrfs_header_owner(node)))
10084 total_fs_tree_bytes += node->len;
10085 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10086 total_extent_tree_bytes += node->len;
10087 if (!found_old_backref &&
10088 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10089 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10090 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10091 found_old_backref = 1;
10093 /* pre-order tranversal, check itself first */
10094 level = btrfs_header_level(node);
10095 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10096 btrfs_header_level(node),
10097 btrfs_header_owner(node));
10101 "check %s failed root %llu bytenr %llu level %d, force continue check",
10102 level ? "node":"leaf", root->objectid,
10103 btrfs_header_bytenr(node), btrfs_header_level(node));
10106 btree_space_waste += btrfs_leaf_free_space(root, node);
10107 ret = check_leaf_items(root, node);
10112 nr = btrfs_header_nritems(node);
10113 btrfs_disk_key_to_cpu(&drop_key, &root->root_item.drop_progress);
10114 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10115 sizeof(struct btrfs_key_ptr);
10117 /* Then check all its children */
10118 for (i = 0; i < nr; i++) {
10119 u64 blocknr = btrfs_node_blockptr(node, i);
10121 btrfs_node_key_to_cpu(node, &key, i);
10122 if (level == root->root_item.drop_level &&
10123 is_dropped_key(&key, &drop_key))
10127 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10128 * to call the function itself.
10130 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10131 if (extent_buffer_uptodate(eb)) {
10132 ret = traverse_tree_block(root, eb);
10135 free_extent_buffer(eb);
10142 * Low memory usage version check_chunks_and_extents.
/*
 * Low memory variant of the chunk and extent check: traverse the chunk
 * tree, the tree root, and then every tree that has a ROOT_ITEM in the tree
 * root starting from BTRFS_EXTENT_TREE_OBJECTID.
 *
 * @root: any root of the filesystem; only root->fs_info is used
 *
 * Each tree is walked with traverse_tree_block() from its root node. For
 * the ROOT_ITEM loop the key offset is set to (u64)-1 before the root is
 * read with btrfs_read_fs_root(); a failure to read a root is reported with
 * error().
 *
 * NOTE(review): how the traverse results are accumulated, the loop exits
 * and the return value are on lines not visible in this excerpt - confirm
 * against the full file.
 * NOTE(review): the message for a failed extent tree lookup contains the
 * typo treet; it is a runtime string and is left unchanged here.
 */
10144 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10146 struct btrfs_path path;
10147 struct btrfs_key key;
10148 struct btrfs_root *root1;
10149 struct btrfs_root *cur_root;
/* Chunk tree first */
10153 root1 = root->fs_info->chunk_root;
10154 ret = traverse_tree_block(root1, root1->node);
/* Then the tree root itself */
10157 root1 = root->fs_info->tree_root;
10158 ret = traverse_tree_block(root1, root1->node);
/* Finally iterate ROOT_ITEMs in the tree root, starting at the extent tree */
10161 btrfs_init_path(&path);
10162 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10164 key.type = BTRFS_ROOT_ITEM_KEY;
10166 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10168 error("cannot find extent treet in tree_root");
10173 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10174 if (key.type != BTRFS_ROOT_ITEM_KEY)
10176 key.offset = (u64)-1;
10178 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10179 if (IS_ERR(cur_root) || !cur_root) {
10180 error("failed to read tree: %lld", key.objectid);
10184 ret = traverse_tree_block(cur_root, cur_root->node);
10188 ret = btrfs_next_item(root1, &path);
10194 btrfs_release_path(&path);
/*
 * Replace the root node of @root with a freshly initialised tree block.
 *
 * @trans:     running transaction; trans->transid becomes the generation of
 *             the new block
 * @root:      tree whose root node is re-initialised
 * @overwrite: NOTE(review): its use is on lines not visible in this
 *             excerpt; presumably nonzero means the existing root node is
 *             reused instead of allocating a new block - confirm
 *
 * Visible steps: a block c is obtained (one visible path allocates it with
 * btrfs_alloc_free_block()), an extra reference is taken with
 * extent_buffer_get(), the header area is zeroed and then filled in (level,
 * bytenr, generation, backref rev, owner, fsid, chunk tree uuid), and the
 * buffer is marked dirty. When the new block has the same start as the old
 * root node, the root item generation and level are updated and written
 * with btrfs_update_root(). The old node is released with
 * free_extent_buffer() and the root is added to the dirty list.
 *
 * NOTE(review): error handling and the return value are on lines not
 * visible in this excerpt - confirm against the full file.
 */
10198 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10199 struct btrfs_root *root, int overwrite)
10201 struct extent_buffer *c;
10202 struct extent_buffer *old = root->node;
10205 struct btrfs_disk_key disk_key = {0,0,0};
10211 extent_buffer_get(c);
10214 c = btrfs_alloc_free_block(trans, root,
10216 root->root_key.objectid,
10217 &disk_key, level, 0, 0);
10220 extent_buffer_get(c);
/* Reset the header, then rebuild every header field from scratch */
10224 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10225 btrfs_set_header_level(c, level);
10226 btrfs_set_header_bytenr(c, c->start);
10227 btrfs_set_header_generation(c, trans->transid);
10228 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10229 btrfs_set_header_owner(c, root->root_key.objectid);
10231 write_extent_buffer(c, root->fs_info->fsid,
10232 btrfs_header_fsid(), BTRFS_FSID_SIZE);
10234 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10235 btrfs_header_chunk_tree_uuid(c),
10238 btrfs_mark_buffer_dirty(c);
10240 * this case can happen in the following case:
10242 * 1.overwrite previous root.
10244 * 2.reinit reloc data root, this is because we skip pin
10245 * down reloc data tree before which means we can allocate
10246 * same block bytenr here.
10248 if (old->start == c->start) {
10249 btrfs_set_root_generation(&root->root_item,
10251 root->root_item.level = btrfs_header_level(root->node);
10252 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10253 &root->root_key, &root->root_item);
10255 free_extent_buffer(c);
10259 free_extent_buffer(old);
10261 add_root_to_dirty_list(root);
/*
 * Recursively pin the extent of a tree block and of the blocks below it.
 *
 * @fs_info:   filesystem info; pinned ranges are tracked in
 *             fs_info->pinned_extents
 * @eb:        tree block to pin
 * @tree_root: nonzero when walking the tree root, in which case ROOT_ITEMs
 *             found in leaves are followed into the trees they point to
 *
 * A block whose range already has EXTENT_DIRTY set in pinned_extents is not
 * processed again. Otherwise the block is pinned with btrfs_pin_extent()
 * and its items are visited:
 *  - ROOT_ITEM keys: the extent tree, tree reloc and data reloc roots are
 *    skipped; for the others the root block is read and pinned recursively
 *    with tree_root set to 0
 *  - node pointers: at level 1 outside the tree root the child is pinned
 *    without being read; otherwise the child is read and pinned recursively
 *
 * NOTE(review): the leaf/node branch, the error paths after the read
 * failures and the return values are on lines not visible in this excerpt;
 * presumably ROOT_ITEMs are only followed when level is 0 and tree_root is
 * set - confirm against the full file.
 */
10265 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10266 struct extent_buffer *eb, int tree_root)
10268 struct extent_buffer *tmp;
10269 struct btrfs_root_item *ri;
10270 struct btrfs_key key;
10273 int level = btrfs_header_level(eb);
10279 * If we have pinned this block before, don't pin it again.
10280 * This can not only avoid forever loop with broken filesystem
10281 * but also give us some speedups.
10283 if (test_range_bit(&fs_info->pinned_extents, eb->start,
10284 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10287 btrfs_pin_extent(fs_info, eb->start, eb->len);
10289 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10290 nritems = btrfs_header_nritems(eb);
10291 for (i = 0; i < nritems; i++) {
10293 btrfs_item_key_to_cpu(eb, &key, i);
10294 if (key.type != BTRFS_ROOT_ITEM_KEY)
10296 /* Skip the extent root and reloc roots */
10297 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10298 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10299 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10301 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10302 bytenr = btrfs_disk_root_bytenr(eb, ri);
10305 * If at any point we start needing the real root we
10306 * will have to build a stump root for the root we are
10307 * in, but for now this doesn't actually use the root so
10308 * just pass in extent_root.
10310 tmp = read_tree_block(fs_info->extent_root, bytenr,
10312 if (!extent_buffer_uptodate(tmp)) {
10313 fprintf(stderr, "Error reading root block\n");
10316 ret = pin_down_tree_blocks(fs_info, tmp, 0);
10317 free_extent_buffer(tmp);
10321 bytenr = btrfs_node_blockptr(eb, i);
10323 /* If we aren't the tree root don't read the block */
10324 if (level == 1 && !tree_root) {
10325 btrfs_pin_extent(fs_info, bytenr, nodesize);
10329 tmp = read_tree_block(fs_info->extent_root, bytenr,
10331 if (!extent_buffer_uptodate(tmp)) {
10332 fprintf(stderr, "Error reading tree block\n");
10335 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10336 free_extent_buffer(tmp);
/*
 * Pin the metadata reachable from the chunk root and from the tree root.
 *
 * @fs_info: filesystem info providing chunk_root and tree_root
 *
 * The chunk tree is walked with tree_root == 0 and the tree root with
 * tree_root == 1, so only the latter follows ROOT_ITEMs. Returns the result
 * of the tree root walk.
 * NOTE(review): the check of the first call result is on a line not visible
 * in this excerpt; presumably a failure there is returned early - confirm.
 */
10345 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10349 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10353 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk items in the chunk
 * tree.
 *
 * @fs_info: filesystem info; its chunk root is scanned and its
 *           avail_*_alloc_bits and free_space_cache are updated
 *
 * The three avail_*_alloc_bits fields are cleared first. Then, for every
 * BTRFS_CHUNK_ITEM_KEY item, btrfs_add_block_group() is called with the
 * chunk type, key objectid, key offset and chunk length, and the range
 * starting at key.offset is marked in fs_info->free_space_cache with
 * set_extent_dirty(). Afterwards the block groups are iterated with
 * btrfs_lookup_first_block_group(), advancing start to the end of each
 * group.
 *
 * NOTE(review): the initial key fields other than type, what is done with
 * each block group in the final loop, the loop exits and the return values
 * are on lines not visible in this excerpt - confirm against the full file.
 */
10356 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10358 struct btrfs_block_group_cache *cache;
10359 struct btrfs_path path;
10360 struct extent_buffer *leaf;
10361 struct btrfs_chunk *chunk;
10362 struct btrfs_key key;
10366 btrfs_init_path(&path);
10368 key.type = BTRFS_CHUNK_ITEM_KEY;
10370 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
10372 btrfs_release_path(&path);
10377 * We do this in case the block groups were screwed up and had alloc
10378 * bits that aren't actually set on the chunks. This happens with
10379 * restored images every time and could happen in real life I guess.
10381 fs_info->avail_data_alloc_bits = 0;
10382 fs_info->avail_metadata_alloc_bits = 0;
10383 fs_info->avail_system_alloc_bits = 0;
10385 /* First we need to create the in-memory block groups */
10387 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10388 ret = btrfs_next_leaf(fs_info->chunk_root, &path);
10390 btrfs_release_path(&path);
10398 leaf = path.nodes[0];
10399 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10400 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10405 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
10406 btrfs_add_block_group(fs_info, 0,
10407 btrfs_chunk_type(leaf, chunk),
10408 key.objectid, key.offset,
10409 btrfs_chunk_length(leaf, chunk));
10410 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10411 key.offset + btrfs_chunk_length(leaf, chunk),
10417 cache = btrfs_lookup_first_block_group(fs_info, start);
10421 start = cache->key.objectid + cache->key.offset;
10424 btrfs_release_path(&path);
/*
 * Remove balance state from the tree root and re-initialise the data reloc
 * tree.
 *
 * @trans:   running transaction used for all modifications
 * @fs_info: filesystem info; fs_info->tree_root is modified
 *
 * Visible steps:
 *  1. search the tree root for the (BTRFS_BALANCE_OBJECTID,
 *     BTRFS_BALANCE_ITEM_KEY) item and delete it with btrfs_del_item();
 *     one path jumps straight to the reinit_data_reloc label
 *  2. search from (BTRFS_TREE_RELOC_OBJECTID, BTRFS_ROOT_ITEM_KEY) and
 *     delete items with that objectid in batches via btrfs_del_items(),
 *     tracking del_slot and del_nr
 *  3. read the data reloc root, record it in the transaction, re-initialise
 *     it with btrfs_fsck_reinit_root(trans, root, 0) and create its root
 *     directory with btrfs_make_root_dir()
 *
 * NOTE(review): loop structure, most result checks and the return value are
 * on lines not visible in this excerpt; presumably the balance item step is
 * skipped when the item does not exist - confirm against the full file.
 */
10428 static int reset_balance(struct btrfs_trans_handle *trans,
10429 struct btrfs_fs_info *fs_info)
10431 struct btrfs_root *root = fs_info->tree_root;
10432 struct btrfs_path path;
10433 struct extent_buffer *leaf;
10434 struct btrfs_key key;
10435 int del_slot, del_nr = 0;
/* Step 1: the balance item */
10439 btrfs_init_path(&path);
10440 key.objectid = BTRFS_BALANCE_OBJECTID;
10441 key.type = BTRFS_BALANCE_ITEM_KEY;
10443 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10448 goto reinit_data_reloc;
10453 ret = btrfs_del_item(trans, root, &path);
10456 btrfs_release_path(&path);
/* Step 2: root items of the tree reloc objectid */
10458 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10459 key.type = BTRFS_ROOT_ITEM_KEY;
10461 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10465 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10470 ret = btrfs_del_items(trans, root, &path,
10477 btrfs_release_path(&path);
10480 ret = btrfs_search_slot(trans, root, &key, &path,
10487 leaf = path.nodes[0];
10488 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10489 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10491 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10496 del_slot = path.slots[0];
10505 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
10509 btrfs_release_path(&path);
/* Step 3: rebuild the data reloc tree; root is reused for that tree */
10512 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10513 key.type = BTRFS_ROOT_ITEM_KEY;
10514 key.offset = (u64)-1;
10515 root = btrfs_read_fs_root(fs_info, &key);
10516 if (IS_ERR(root)) {
10517 fprintf(stderr, "Error reading data reloc tree\n");
10518 ret = PTR_ERR(root);
10521 record_root_in_trans(trans, root);
10522 ret = btrfs_fsck_reinit_root(trans, root, 0);
10525 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10527 btrfs_release_path(&path);
/*
 * Throw away the extent tree and rebuild an empty one that only contains
 * the block group items.
 *
 * @trans:   running transaction used for all modifications
 * @fs_info: filesystem whose extent tree is re-initialised
 *
 * Visible steps:
 *  1. filesystems with BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS get a message
 *     saying this is unsupported
 *  2. pin_metadata_blocks() pins the metadata that is in use
 *  3. the in-memory block groups are freed and rebuilt with
 *     reset_block_groups()
 *  4. the extent root is re-initialised with btrfs_fsck_reinit_root()
 *  5. every in-memory block group is inserted as an item into the new
 *     extent root, followed by btrfs_extent_post_op()
 *  6. reset_balance() clears pending balance state
 *
 * Each failing step prints a message to stderr.
 * NOTE(review): the return statements and the loop construct are on lines
 * not visible in this excerpt; presumably each failure returns an error
 * right after its message - confirm against the full file.
 */
10531 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10532 struct btrfs_fs_info *fs_info)
10538 * The only reason we don't do this is because right now we're just
10539 * walking the trees we find and pinning down their bytes, we don't look
10540 * at any of the leaves. In order to do mixed groups we'd have to check
10541 * the leaves of any fs roots and pin down the bytes for any file
10542 * extents we find. Not hard but why do it if we don't have to?
10544 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10545 fprintf(stderr, "We don't support re-initing the extent tree "
10546 "for mixed block groups yet, please notify a btrfs "
10547 "developer you want to do this so they can add this "
10548 "functionality.\n");
10553 * first we need to walk all of the trees except the extent tree and pin
10554 * down the bytes that are in use so we don't overwrite any existing
10557 ret = pin_metadata_blocks(fs_info);
10559 fprintf(stderr, "error pinning down used bytes\n");
10564 * Need to drop all the block groups since we're going to recreate all
10567 btrfs_free_block_groups(fs_info);
10568 ret = reset_block_groups(fs_info);
10570 fprintf(stderr, "error resetting the block groups\n");
10574 /* Ok we can allocate now, reinit the extent root */
10575 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10577 fprintf(stderr, "extent root initialization failed\n");
10579 * When the transaction code is updated we should end the
10580 * transaction, but for now progs only knows about commit so
10581 * just return an error.
10587 * Now we have all the in-memory block groups setup so we can make
10588 * allocations properly, and the metadata we care about is safe since we
10589 * pinned all of it above.
10592 struct btrfs_block_group_cache *cache;
10594 cache = btrfs_lookup_first_block_group(fs_info, start);
10597 start = cache->key.objectid + cache->key.offset;
10598 ret = btrfs_insert_item(trans, fs_info->extent_root,
10599 &cache->key, &cache->item,
10600 sizeof(cache->item));
10602 fprintf(stderr, "Error adding block group\n");
10605 btrfs_extent_post_op(trans, fs_info->extent_root);
10608 ret = reset_balance(trans, fs_info);
10610 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a tree block to be COWed by searching for its first key with COW
 * enabled inside a transaction.
 *
 * @root: any root of the filesystem; it is replaced by the owner root of
 *        @eb, looked up through root->fs_info
 * @eb:   tree block to re-COW
 *
 * The owner root is read with btrfs_read_fs_root() using the header owner
 * of @eb. A transaction is started, path.lowest_level is set to the level
 * of @eb, and btrfs_search_slot() is called with cow = 1 on the first key
 * of the block (node key for nodes, item key for leaves). The transaction
 * is then committed and the path released.
 *
 * Returns PTR_ERR() of the root or of the transaction on the visible early
 * error paths.
 * NOTE(review): the final return is on a line not visible in this excerpt;
 * presumably the search result is returned - confirm.
 * NOTE(review): the result of btrfs_commit_transaction() is not captured on
 * the visible line - confirm whether a commit failure should be reported.
 */
10615 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10617 struct btrfs_path path;
10618 struct btrfs_trans_handle *trans;
10619 struct btrfs_key key;
10622 printf("Recowing metadata block %llu\n", eb->start);
10623 key.objectid = btrfs_header_owner(eb);
10624 key.type = BTRFS_ROOT_ITEM_KEY;
10625 key.offset = (u64)-1;
10627 root = btrfs_read_fs_root(root->fs_info, &key);
10628 if (IS_ERR(root)) {
10629 fprintf(stderr, "Couldn't find owner root %llu\n",
10631 return PTR_ERR(root);
10634 trans = btrfs_start_transaction(root, 1);
10636 return PTR_ERR(trans);
10638 btrfs_init_path(&path);
10639 path.lowest_level = btrfs_header_level(eb);
10640 if (path.lowest_level)
10641 btrfs_node_key_to_cpu(eb, &key, 0);
10643 btrfs_item_key_to_cpu(eb, &key, 0);
10645 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10646 btrfs_commit_transaction(trans, root);
10647 btrfs_release_path(&path);
/*
 * Delete one item that was recorded as bad.
 *
 * @root: any root of the filesystem; it is replaced by the root named in
 *        bad->root_id, looked up through root->fs_info
 * @bad:  record holding the owning root id and the key of the item
 *
 * The owning root is read with btrfs_read_fs_root(), a transaction is
 * started, the item is located with btrfs_search_slot() (ins_len -1,
 * cow 1) and removed with btrfs_del_item(). The transaction is committed
 * and the path released.
 *
 * Returns PTR_ERR() of the root or of the transaction on the visible early
 * error paths.
 * NOTE(review): the handling of the search result and the final return are
 * on lines not visible in this excerpt; presumably the delete is skipped
 * when the item is not found - confirm against the full file.
 */
10651 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10653 struct btrfs_path path;
10654 struct btrfs_trans_handle *trans;
10655 struct btrfs_key key;
10658 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10659 bad->key.type, bad->key.offset);
10660 key.objectid = bad->root_id;
10661 key.type = BTRFS_ROOT_ITEM_KEY;
10662 key.offset = (u64)-1;
10664 root = btrfs_read_fs_root(root->fs_info, &key);
10665 if (IS_ERR(root)) {
10666 fprintf(stderr, "Couldn't find owner root %llu\n",
10668 return PTR_ERR(root);
10671 trans = btrfs_start_transaction(root, 1);
10673 return PTR_ERR(trans);
10675 btrfs_init_path(&path);
10676 ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
10682 ret = btrfs_del_item(trans, root, &path);
10684 btrfs_commit_transaction(trans, root);
10685 btrfs_release_path(&path);
/*
 * Clear the log tree pointer in the super block.
 *
 * @root: root used to start and commit the transaction; the super copy of
 *        root->fs_info is modified
 *
 * Inside a transaction the log_root and log_root_level fields of the super
 * block copy are set to 0, then the transaction is committed and its result
 * stored in ret.
 * NOTE(review): the return statements are on lines not visible in this
 * excerpt; presumably ret is returned on both the error path and after the
 * commit - confirm.
 */
10689 static int zero_log_tree(struct btrfs_root *root)
10691 struct btrfs_trans_handle *trans;
10694 trans = btrfs_start_transaction(root, 1);
10695 if (IS_ERR(trans)) {
10696 ret = PTR_ERR(trans);
10699 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10700 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10701 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for one data extent, one sector at a time.
 *
 * @trans:     running transaction used for the csum insertions
 * @csum_root: checksum tree; csum_root->sectorsize is the step size
 * @buf:       scratch buffer the data is read into (callers in this file
 *             allocate it with the sector size)
 * @start:     start bytenr of the extent
 *
 * For each offset below len, sectorsize bytes are read at start + offset
 * with read_extent_data() and handed to btrfs_csum_file_block() together
 * with start + len and start + offset.
 * NOTE(review): the remaining parameter(s), the local declarations, the
 * result checks and the return value are on lines not visible in this
 * excerpt; presumably the last parameter is the extent length len and the
 * loop stops at the first failure - confirm against the full file.
 */
10705 static int populate_csum(struct btrfs_trans_handle *trans,
10706 struct btrfs_root *csum_root, char *buf, u64 start,
10713 while (offset < len) {
10714 sectorsize = csum_root->sectorsize;
10715 ret = read_extent_data(csum_root, buf, start + offset,
10719 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10720 start + offset, buf, sectorsize);
10723 offset += sectorsize;
/*
 * Fill the checksum tree with checksums for the regular file extents of one
 * fs/subvolume tree.
 *
 * @trans:     running transaction used for the csum insertions
 * @csum_root: checksum tree that receives the checksums
 * @cur_root:  fs/subvolume tree whose file extents are scanned
 *
 * A scratch buffer of one sector (sectorsize of the fs_info csum root) is
 * allocated. The tree is then iterated item by item; for each
 * BTRFS_EXTENT_DATA_KEY item of type BTRFS_FILE_EXTENT_REG the on-disk
 * bytenr and on-disk length are passed to populate_csum(). A result of
 * -EEXIST from populate_csum() is tested for explicitly.
 *
 * NOTE(review): the initial search key, the handling of -EEXIST, the loop
 * exits, the release of buf and the return value are on lines not visible
 * in this excerpt; presumably -EEXIST is treated as success - confirm
 * against the full file.
 */
10728 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10729 struct btrfs_root *csum_root,
10730 struct btrfs_root *cur_root)
10732 struct btrfs_path path;
10733 struct btrfs_key key;
10734 struct extent_buffer *node;
10735 struct btrfs_file_extent_item *fi;
10742 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10746 btrfs_init_path(&path);
10750 ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
10753 /* Iterate all regular file extents and fill its csum */
10755 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10757 if (key.type != BTRFS_EXTENT_DATA_KEY)
10759 node = path.nodes[0];
10760 slot = path.slots[0];
10761 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10762 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Checksums cover the on-disk extent, so the disk bytenr and size are used */
10764 start = btrfs_file_extent_disk_bytenr(node, fi);
10765 len = btrfs_file_extent_disk_num_bytes(node, fi);
10767 ret = populate_csum(trans, csum_root, buf, start, len);
10768 if (ret == -EEXIST)
10774 * TODO: if next leaf is corrupted, jump to nearest next valid
10777 ret = btrfs_next_item(cur_root, &path);
10787 btrfs_release_path(&path);
/*
 * Fill the checksum tree by scanning every fs/subvolume tree.
 *
 * @trans:     running transaction used for the csum insertions
 * @csum_root: checksum tree that receives the checksums; its fs_info
 *             provides the tree root that is iterated
 *
 * The tree root is searched starting at BTRFS_FS_TREE_OBJECTID with type
 * BTRFS_ROOT_ITEM_KEY. For each item the key is tested against
 * BTRFS_LAST_FREE_OBJECTID, BTRFS_ROOT_ITEM_KEY and is_fstree(); for
 * accepted keys the offset is set to (u64)-1, the root is read with
 * btrfs_read_fs_root() and passed to fill_csum_tree_from_one_fs_root().
 *
 * NOTE(review): what each of the three key tests does on failure, the loop
 * exits and the return value are on lines not visible in this excerpt;
 * presumably objectids above BTRFS_LAST_FREE_OBJECTID end the scan and the
 * other mismatches skip the item - confirm against the full file.
 */
10792 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10793 struct btrfs_root *csum_root)
10795 struct btrfs_fs_info *fs_info = csum_root->fs_info;
10796 struct btrfs_path path;
10797 struct btrfs_root *tree_root = fs_info->tree_root;
10798 struct btrfs_root *cur_root;
10799 struct extent_buffer *node;
10800 struct btrfs_key key;
10804 btrfs_init_path(&path);
10805 key.objectid = BTRFS_FS_TREE_OBJECTID;
10807 key.type = BTRFS_ROOT_ITEM_KEY;
10808 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
10817 node = path.nodes[0];
10818 slot = path.slots[0];
10819 btrfs_item_key_to_cpu(node, &key, slot);
10820 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10822 if (key.type != BTRFS_ROOT_ITEM_KEY)
10824 if (!is_fstree(key.objectid))
10826 key.offset = (u64)-1;
10828 cur_root = btrfs_read_fs_root(fs_info, &key);
10829 if (IS_ERR(cur_root) || !cur_root) {
10830 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10834 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10839 ret = btrfs_next_item(tree_root, &path);
10849 btrfs_release_path(&path);
/*
 * Fill the checksum tree by scanning the data extents recorded in the
 * extent tree.
 *
 * @trans:     running transaction used for the csum insertions
 * @csum_root: checksum tree that receives the checksums; its fs_info
 *             provides the extent root that is iterated
 *
 * The extent tree is searched with a BTRFS_EXTENT_ITEM_KEY key and walked
 * leaf by leaf. For each BTRFS_EXTENT_ITEM_KEY item whose flags contain
 * BTRFS_EXTENT_FLAG_DATA, populate_csum() is called with the extent start
 * (key.objectid). A one-sector scratch buffer is allocated for the reads.
 *
 * NOTE(review): the remaining key fields, the length argument passed to
 * populate_csum(), the slot advance, the release of buf and the return
 * value are on lines not visible in this excerpt - confirm against the
 * full file.
 */
10853 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10854 struct btrfs_root *csum_root)
10856 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10857 struct btrfs_path path;
10858 struct btrfs_extent_item *ei;
10859 struct extent_buffer *leaf;
10861 struct btrfs_key key;
10864 btrfs_init_path(&path);
10866 key.type = BTRFS_EXTENT_ITEM_KEY;
10868 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
10870 btrfs_release_path(&path);
10874 buf = malloc(csum_root->sectorsize);
10876 btrfs_release_path(&path);
10881 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
10882 ret = btrfs_next_leaf(extent_root, &path);
10890 leaf = path.nodes[0];
10892 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
10893 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10898 ei = btrfs_item_ptr(leaf, path.slots[0],
10899 struct btrfs_extent_item);
/* Only data extents carry checksums; other extent items are not summed */
10900 if (!(btrfs_extent_flags(leaf, ei) &
10901 BTRFS_EXTENT_FLAG_DATA)) {
10906 ret = populate_csum(trans, csum_root, buf, key.objectid,
10913 btrfs_release_path(&path);
10919 * Recalculate the csum and put it into the csum tree.
10921 * Extent tree init will wipe out all the extent info, so in that case, we
10922 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
10923 * will use fs/subvol trees to init the csum tree.
/*
 * Entry point for rebuilding the checksum tree.
 *
 * @trans:          running transaction used for the csum insertions
 * @csum_root:      checksum tree that receives the checksums
 * @search_fs_tree: nonzero to collect data extents from the fs/subvolume
 *                  trees, zero to collect them from the extent tree
 *
 * Returns the result of fill_csum_tree_from_fs() or
 * fill_csum_tree_from_extent() respectively.
 */
10925 static int fill_csum_tree(struct btrfs_trans_handle *trans,
10926 struct btrfs_root *csum_root,
10927 int search_fs_tree)
10929 if (search_fs_tree)
10930 return fill_csum_tree_from_fs(trans, csum_root);
10932 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Release the global roots_info_cache and every entry stored in it.
 *
 * Does nothing visible beyond the initial test when roots_info_cache is
 * NULL. Otherwise entries are taken with first_cache_extent() and removed
 * with remove_cache_extent() until the tree is empty, the tree itself is
 * freed and the global pointer is reset to NULL.
 * NOTE(review): the statement that frees each root_item_info (rii) is on a
 * line not visible in this excerpt; presumably free(rii) - confirm.
 */
10935 static void free_roots_info_cache(void)
10937 if (!roots_info_cache)
10940 while (!cache_tree_empty(roots_info_cache)) {
10941 struct cache_extent *entry;
10942 struct root_item_info *rii;
10944 entry = first_cache_extent(roots_info_cache);
10947 remove_cache_extent(roots_info_cache, entry);
/* Recover the containing root_item_info from the embedded cache_extent */
10948 rii = container_of(entry, struct root_item_info, cache_extent);
10952 free(roots_info_cache);
10953 roots_info_cache = NULL;
/*
 * Scan the extent tree and record, per root id, the highest level tree
 * block that is referenced by that root through an inline tree block ref.
 *
 * @info: filesystem info; info->extent_root is scanned
 *
 * roots_info_cache is allocated and initialised on first use. The extent
 * tree is walked leaf by leaf. Only tree block extents are considered:
 * METADATA_ITEM keys, or EXTENT_ITEM keys with BTRFS_EXTENT_FLAG_TREE_BLOCK
 * set. The level comes from the key offset for METADATA_ITEM keys and from
 * the btrfs_tree_block_info otherwise. The first inline ref must be of type
 * BTRFS_TREE_BLOCK_REF_KEY; its offset is used as the root id.
 *
 * For each root id a root_item_info entry is looked up in the cache or
 * created with level (u8)-1 as the not-yet-set marker. When the block level
 * is higher than the recorded one (or nothing is recorded yet), level,
 * bytenr, generation and node_count = 1 are stored; a block at the same
 * level takes the else-if branch.
 *
 * NOTE(review): the body of the equal-level branch, the slot advance, the
 * error paths and the return value are on lines not visible in this
 * excerpt; presumably node_count is incremented for equal levels - confirm
 * against the full file.
 */
10956 static int build_roots_info_cache(struct btrfs_fs_info *info)
10959 struct btrfs_key key;
10960 struct extent_buffer *leaf;
10961 struct btrfs_path path;
10963 if (!roots_info_cache) {
10964 roots_info_cache = malloc(sizeof(*roots_info_cache));
10965 if (!roots_info_cache)
10967 cache_tree_init(roots_info_cache);
10970 btrfs_init_path(&path);
10972 key.type = BTRFS_EXTENT_ITEM_KEY;
10974 ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
10977 leaf = path.nodes[0];
10980 struct btrfs_key found_key;
10981 struct btrfs_extent_item *ei;
10982 struct btrfs_extent_inline_ref *iref;
10983 int slot = path.slots[0];
10988 struct cache_extent *entry;
10989 struct root_item_info *rii;
/* Move on to the next leaf once the current one is exhausted */
10991 if (slot >= btrfs_header_nritems(leaf)) {
10992 ret = btrfs_next_leaf(info->extent_root, &path);
10999 leaf = path.nodes[0];
11000 slot = path.slots[0];
11003 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11005 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11006 found_key.type != BTRFS_METADATA_ITEM_KEY)
11009 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11010 flags = btrfs_extent_flags(leaf, ei);
11012 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11013 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* The inline refs start after the extent item, or after the block info */
11016 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11017 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11018 level = found_key.offset;
11020 struct btrfs_tree_block_info *binfo;
11022 binfo = (struct btrfs_tree_block_info *)(ei + 1);
11023 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11024 level = btrfs_tree_block_level(leaf, binfo);
11028 * For a root extent, it must be of the following type and the
11029 * first (and only one) iref in the item.
11031 type = btrfs_extent_inline_ref_type(leaf, iref);
11032 if (type != BTRFS_TREE_BLOCK_REF_KEY)
11035 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11036 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11038 rii = malloc(sizeof(struct root_item_info));
11043 rii->cache_extent.start = root_id;
11044 rii->cache_extent.size = 1;
11045 rii->level = (u8)-1;
11046 entry = &rii->cache_extent;
11047 ret = insert_cache_extent(roots_info_cache, entry);
11050 rii = container_of(entry, struct root_item_info,
11054 ASSERT(rii->cache_extent.start == root_id);
11055 ASSERT(rii->cache_extent.size == 1);
/* (u8)-1 marks an entry that has no level recorded yet */
11057 if (level > rii->level || rii->level == (u8)-1) {
11058 rii->level = level;
11059 rii->bytenr = found_key.objectid;
11060 rii->gen = btrfs_extent_generation(leaf, ei);
11061 rii->node_count = 1;
11062 } else if (level == rii->level) {
11070 btrfs_release_path(&path);
/*
 * Compare the on-disk root item at path->nodes[0] / path->slots[0] with the
 * root node recorded in roots_info_cache for root_key->objectid. On a
 * mismatch in bytenr, level or generation, and when read_only_mode is 0, the
 * item in the leaf is rewritten with the cached values.
 *
 * @info:           fs info (not referenced by the lines visible here)
 * @path:           path positioned at the root item to inspect
 * @root_key:       key of that root item; its objectid is the root id
 * @read_only_mode: non-zero to only detect a mismatch, never modify the leaf
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * confirm the return convention against the full source.
 */
11075 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11076 struct btrfs_path *path,
11077 const struct btrfs_key *root_key,
11078 const int read_only_mode)
11080 const u64 root_id = root_key->objectid;
11081 struct cache_extent *entry;
11082 struct root_item_info *rii;
11083 struct btrfs_root_item ri;
11084 unsigned long offset;
/* Fetch what build_roots_info_cache() recorded for this root id. */
11086 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11089 "Error: could not find extent items for root %llu\n",
11090 root_key->objectid);
11094 rii = container_of(entry, struct root_item_info, cache_extent);
11095 ASSERT(rii->cache_extent.start == root_id);
11096 ASSERT(rii->cache_extent.size == 1);
/* Anything other than exactly one candidate root node is an error. */
11098 if (rii->node_count != 1) {
11100 "Error: could not find btree root extent for root %llu\n",
11105 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
/* Work on a local copy of the root item read out of the leaf. */
11106 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11108 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11109 btrfs_root_level(&ri) != rii->level ||
11110 btrfs_root_generation(&ri) != rii->gen) {
11113 * If we're in repair mode but our caller told us to not update
11114 * the root item, i.e. just check if it needs to be updated, don't
11115 * print this message, since the caller will call us again shortly
11116 * for the same root item without read only mode (the caller will
11117 * open a transaction first).
11119 if (!(read_only_mode && repair))
11121 "%sroot item for root %llu,"
11122 " current bytenr %llu, current gen %llu, current level %u,"
11123 " new bytenr %llu, new gen %llu, new level %u\n",
11124 (read_only_mode ? "" : "fixing "),
11126 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11127 btrfs_root_level(&ri),
11128 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the root node that was found gets its own message. */
11130 if (btrfs_root_generation(&ri) > rii->gen) {
11132 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11133 root_id, btrfs_root_generation(&ri), rii->gen);
/* Only write to the leaf when the caller allowed modification. */
11137 if (!read_only_mode) {
11138 btrfs_set_root_bytenr(&ri, rii->bytenr);
11139 btrfs_set_root_level(&ri, rii->level);
11140 btrfs_set_root_generation(&ri, rii->gen);
11141 write_extent_buffer(path->nodes[0], &ri,
11142 offset, sizeof(ri));
11152 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11153 * caused read-only snapshots to be corrupted if they were created at a moment
11154 * when the source subvolume/snapshot had orphan items. The issue was that the
11155 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11156 * node instead of the post orphan cleanup root node.
11157 * So this function, and its callees, just detects and fixes those cases. Even
11158 * though the regression was for read-only snapshots, this function applies to
11159 * any snapshot/subvolume root.
11160 * This must be run before any other repair code; otherwise that other repair
11161 * code may delete or modify backrefs in the extent tree, for example, which
11162 * will result in an inconsistent fs after repairing the root items.
11164 static int repair_root_items(struct btrfs_fs_info *info)
/*
 * NOTE(review): cmd_check() prints this function's return value as a count
 * of roots ("Fixed %d roots." / "Found %d roots with an outdated root
 * item."); the counting and the return statements themselves are not
 * visible in this excerpt.
 */
11166 struct btrfs_path path;
11167 struct btrfs_key key;
11168 struct extent_buffer *leaf;
11169 struct btrfs_trans_handle *trans = NULL;
11172 int need_trans = 0;
11174 btrfs_init_path(&path);
/* Collect the candidate root node of every root from the extent tree. */
11176 ret = build_roots_info_cache(info);
/* The root tree is searched starting at BTRFS_FIRST_FREE_OBJECTID root items. */
11180 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11181 key.type = BTRFS_ROOT_ITEM_KEY;
11186 * Avoid opening and committing transactions if a leaf doesn't have
11187 * any root items that need to be fixed, so that we avoid rotating
11188 * backup roots unnecessarily.
11191 trans = btrfs_start_transaction(info->tree_root, 1);
11192 if (IS_ERR(trans)) {
11193 ret = PTR_ERR(trans);
11198 ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
11202 leaf = path.nodes[0];
11205 struct btrfs_key found_key;
/*
 * End of the current leaf: find_next_key() is asked for the following key
 * (written into key) before the path is released.
 */
11207 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
11208 int no_more_keys = find_next_key(&path, &key);
11210 btrfs_release_path(&path);
11212 ret = btrfs_commit_transaction(trans,
11224 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
/*
 * Filters: non-ROOT_ITEM keys and the BTRFS_TREE_RELOC_OBJECTID root are
 * tested for here; the action taken is on lines not shown in this excerpt.
 */
11226 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11228 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11231 ret = maybe_repair_root_item(info, &path, &found_key,
11236 if (!trans && repair) {
/* Repair requested but no transaction is open yet: drop the path first. */
11239 btrfs_release_path(&path);
/* Cleanup: free the roots info cache and release the path. */
11249 free_roots_info_cache();
11250 btrfs_release_path(&path);
/* NOTE(review): the result of this commit is not assigned on this line. */
11252 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Clear the free space cache (the "v1" action of --clear-space-cache, see
 * cmd_check()): call btrfs_clear_free_space_cache() on each block group
 * found by btrfs_lookup_first_block_group(), advancing past the end of each
 * one, then set the super block's cache_generation to (u64)-1 inside a
 * transaction.
 *
 * @fs_info: filesystem to operate on
 *
 * Returns PTR_ERR(trans) if the final transaction cannot be started; the
 * other return paths are not visible in this excerpt.
 */
11259 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
11261 struct btrfs_trans_handle *trans;
11262 struct btrfs_block_group_cache *bg_cache;
11266 /* Clear all free space cache inodes and their extent data */
11268 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
11271 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
/* Continue the lookup just past the end of this block group. */
11274 current = bg_cache->key.objectid + bg_cache->key.offset;
11277 /* Don't forget to set cache_generation to -1 */
11278 trans = btrfs_start_transaction(fs_info->tree_root, 0);
11279 if (IS_ERR(trans)) {
11280 error("failed to update super block cache generation");
11281 return PTR_ERR(trans);
11283 btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
/* NOTE(review): the result of this commit is not assigned on this line. */
11284 btrfs_commit_transaction(trans, fs_info->tree_root);
/*
 * Help text for "btrfs check", handed to usage() by cmd_check(): the
 * synopsis comes first, followed by the description and the option list.
 */
11289 const char * const cmd_check_usage[] = {
11290 "btrfs check [options] <device>",
11291 "Check structural integrity of a filesystem (unmounted).",
11292 "Check structural integrity of an unmounted filesystem. Verify internal",
11293 "trees' consistency and item connectivity. In the repair mode try to",
11294 "fix the problems found. ",
11295 "WARNING: the repair mode is considered dangerous",
11297 "-s|--super <superblock> use this superblock copy",
11298 "-b|--backup use the first valid backup root copy",
11299 "--repair try to repair the filesystem",
11300 "--readonly run in read-only mode (default)",
11301 "--init-csum-tree create a new CRC tree",
11302 "--init-extent-tree create a new extent tree",
11303 "--mode <MODE> allows choice of memory/IO trade-offs",
11304 " where MODE is one of:",
11305 " original - read inodes and extents to memory (requires",
11306 " more memory, does less IO)",
11307 " lowmem - try to use less memory but read blocks again",
11309 "--check-data-csum verify checksums of data blocks",
11310 "-Q|--qgroup-report print a report on qgroup consistency",
11311 "-E|--subvol-extents <subvolid>",
11312 " print subvolume extents and sharing state",
11313 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11314 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11315 "-p|--progress indicate progress",
11316 "--clear-space-cache v1|v2 clear space cache for v1 or v2",
/*
 * Entry point of "btrfs check": parse the options, open the filesystem,
 * handle the one-shot actions (clearing the space cache, quota group report,
 * subvolume extent state) and otherwise run the check passes in order:
 * extents and chunks, root items, free space cache or tree, fs roots, csums,
 * root refs and - when quota is enabled - quota groups. The accumulated
 * statistics are printed at the end.
 *
 * @argc, @argv: command line; exactly one non-option argument (the device)
 *               must remain after the options, otherwise usage() is called.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
11320 int cmd_check(int argc, char **argv)
11322 struct cache_tree root_cache;
11323 struct btrfs_root *root;
11324 struct btrfs_fs_info *info;
11327 u64 tree_root_bytenr = 0;
11328 u64 chunk_root_bytenr = 0;
11329 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11332 int init_csum_tree = 0;
11334 int clear_space_cache = 0;
11335 int qgroup_report = 0;
11336 int qgroups_repaired = 0;
11337 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Long-only options get values above 255 so they cannot clash with a short option character. */
11341 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11342 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11343 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11344 GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE };
11345 static const struct option long_options[] = {
11346 { "super", required_argument, NULL, 's' },
11347 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11348 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11349 { "init-csum-tree", no_argument, NULL,
11350 GETOPT_VAL_INIT_CSUM },
11351 { "init-extent-tree", no_argument, NULL,
11352 GETOPT_VAL_INIT_EXTENT },
11353 { "check-data-csum", no_argument, NULL,
11354 GETOPT_VAL_CHECK_CSUM },
11355 { "backup", no_argument, NULL, 'b' },
11356 { "subvol-extents", required_argument, NULL, 'E' },
11357 { "qgroup-report", no_argument, NULL, 'Q' },
11358 { "tree-root", required_argument, NULL, 'r' },
11359 { "chunk-root", required_argument, NULL,
11360 GETOPT_VAL_CHUNK_TREE },
11361 { "progress", no_argument, NULL, 'p' },
11362 { "mode", required_argument, NULL,
11364 { "clear-space-cache", required_argument, NULL,
11365 GETOPT_VAL_CLEAR_SPACE_CACHE},
11366 { NULL, 0, NULL, 0}
/*
 * NOTE(review): --subvol-extents and --qgroup-report return 'E' and 'Q',
 * and the usage text advertises -E and -Q, but neither letter appears in
 * the short option string below, so only the long forms are accepted.
 * Confirm whether "as:br:pE:Q" was intended.
 */
11369 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11373 case 'a': /* ignored */ break;
11375 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/*
 * Superblock mirror index: must be below BTRFS_SUPER_MIRROR_MAX; it is
 * turned into a byte offset with btrfs_sb_offset().
 */
11378 num = arg_strtou64(optarg);
11379 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11381 "super mirror should be less than %d",
11382 BTRFS_SUPER_MIRROR_MAX);
11385 bytenr = btrfs_sb_offset(((int)num));
11386 printf("using SB copy %llu, bytenr %llu\n", num,
11387 (unsigned long long)bytenr);
11393 subvolid = arg_strtou64(optarg);
11396 tree_root_bytenr = arg_strtou64(optarg);
11398 case GETOPT_VAL_CHUNK_TREE:
11399 chunk_root_bytenr = arg_strtou64(optarg);
11402 ctx.progress_enabled = true;
11406 usage(cmd_check_usage);
/* The repair, init-tree and clear-space-cache options below also set OPEN_CTREE_WRITES. */
11407 case GETOPT_VAL_REPAIR:
11408 printf("enabling repair mode\n");
11410 ctree_flags |= OPEN_CTREE_WRITES;
11412 case GETOPT_VAL_READONLY:
11415 case GETOPT_VAL_INIT_CSUM:
11416 printf("Creating a new CRC tree\n");
11417 init_csum_tree = 1;
11419 ctree_flags |= OPEN_CTREE_WRITES;
11421 case GETOPT_VAL_INIT_EXTENT:
11422 init_extent_tree = 1;
11423 ctree_flags |= (OPEN_CTREE_WRITES |
11424 OPEN_CTREE_NO_BLOCK_GROUPS);
11427 case GETOPT_VAL_CHECK_CSUM:
11428 check_data_csum = 1;
11430 case GETOPT_VAL_MODE:
11431 check_mode = parse_check_mode(optarg);
11432 if (check_mode == CHECK_MODE_UNKNOWN) {
11433 error("unknown mode: %s", optarg);
11437 case GETOPT_VAL_CLEAR_SPACE_CACHE:
11438 if (strcmp(optarg, "v1") == 0) {
11439 clear_space_cache = 1;
11440 } else if (strcmp(optarg, "v2") == 0) {
11441 clear_space_cache = 2;
11442 ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
11445 "invalid argument to --clear-space-cache, must be v1 or v2");
11448 ctree_flags |= OPEN_CTREE_WRITES;
/* Exactly one positional argument (the device) is required. */
11453 if (check_argc_exact(argc - optind, 1))
11454 usage(cmd_check_usage);
/*
 * When progress reporting is enabled, it runs through a task created here
 * and released with task_deinit() at the end of the function.
 */
11456 if (ctx.progress_enabled) {
11457 ctx.tp = TASK_NOTHING;
11458 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11461 /* This check is the only reason for --readonly to exist */
11462 if (readonly && repair) {
11463 error("repair options are not compatible with --readonly");
11468 * Not supported yet
11470 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11471 error("low memory mode doesn't support repair yet");
11476 cache_tree_init(&root_cache);
/* A mounted filesystem is refused. */
11478 if((ret = check_mounted(argv[optind])) < 0) {
11479 error("could not check mount status: %s", strerror(-ret));
11482 error("%s is currently mounted, aborting", argv[optind]);
11487 /* only allow partial opening under repair mode */
11489 ctree_flags |= OPEN_CTREE_PARTIAL;
11491 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11492 chunk_root_bytenr, ctree_flags);
11494 error("cannot open file system");
11499 global_info = info;
11500 root = info->fs_root;
/* --clear-space-cache v1|v2 is handled here, ahead of the check passes. */
11501 if (clear_space_cache == 1) {
11502 if (btrfs_fs_compat_ro(info,
11503 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11505 "free space cache v2 detected, use --clear-space-cache v2");
11509 printf("Clearing free space cache\n");
11510 ret = clear_free_space_cache(info);
11512 error("failed to clear free space cache");
11515 printf("Free space cache cleared\n");
11518 } else if (clear_space_cache == 2) {
11519 if (!btrfs_fs_compat_ro(info,
11520 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
11521 printf("no free space cache v2 to clear\n");
11525 printf("Clear free space cache v2\n");
11526 ret = btrfs_clear_free_space_tree(info);
11528 error("failed to clear free space cache v2: %d", ret);
11531 printf("free space cache v2 cleared\n");
11537 * repair mode will force us to commit transaction which
11538 * will make us fail to load log tree when mounting.
11540 if (repair && btrfs_super_log_root(info->super_copy)) {
11541 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
11546 ret = zero_log_tree(root);
11548 error("failed to zero log tree: %d", ret);
11553 uuid_unparse(info->super_copy->fsid, uuidbuf);
/* Report-only modes: quota group report and per-subvolume extent state. */
11554 if (qgroup_report) {
11555 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11557 ret = qgroup_verify_all(info);
11563 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11564 subvolid, argv[optind], uuidbuf);
11565 ret = print_extent_state(info, subvolid);
11568 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* The tree, dev and chunk root nodes must all be up to date to continue. */
11570 if (!extent_buffer_uptodate(info->tree_root->node) ||
11571 !extent_buffer_uptodate(info->dev_root->node) ||
11572 !extent_buffer_uptodate(info->chunk_root->node)) {
11573 error("critical roots corrupted, unable to check the filesystem");
/*
 * --init-extent-tree / --init-csum-tree: the requested trees are
 * reinitialized in a single transaction before the regular check runs.
 */
11578 if (init_extent_tree || init_csum_tree) {
11579 struct btrfs_trans_handle *trans;
11581 trans = btrfs_start_transaction(info->extent_root, 0);
11582 if (IS_ERR(trans)) {
11583 error("error starting transaction");
11584 ret = PTR_ERR(trans);
11588 if (init_extent_tree) {
11589 printf("Creating a new extent tree\n");
11590 ret = reinit_extent_tree(trans, info);
11595 if (init_csum_tree) {
11596 printf("Reinitialize checksum tree\n");
11597 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11599 error("checksum tree initialization failed: %d",
11605 ret = fill_csum_tree(trans, info->csum_root,
11608 error("checksum tree refilling failed: %d", ret);
11613 * Ok now we commit and run the normal fsck, which will add
11614 * extent entries for all of the items it finds.
11616 ret = btrfs_commit_transaction(trans, info->extent_root);
11620 if (!extent_buffer_uptodate(info->extent_root->node)) {
11621 error("critical: extent_root, unable to check the filesystem");
11625 if (!extent_buffer_uptodate(info->csum_root->node)) {
11626 error("critical: csum_root, unable to check the filesystem");
/* Extents and chunks, using the implementation selected by check_mode. */
11631 if (!ctx.progress_enabled)
11632 fprintf(stderr, "checking extents\n");
11633 if (check_mode == CHECK_MODE_LOWMEM)
11634 ret = check_chunks_and_extents_v2(root);
11636 ret = check_chunks_and_extents(root);
11639 "errors found in extent allocation tree or chunk allocation");
/* Outdated root items; the result is reported below as a number of roots. */
11641 ret = repair_root_items(info);
11645 fprintf(stderr, "Fixed %d roots.\n", ret);
11647 } else if (ret > 0) {
11649 "Found %d roots with an outdated root item.\n",
11652 "Please run a filesystem check with the option --repair to fix them.\n");
/* Free space: the message depends on the FREE_SPACE_TREE compat_ro flag. */
11657 if (!ctx.progress_enabled) {
11658 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11659 fprintf(stderr, "checking free space tree\n");
11661 fprintf(stderr, "checking free space cache\n");
11663 ret = check_space_cache(root);
11668 * We used to have to have these hole extents in between our real
11669 * extents so if we don't have this flag set we need to make sure there
11670 * are no gaps in the file extents for inodes, otherwise we can just
11671 * ignore it when this happens.
11673 no_holes = btrfs_fs_incompat(root->fs_info,
11674 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
/* Fs roots; root_cache is filled here and reused by check_root_refs(). */
11675 if (!ctx.progress_enabled)
11676 fprintf(stderr, "checking fs roots\n");
11677 ret = check_fs_roots(root, &root_cache);
11681 fprintf(stderr, "checking csums\n");
11682 ret = check_csums(root);
11686 fprintf(stderr, "checking root refs\n");
11687 ret = check_root_refs(root, &root_cache);
/* In repair mode, re-COW every extent buffer queued on recow_ebs. */
11691 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11692 struct extent_buffer *eb;
11694 eb = list_first_entry(&root->fs_info->recow_ebs,
11695 struct extent_buffer, recow);
11696 list_del_init(&eb->recow);
11697 ret = recow_extent_buffer(root, eb);
/* Process every bad item queued on delete_items. */
11702 while (!list_empty(&delete_items)) {
11703 struct bad_item *bad;
11705 bad = list_first_entry(&delete_items, struct bad_item, list);
11706 list_del_init(&bad->list);
11708 ret = delete_bad_item(root, bad);
/* Quota groups are only verified when quota is enabled. */
11712 if (info->quota_enabled) {
11714 fprintf(stderr, "checking quota groups\n");
11715 err = qgroup_verify_all(info);
11719 err = repair_qgroups(info, &qgroups_repaired);
/* Extent buffers still queued for re-COW at this point are reported as transid errors. */
11724 if (!list_empty(&root->fs_info->recow_ebs)) {
11725 error("transid errors in file system");
11729 /* Don't override original ret */
11730 if (!ret && qgroups_repaired)
11731 ret = qgroups_repaired;
11733 if (found_old_backref) { /*
11734 * there was a disk format change when mixed
11735 * backref was in testing tree. The old format
11736 * existed about one week.
11738 printf("\n * Found old mixed backref format. "
11739 "The old format is not supported! *"
11740 "\n * Please mount the FS in readonly mode, "
11741 "backup data and re-format the FS. *\n\n");
11744 printf("found %llu bytes used err is %d\n",
11745 (unsigned long long)bytes_used, ret);
11746 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11747 printf("total tree bytes: %llu\n",
11748 (unsigned long long)total_btree_bytes);
11749 printf("total fs tree bytes: %llu\n",
11750 (unsigned long long)total_fs_tree_bytes);
11751 printf("total extent tree bytes: %llu\n",
11752 (unsigned long long)total_extent_tree_bytes);
11753 printf("btree space waste bytes: %llu\n",
11754 (unsigned long long)btree_space_waste);
11755 printf("file data blocks allocated: %llu\n referenced %llu\n",
11756 (unsigned long long)data_bytes_allocated,
11757 (unsigned long long)data_bytes_referenced);
11759 free_qgroup_counts();
11760 free_root_recs_tree(&root_cache);
11764 if (ctx.progress_enabled)
11765 task_deinit(ctx.info);