2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
/*
 * Progress phases reported by the background status task.
 * NOTE(review): reconstructed — the enum header and struct members were
 * truncated in this copy; enumerator order must match
 * task_position_string[] in print_status_check() — verify against caller.
 */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* have to be the last element */
};

/* Context handed to the status-printing task. */
struct task_ctx {
	int progress_enabled;	/* non-zero when periodic status is wanted */
	enum task_position tp;	/* phase currently being checked */
	struct task_info *info;	/* handle for task_period_start/wait */
};
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
/*
 * Selectable check implementations: the original cross-referencing mode
 * and the low-memory mode.
 * NOTE(review): reconstructed — enumerators were truncated in this copy.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86 struct extent_backref {
88 unsigned int is_data:1;
89 unsigned int found_extent_tree:1;
90 unsigned int full_backref:1;
91 unsigned int found_ref:1;
92 unsigned int broken:1;
95 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 return rb_entry(node, struct extent_backref, node);
100 struct data_backref {
101 struct extent_backref node;
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 return container_of(back, struct data_backref, node);
120 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
122 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
123 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
124 struct data_backref *back1 = to_data_backref(ext1);
125 struct data_backref *back2 = to_data_backref(ext2);
127 WARN_ON(!ext1->is_data);
128 WARN_ON(!ext2->is_data);
130 /* parent and root are a union, so this covers both */
131 if (back1->parent > back2->parent)
133 if (back1->parent < back2->parent)
136 /* This is a full backref and the parents match. */
137 if (back1->node.full_backref)
140 if (back1->owner > back2->owner)
142 if (back1->owner < back2->owner)
145 if (back1->offset > back2->offset)
147 if (back1->offset < back2->offset)
150 if (back1->bytes > back2->bytes)
152 if (back1->bytes < back2->bytes)
155 if (back1->found_ref && back2->found_ref) {
156 if (back1->disk_bytenr > back2->disk_bytenr)
158 if (back1->disk_bytenr < back2->disk_bytenr)
161 if (back1->found_ref > back2->found_ref)
163 if (back1->found_ref < back2->found_ref)
171 * Much like data_backref, just removed the undetermined members
172 * and change it to use list_head.
173 * During extent scan, it is stored in root->orphan_data_extent.
174 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
176 struct orphan_data_extent {
177 struct list_head list;
185 struct tree_backref {
186 struct extent_backref node;
193 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
195 return container_of(back, struct tree_backref, node);
198 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
200 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
201 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
202 struct tree_backref *back1 = to_tree_backref(ext1);
203 struct tree_backref *back2 = to_tree_backref(ext2);
205 WARN_ON(ext1->is_data);
206 WARN_ON(ext2->is_data);
208 /* parent and root are a union, so this covers both */
209 if (back1->parent > back2->parent)
211 if (back1->parent < back2->parent)
217 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
219 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
220 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
222 if (ext1->is_data > ext2->is_data)
225 if (ext1->is_data < ext2->is_data)
228 if (ext1->full_backref > ext2->full_backref)
230 if (ext1->full_backref < ext2->full_backref)
234 return compare_data_backref(node1, node2);
236 return compare_tree_backref(node1, node2);
239 /* Explicit initialization for extent_record::flag_block_full_backref */
240 enum { FLAG_UNSET = 2 };
242 struct extent_record {
243 struct list_head backrefs;
244 struct list_head dups;
245 struct rb_root backref_tree;
246 struct list_head list;
247 struct cache_extent cache;
248 struct btrfs_disk_key parent_key;
253 u64 extent_item_refs;
255 u64 parent_generation;
259 unsigned int flag_block_full_backref:2;
260 unsigned int found_rec:1;
261 unsigned int content_checked:1;
262 unsigned int owner_ref_checked:1;
263 unsigned int is_root:1;
264 unsigned int metadata:1;
265 unsigned int bad_full_backref:1;
266 unsigned int crossing_stripes:1;
267 unsigned int wrong_chunk_type:1;
270 static inline struct extent_record* to_extent_record(struct list_head *entry)
272 return container_of(entry, struct extent_record, list);
275 struct inode_backref {
276 struct list_head list;
277 unsigned int found_dir_item:1;
278 unsigned int found_dir_index:1;
279 unsigned int found_inode_ref:1;
280 unsigned int filetype:8;
282 unsigned int ref_type;
289 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
291 return list_entry(entry, struct inode_backref, list);
294 struct root_item_record {
295 struct list_head list;
302 struct btrfs_key drop_key;
/* Error bits describing problems with a single name backref. */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8) /* 100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
319 struct file_extent_hole {
325 struct inode_record {
326 struct list_head backrefs;
327 unsigned int checked:1;
328 unsigned int merging:1;
329 unsigned int found_inode_item:1;
330 unsigned int found_dir_item:1;
331 unsigned int found_file_extent:1;
332 unsigned int found_csum_item:1;
333 unsigned int some_csum_missing:1;
334 unsigned int nodatasum:1;
347 struct rb_root holes;
348 struct list_head orphan_extents;
/* Error bits describing problems with a single inode. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8) /* 100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10) /* 400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
369 struct root_backref {
370 struct list_head list;
371 unsigned int found_dir_item:1;
372 unsigned int found_dir_index:1;
373 unsigned int found_back_ref:1;
374 unsigned int found_forward_ref:1;
375 unsigned int reachable:1;
384 static inline struct root_backref* to_root_backref(struct list_head *entry)
386 return list_entry(entry, struct root_backref, list);
390 struct list_head backrefs;
391 struct cache_extent cache;
392 unsigned int found_root_item:1;
398 struct cache_extent cache;
403 struct cache_extent cache;
404 struct cache_tree root_cache;
405 struct cache_tree inode_cache;
406 struct inode_record *current;
415 struct walk_control {
416 struct cache_tree shared;
417 struct shared_node *nodes[BTRFS_MAX_LEVEL];
423 struct btrfs_key key;
425 struct list_head list;
428 struct extent_entry {
433 struct list_head list;
436 struct root_item_info {
437 /* level of the root */
439 /* number of nodes at this level, must be 1 for a root */
443 struct cache_extent cache_extent;
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
/*
 * Fix: CROSSING_STRIPE_BOUNDARY previously shared bit (1 << 4) with
 * REFERENCER_MISMATCH, making the two errors indistinguishable.  Give
 * every flag a unique bit (shifting the following flags up by one).
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 5) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 6) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 7) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 8) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 9)
463 static void *print_status_check(void *p)
465 struct task_ctx *priv = p;
466 const char work_indicator[] = { '.', 'o', 'O', 'o' };
468 static char *task_position_string[] = {
470 "checking free space cache",
474 task_period_start(priv->info, 1000 /* 1s */);
476 if (priv->tp == TASK_NOTHING)
480 printf("%s [%c]\r", task_position_string[priv->tp],
481 work_indicator[count % 4]);
484 task_period_wait(priv->info);
/*
 * Finish the in-place status line printed by print_status_check().
 * NOTE(review): reconstructed — the body was truncated in this copy.
 */
static int print_status_return(void *p)
{
	printf("\n");
	fflush(stdout);

	return 0;
}
497 static enum btrfs_check_mode parse_check_mode(const char *str)
499 if (strcmp(str, "lowmem") == 0)
500 return CHECK_MODE_LOWMEM;
501 if (strcmp(str, "orig") == 0)
502 return CHECK_MODE_ORIGINAL;
503 if (strcmp(str, "original") == 0)
504 return CHECK_MODE_ORIGINAL;
506 return CHECK_MODE_UNKNOWN;
509 /* Compatible function to allow reuse of old codes */
510 static u64 first_extent_gap(struct rb_root *holes)
512 struct file_extent_hole *hole;
514 if (RB_EMPTY_ROOT(holes))
517 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
521 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
523 struct file_extent_hole *hole1;
524 struct file_extent_hole *hole2;
526 hole1 = rb_entry(node1, struct file_extent_hole, node);
527 hole2 = rb_entry(node2, struct file_extent_hole, node);
529 if (hole1->start > hole2->start)
531 if (hole1->start < hole2->start)
533 /* Now hole1->start == hole2->start */
534 if (hole1->len >= hole2->len)
536 * Hole 1 will be merge center
537 * Same hole will be merged later
540 /* Hole 2 will be merge center */
545 * Add a hole to the record
547 * This will do hole merge for copy_file_extent_holes(),
548 * which will ensure there won't be continuous holes.
550 static int add_file_extent_hole(struct rb_root *holes,
553 struct file_extent_hole *hole;
554 struct file_extent_hole *prev = NULL;
555 struct file_extent_hole *next = NULL;
557 hole = malloc(sizeof(*hole));
562 /* Since compare will not return 0, no -EEXIST will happen */
563 rb_insert(holes, &hole->node, compare_hole);
565 /* simple merge with previous hole */
566 if (rb_prev(&hole->node))
567 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
569 if (prev && prev->start + prev->len >= hole->start) {
570 hole->len = hole->start + hole->len - prev->start;
571 hole->start = prev->start;
572 rb_erase(&prev->node, holes);
577 /* iterate merge with next holes */
579 if (!rb_next(&hole->node))
581 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
583 if (hole->start + hole->len >= next->start) {
584 if (hole->start + hole->len <= next->start + next->len)
585 hole->len = next->start + next->len -
587 rb_erase(&next->node, holes);
596 static int compare_hole_range(struct rb_node *node, void *data)
598 struct file_extent_hole *hole;
601 hole = (struct file_extent_hole *)data;
604 hole = rb_entry(node, struct file_extent_hole, node);
605 if (start < hole->start)
607 if (start >= hole->start && start < hole->start + hole->len)
613 * Delete a hole in the record
615 * This will do the hole split and is much restrict than add.
617 static int del_file_extent_hole(struct rb_root *holes,
620 struct file_extent_hole *hole;
621 struct file_extent_hole tmp;
626 struct rb_node *node;
633 node = rb_search(holes, &tmp, compare_hole_range, NULL);
636 hole = rb_entry(node, struct file_extent_hole, node);
637 if (start + len > hole->start + hole->len)
641 * Now there will be no overlap, delete the hole and re-add the
642 * split(s) if they exists.
644 if (start > hole->start) {
645 prev_start = hole->start;
646 prev_len = start - hole->start;
649 if (hole->start + hole->len > start + len) {
650 next_start = start + len;
651 next_len = hole->start + hole->len - start - len;
654 rb_erase(node, holes);
657 ret = add_file_extent_hole(holes, prev_start, prev_len);
662 ret = add_file_extent_hole(holes, next_start, next_len);
669 static int copy_file_extent_holes(struct rb_root *dst,
672 struct file_extent_hole *hole;
673 struct rb_node *node;
676 node = rb_first(src);
678 hole = rb_entry(node, struct file_extent_hole, node);
679 ret = add_file_extent_hole(dst, hole->start, hole->len);
682 node = rb_next(node);
687 static void free_file_extent_holes(struct rb_root *holes)
689 struct rb_node *node;
690 struct file_extent_hole *hole;
692 node = rb_first(holes);
694 hole = rb_entry(node, struct file_extent_hole, node);
695 rb_erase(node, holes);
697 node = rb_first(holes);
701 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
703 static void record_root_in_trans(struct btrfs_trans_handle *trans,
704 struct btrfs_root *root)
706 if (root->last_trans != trans->transid) {
707 root->track_dirty = 1;
708 root->last_trans = trans->transid;
709 root->commit_root = root->node;
710 extent_buffer_get(root->node);
714 static u8 imode_to_type(u32 imode)
717 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
718 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
719 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
720 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
721 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
722 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
723 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
724 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
727 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
731 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
733 struct device_record *rec1;
734 struct device_record *rec2;
736 rec1 = rb_entry(node1, struct device_record, node);
737 rec2 = rb_entry(node2, struct device_record, node);
738 if (rec1->devid > rec2->devid)
740 else if (rec1->devid < rec2->devid)
746 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
748 struct inode_record *rec;
749 struct inode_backref *backref;
750 struct inode_backref *orig;
751 struct inode_backref *tmp;
752 struct orphan_data_extent *src_orphan;
753 struct orphan_data_extent *dst_orphan;
757 rec = malloc(sizeof(*rec));
759 return ERR_PTR(-ENOMEM);
760 memcpy(rec, orig_rec, sizeof(*rec));
762 INIT_LIST_HEAD(&rec->backrefs);
763 INIT_LIST_HEAD(&rec->orphan_extents);
764 rec->holes = RB_ROOT;
766 list_for_each_entry(orig, &orig_rec->backrefs, list) {
767 size = sizeof(*orig) + orig->namelen + 1;
768 backref = malloc(size);
773 memcpy(backref, orig, size);
774 list_add_tail(&backref->list, &rec->backrefs);
776 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
777 dst_orphan = malloc(sizeof(*dst_orphan));
782 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
783 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
785 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
791 if (!list_empty(&rec->backrefs))
792 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
793 list_del(&orig->list);
797 if (!list_empty(&rec->orphan_extents))
798 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
799 list_del(&orig->list);
808 static void print_orphan_data_extents(struct list_head *orphan_extents,
811 struct orphan_data_extent *orphan;
813 if (list_empty(orphan_extents))
815 printf("The following data extent is lost in tree %llu:\n",
817 list_for_each_entry(orphan, orphan_extents, list) {
818 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
819 orphan->objectid, orphan->offset, orphan->disk_bytenr,
824 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
826 u64 root_objectid = root->root_key.objectid;
827 int errors = rec->errors;
831 /* reloc root errors, we print its corresponding fs root objectid*/
832 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
833 root_objectid = root->root_key.offset;
834 fprintf(stderr, "reloc");
836 fprintf(stderr, "root %llu inode %llu errors %x",
837 (unsigned long long) root_objectid,
838 (unsigned long long) rec->ino, rec->errors);
840 if (errors & I_ERR_NO_INODE_ITEM)
841 fprintf(stderr, ", no inode item");
842 if (errors & I_ERR_NO_ORPHAN_ITEM)
843 fprintf(stderr, ", no orphan item");
844 if (errors & I_ERR_DUP_INODE_ITEM)
845 fprintf(stderr, ", dup inode item");
846 if (errors & I_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & I_ERR_ODD_DIR_ITEM)
849 fprintf(stderr, ", odd dir item");
850 if (errors & I_ERR_ODD_FILE_EXTENT)
851 fprintf(stderr, ", odd file extent");
852 if (errors & I_ERR_BAD_FILE_EXTENT)
853 fprintf(stderr, ", bad file extent");
854 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
855 fprintf(stderr, ", file extent overlap");
856 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
857 fprintf(stderr, ", file extent discount");
858 if (errors & I_ERR_DIR_ISIZE_WRONG)
859 fprintf(stderr, ", dir isize wrong");
860 if (errors & I_ERR_FILE_NBYTES_WRONG)
861 fprintf(stderr, ", nbytes wrong");
862 if (errors & I_ERR_ODD_CSUM_ITEM)
863 fprintf(stderr, ", odd csum item");
864 if (errors & I_ERR_SOME_CSUM_MISSING)
865 fprintf(stderr, ", some csum missing");
866 if (errors & I_ERR_LINK_COUNT_WRONG)
867 fprintf(stderr, ", link count wrong");
868 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
869 fprintf(stderr, ", orphan file extent");
870 fprintf(stderr, "\n");
871 /* Print the orphan extents if needed */
872 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
873 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
875 /* Print the holes if needed */
876 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
877 struct file_extent_hole *hole;
878 struct rb_node *node;
881 node = rb_first(&rec->holes);
882 fprintf(stderr, "Found file extent holes:\n");
885 hole = rb_entry(node, struct file_extent_hole, node);
886 fprintf(stderr, "\tstart: %llu, len: %llu\n",
887 hole->start, hole->len);
888 node = rb_next(node);
891 fprintf(stderr, "\tstart: 0, len: %llu\n",
892 round_up(rec->isize, root->sectorsize));
896 static void print_ref_error(int errors)
898 if (errors & REF_ERR_NO_DIR_ITEM)
899 fprintf(stderr, ", no dir item");
900 if (errors & REF_ERR_NO_DIR_INDEX)
901 fprintf(stderr, ", no dir index");
902 if (errors & REF_ERR_NO_INODE_REF)
903 fprintf(stderr, ", no inode ref");
904 if (errors & REF_ERR_DUP_DIR_ITEM)
905 fprintf(stderr, ", dup dir item");
906 if (errors & REF_ERR_DUP_DIR_INDEX)
907 fprintf(stderr, ", dup dir index");
908 if (errors & REF_ERR_DUP_INODE_REF)
909 fprintf(stderr, ", dup inode ref");
910 if (errors & REF_ERR_INDEX_UNMATCH)
911 fprintf(stderr, ", index mismatch");
912 if (errors & REF_ERR_FILETYPE_UNMATCH)
913 fprintf(stderr, ", filetype mismatch");
914 if (errors & REF_ERR_NAME_TOO_LONG)
915 fprintf(stderr, ", name too long");
916 if (errors & REF_ERR_NO_ROOT_REF)
917 fprintf(stderr, ", no root ref");
918 if (errors & REF_ERR_NO_ROOT_BACKREF)
919 fprintf(stderr, ", no root backref");
920 if (errors & REF_ERR_DUP_ROOT_REF)
921 fprintf(stderr, ", dup root ref");
922 if (errors & REF_ERR_DUP_ROOT_BACKREF)
923 fprintf(stderr, ", dup root backref");
924 fprintf(stderr, "\n");
927 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
930 struct ptr_node *node;
931 struct cache_extent *cache;
932 struct inode_record *rec = NULL;
935 cache = lookup_cache_extent(inode_cache, ino, 1);
937 node = container_of(cache, struct ptr_node, cache);
939 if (mod && rec->refs > 1) {
940 node->data = clone_inode_rec(rec);
941 if (IS_ERR(node->data))
947 rec = calloc(1, sizeof(*rec));
949 return ERR_PTR(-ENOMEM);
951 rec->extent_start = (u64)-1;
953 INIT_LIST_HEAD(&rec->backrefs);
954 INIT_LIST_HEAD(&rec->orphan_extents);
955 rec->holes = RB_ROOT;
957 node = malloc(sizeof(*node));
960 return ERR_PTR(-ENOMEM);
962 node->cache.start = ino;
963 node->cache.size = 1;
966 if (ino == BTRFS_FREE_INO_OBJECTID)
969 ret = insert_cache_extent(inode_cache, &node->cache);
971 return ERR_PTR(-EEXIST);
976 static void free_orphan_data_extents(struct list_head *orphan_extents)
978 struct orphan_data_extent *orphan;
980 while (!list_empty(orphan_extents)) {
981 orphan = list_entry(orphan_extents->next,
982 struct orphan_data_extent, list);
983 list_del(&orphan->list);
988 static void free_inode_rec(struct inode_record *rec)
990 struct inode_backref *backref;
995 while (!list_empty(&rec->backrefs)) {
996 backref = to_inode_backref(rec->backrefs.next);
997 list_del(&backref->list);
1000 free_orphan_data_extents(&rec->orphan_extents);
1001 free_file_extent_holes(&rec->holes);
1005 static int can_free_inode_rec(struct inode_record *rec)
1007 if (!rec->errors && rec->checked && rec->found_inode_item &&
1008 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1013 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1014 struct inode_record *rec)
1016 struct cache_extent *cache;
1017 struct inode_backref *tmp, *backref;
1018 struct ptr_node *node;
1019 unsigned char filetype;
1021 if (!rec->found_inode_item)
1024 filetype = imode_to_type(rec->imode);
1025 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1026 if (backref->found_dir_item && backref->found_dir_index) {
1027 if (backref->filetype != filetype)
1028 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1029 if (!backref->errors && backref->found_inode_ref &&
1030 rec->nlink == rec->found_link) {
1031 list_del(&backref->list);
1037 if (!rec->checked || rec->merging)
1040 if (S_ISDIR(rec->imode)) {
1041 if (rec->found_size != rec->isize)
1042 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1043 if (rec->found_file_extent)
1044 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1045 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1046 if (rec->found_dir_item)
1047 rec->errors |= I_ERR_ODD_DIR_ITEM;
1048 if (rec->found_size != rec->nbytes)
1049 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1050 if (rec->nlink > 0 && !no_holes &&
1051 (rec->extent_end < rec->isize ||
1052 first_extent_gap(&rec->holes) < rec->isize))
1053 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1056 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1057 if (rec->found_csum_item && rec->nodatasum)
1058 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1059 if (rec->some_csum_missing && !rec->nodatasum)
1060 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1063 BUG_ON(rec->refs != 1);
1064 if (can_free_inode_rec(rec)) {
1065 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1066 node = container_of(cache, struct ptr_node, cache);
1067 BUG_ON(node->data != rec);
1068 remove_cache_extent(inode_cache, &node->cache);
1070 free_inode_rec(rec);
1074 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1076 struct btrfs_path path;
1077 struct btrfs_key key;
1080 key.objectid = BTRFS_ORPHAN_OBJECTID;
1081 key.type = BTRFS_ORPHAN_ITEM_KEY;
1084 btrfs_init_path(&path);
1085 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1086 btrfs_release_path(&path);
1092 static int process_inode_item(struct extent_buffer *eb,
1093 int slot, struct btrfs_key *key,
1094 struct shared_node *active_node)
1096 struct inode_record *rec;
1097 struct btrfs_inode_item *item;
1099 rec = active_node->current;
1100 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1101 if (rec->found_inode_item) {
1102 rec->errors |= I_ERR_DUP_INODE_ITEM;
1105 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1106 rec->nlink = btrfs_inode_nlink(eb, item);
1107 rec->isize = btrfs_inode_size(eb, item);
1108 rec->nbytes = btrfs_inode_nbytes(eb, item);
1109 rec->imode = btrfs_inode_mode(eb, item);
1110 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1112 rec->found_inode_item = 1;
1113 if (rec->nlink == 0)
1114 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1115 maybe_free_inode_rec(&active_node->inode_cache, rec);
1119 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1121 int namelen, u64 dir)
1123 struct inode_backref *backref;
1125 list_for_each_entry(backref, &rec->backrefs, list) {
1126 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1128 if (backref->dir != dir || backref->namelen != namelen)
1130 if (memcmp(name, backref->name, namelen))
1135 backref = malloc(sizeof(*backref) + namelen + 1);
1138 memset(backref, 0, sizeof(*backref));
1140 backref->namelen = namelen;
1141 memcpy(backref->name, name, namelen);
1142 backref->name[namelen] = '\0';
1143 list_add_tail(&backref->list, &rec->backrefs);
1147 static int add_inode_backref(struct cache_tree *inode_cache,
1148 u64 ino, u64 dir, u64 index,
1149 const char *name, int namelen,
1150 int filetype, int itemtype, int errors)
1152 struct inode_record *rec;
1153 struct inode_backref *backref;
1155 rec = get_inode_rec(inode_cache, ino, 1);
1156 BUG_ON(IS_ERR(rec));
1157 backref = get_inode_backref(rec, name, namelen, dir);
1160 backref->errors |= errors;
1161 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1162 if (backref->found_dir_index)
1163 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1164 if (backref->found_inode_ref && backref->index != index)
1165 backref->errors |= REF_ERR_INDEX_UNMATCH;
1166 if (backref->found_dir_item && backref->filetype != filetype)
1167 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1169 backref->index = index;
1170 backref->filetype = filetype;
1171 backref->found_dir_index = 1;
1172 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1174 if (backref->found_dir_item)
1175 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1176 if (backref->found_dir_index && backref->filetype != filetype)
1177 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1179 backref->filetype = filetype;
1180 backref->found_dir_item = 1;
1181 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1182 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1183 if (backref->found_inode_ref)
1184 backref->errors |= REF_ERR_DUP_INODE_REF;
1185 if (backref->found_dir_index && backref->index != index)
1186 backref->errors |= REF_ERR_INDEX_UNMATCH;
1188 backref->index = index;
1190 backref->ref_type = itemtype;
1191 backref->found_inode_ref = 1;
1196 maybe_free_inode_rec(inode_cache, rec);
1200 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1201 struct cache_tree *dst_cache)
1203 struct inode_backref *backref;
1208 list_for_each_entry(backref, &src->backrefs, list) {
1209 if (backref->found_dir_index) {
1210 add_inode_backref(dst_cache, dst->ino, backref->dir,
1211 backref->index, backref->name,
1212 backref->namelen, backref->filetype,
1213 BTRFS_DIR_INDEX_KEY, backref->errors);
1215 if (backref->found_dir_item) {
1217 add_inode_backref(dst_cache, dst->ino,
1218 backref->dir, 0, backref->name,
1219 backref->namelen, backref->filetype,
1220 BTRFS_DIR_ITEM_KEY, backref->errors);
1222 if (backref->found_inode_ref) {
1223 add_inode_backref(dst_cache, dst->ino,
1224 backref->dir, backref->index,
1225 backref->name, backref->namelen, 0,
1226 backref->ref_type, backref->errors);
1230 if (src->found_dir_item)
1231 dst->found_dir_item = 1;
1232 if (src->found_file_extent)
1233 dst->found_file_extent = 1;
1234 if (src->found_csum_item)
1235 dst->found_csum_item = 1;
1236 if (src->some_csum_missing)
1237 dst->some_csum_missing = 1;
1238 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1239 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1244 BUG_ON(src->found_link < dir_count);
1245 dst->found_link += src->found_link - dir_count;
1246 dst->found_size += src->found_size;
1247 if (src->extent_start != (u64)-1) {
1248 if (dst->extent_start == (u64)-1) {
1249 dst->extent_start = src->extent_start;
1250 dst->extent_end = src->extent_end;
1252 if (dst->extent_end > src->extent_start)
1253 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1254 else if (dst->extent_end < src->extent_start) {
1255 ret = add_file_extent_hole(&dst->holes,
1257 src->extent_start - dst->extent_end);
1259 if (dst->extent_end < src->extent_end)
1260 dst->extent_end = src->extent_end;
1264 dst->errors |= src->errors;
1265 if (src->found_inode_item) {
1266 if (!dst->found_inode_item) {
1267 dst->nlink = src->nlink;
1268 dst->isize = src->isize;
1269 dst->nbytes = src->nbytes;
1270 dst->imode = src->imode;
1271 dst->nodatasum = src->nodatasum;
1272 dst->found_inode_item = 1;
1274 dst->errors |= I_ERR_DUP_INODE_ITEM;
1282 static int splice_shared_node(struct shared_node *src_node,
1283 struct shared_node *dst_node)
1285 struct cache_extent *cache;
1286 struct ptr_node *node, *ins;
1287 struct cache_tree *src, *dst;
1288 struct inode_record *rec, *conflict;
1289 u64 current_ino = 0;
1293 if (--src_node->refs == 0)
1295 if (src_node->current)
1296 current_ino = src_node->current->ino;
1298 src = &src_node->root_cache;
1299 dst = &dst_node->root_cache;
1301 cache = search_cache_extent(src, 0);
1303 node = container_of(cache, struct ptr_node, cache);
1305 cache = next_cache_extent(cache);
1308 remove_cache_extent(src, &node->cache);
1311 ins = malloc(sizeof(*ins));
1313 ins->cache.start = node->cache.start;
1314 ins->cache.size = node->cache.size;
1318 ret = insert_cache_extent(dst, &ins->cache);
1319 if (ret == -EEXIST) {
1320 conflict = get_inode_rec(dst, rec->ino, 1);
1321 BUG_ON(IS_ERR(conflict));
1322 merge_inode_recs(rec, conflict, dst);
1324 conflict->checked = 1;
1325 if (dst_node->current == conflict)
1326 dst_node->current = NULL;
1328 maybe_free_inode_rec(dst, conflict);
1329 free_inode_rec(rec);
1336 if (src == &src_node->root_cache) {
1337 src = &src_node->inode_cache;
1338 dst = &dst_node->inode_cache;
1342 if (current_ino > 0 && (!dst_node->current ||
1343 current_ino > dst_node->current->ino)) {
1344 if (dst_node->current) {
1345 dst_node->current->checked = 1;
1346 maybe_free_inode_rec(dst, dst_node->current);
1348 dst_node->current = get_inode_rec(dst, current_ino, 1);
1349 BUG_ON(IS_ERR(dst_node->current));
1354 static void free_inode_ptr(struct cache_extent *cache)
1356 struct ptr_node *node;
1357 struct inode_record *rec;
1359 node = container_of(cache, struct ptr_node, cache);
1361 free_inode_rec(rec);
1365 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1367 static struct shared_node *find_shared_node(struct cache_tree *shared,
1370 struct cache_extent *cache;
1371 struct shared_node *node;
1373 cache = lookup_cache_extent(shared, bytenr, 1);
1375 node = container_of(cache, struct shared_node, cache);
1381 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1384 struct shared_node *node;
1386 node = calloc(1, sizeof(*node));
1389 node->cache.start = bytenr;
1390 node->cache.size = 1;
1391 cache_tree_init(&node->root_cache);
1392 cache_tree_init(&node->inode_cache);
1395 ret = insert_cache_extent(shared, &node->cache);
/*
 * Record that the tree walk entered shared block @bytenr at @level, making
 * its shared_node the active collection point in @wc.  If the node was not
 * yet tracked it is added first.  For a dead root (root_refs == 0) at the
 * active level, the node's record caches are dropped instead; otherwise the
 * node's records are spliced into the currently active node's caches.
 * NOTE(review): several control-flow lines (returns, else branches, frees)
 * are elided from this listing — verify the exact branching upstream.
 */
1400 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1401 struct walk_control *wc, int level)
1403 struct shared_node *node;
1404 struct shared_node *dest;
/* Already collecting at this level: nothing to do (early-out path). */
1407 if (level == wc->active_node)
1410 BUG_ON(wc->active_node <= level);
1411 node = find_shared_node(&wc->shared, bytenr);
1413 ret = add_shared_node(&wc->shared, bytenr, refs);
1415 node = find_shared_node(&wc->shared, bytenr);
1416 wc->nodes[level] = node;
1417 wc->active_node = level;
/* Dead root at the top of the walk: discard any collected records. */
1421 if (wc->root_level == wc->active_node &&
1422 btrfs_root_refs(&root->root_item) == 0) {
1423 if (--node->refs == 0) {
1424 free_inode_recs_tree(&node->root_cache);
1425 free_inode_recs_tree(&node->inode_cache);
1426 remove_cache_extent(&wc->shared, &node->cache);
/* Otherwise merge this node's records into the active node's caches. */
1432 dest = wc->nodes[wc->active_node];
1433 splice_shared_node(node, dest);
1434 if (node->refs == 0) {
1435 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): pop the active shared node when the
 * walk climbs back above @level, re-activating the next tracked node above
 * it (found by scanning wc->nodes upward), and splice the finished node's
 * records into that destination for live roots.
 * NOTE(review): loop-body and return lines are elided in this listing.
 */
1441 static int leave_shared_node(struct btrfs_root *root,
1442 struct walk_control *wc, int level)
1444 struct shared_node *node;
1445 struct shared_node *dest;
/* Leaving the root level means the walk is done with shared tracking. */
1448 if (level == wc->root_level)
/* Find the next tracked node above this level to become active. */
1451 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1455 BUG_ON(i >= BTRFS_MAX_LEVEL);
1457 node = wc->nodes[wc->active_node];
1458 wc->nodes[wc->active_node] = NULL;
1459 wc->active_node = i;
1461 dest = wc->nodes[wc->active_node];
1462 if (wc->active_node < wc->root_level ||
1463 btrfs_root_refs(&root->root_item) > 0) {
1464 BUG_ON(node->refs <= 1);
1465 splice_shared_node(node, dest);
1467 BUG_ON(node->refs < 2);
/*
 * Determine the parent relationship of subvolume @child_root_id, first by a
 * direct ROOT_REF lookup under @parent_root_id, then by scanning all
 * ROOT_BACKREF items of the child.
 */
1476 * 1 - if the root with id child_root_id is a child of root parent_root_id
1477 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1478 * has other root(s) as parent(s)
1479 * 2 - if the root child_root_id doesn't have any parent roots
1481 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1484 struct btrfs_path path;
1485 struct btrfs_key key;
1486 struct extent_buffer *leaf;
1490 btrfs_init_path(&path);
/* Fast path: direct (parent, ROOT_REF, child) item in the root tree. */
1492 key.objectid = parent_root_id;
1493 key.type = BTRFS_ROOT_REF_KEY;
1494 key.offset = child_root_id;
1495 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1499 btrfs_release_path(&path);
/* Slow path: walk every ROOT_BACKREF of the child looking for parents. */
1503 key.objectid = child_root_id;
1504 key.type = BTRFS_ROOT_BACKREF_KEY;
1506 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1512 leaf = path.nodes[0];
1513 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1514 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1517 leaf = path.nodes[0];
1520 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Past the child's backref items: stop scanning. */
1521 if (key.objectid != child_root_id ||
1522 key.type != BTRFS_ROOT_BACKREF_KEY)
1527 if (key.offset == parent_root_id) {
1528 btrfs_release_path(&path);
1535 btrfs_release_path(&path);
/* has_parent is presumably set when any backref was seen — confirm. */
1538 return has_parent ? 0 : 2;
/*
 * Record every name packed into one DIR_ITEM/DIR_INDEX item as an inode
 * backref on the active shared node.  Names pointing at INODE_ITEMs go into
 * the inode cache, names pointing at ROOT_ITEMs (subvolumes) into the root
 * cache; anything else is flagged as an invalid location.
 * NOTE(review): local declarations (cur, total, len, error, nritems, ...)
 * and several braces are elided from this listing.
 */
1541 static int process_dir_item(struct btrfs_root *root,
1542 struct extent_buffer *eb,
1543 int slot, struct btrfs_key *key,
1544 struct shared_node *active_node)
1554 struct btrfs_dir_item *di;
1555 struct inode_record *rec;
1556 struct cache_tree *root_cache;
1557 struct cache_tree *inode_cache;
1558 struct btrfs_key location;
1559 char namebuf[BTRFS_NAME_LEN];
1561 root_cache = &active_node->root_cache;
1562 inode_cache = &active_node->inode_cache;
1563 rec = active_node->current;
1564 rec->found_dir_item = 1;
/* A single item can hold several dir entries back to back. */
1566 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1567 total = btrfs_item_size_nr(eb, slot);
1568 while (cur < total) {
1570 btrfs_dir_item_key_to_cpu(eb, di, &location);
1571 name_len = btrfs_dir_name_len(eb, di);
1572 data_len = btrfs_dir_data_len(eb, di);
1573 filetype = btrfs_dir_type(eb, di);
/* Directory isize accounting counts the name bytes. */
1575 rec->found_size += name_len;
1576 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp the copy and remember the error. */
1580 len = BTRFS_NAME_LEN;
1581 error = REF_ERR_NAME_TOO_LONG;
1583 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1585 if (location.type == BTRFS_INODE_ITEM_KEY) {
1586 add_inode_backref(inode_cache, location.objectid,
1587 key->objectid, key->offset, namebuf,
1588 len, filetype, key->type, error);
1589 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1590 add_inode_backref(root_cache, location.objectid,
1591 key->objectid, key->offset,
1592 namebuf, len, filetype,
1595 fprintf(stderr, "invalid location in dir item %u\n",
1597 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1598 key->objectid, key->offset, namebuf,
1599 len, filetype, key->type, error);
/* Advance past this entry (header + name + data). */
1602 len = sizeof(*di) + name_len + data_len;
1603 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item must contain exactly one entry. */
1606 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1607 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every name packed into one INODE_REF item as a backref in the
 * active node's inode cache.  key->objectid is the inode, key->offset the
 * parent directory.
 */
1612 static int process_inode_ref(struct extent_buffer *eb,
1613 int slot, struct btrfs_key *key,
1614 struct shared_node *active_node)
1622 struct cache_tree *inode_cache;
1623 struct btrfs_inode_ref *ref;
1624 char namebuf[BTRFS_NAME_LEN];
1626 inode_cache = &active_node->inode_cache;
/* One item can carry multiple refs back to back. */
1628 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1629 total = btrfs_item_size_nr(eb, slot);
1630 while (cur < total) {
1631 name_len = btrfs_inode_ref_name_len(eb, ref);
1632 index = btrfs_inode_ref_index(eb, ref);
1633 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp the copy and remember the error. */
1637 len = BTRFS_NAME_LEN;
1638 error = REF_ERR_NAME_TOO_LONG;
1640 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1641 add_inode_backref(inode_cache, key->objectid, key->offset,
1642 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed ref. */
1644 len = sizeof(*ref) + name_len;
1645 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Like process_inode_ref() but for INODE_EXTREF items, where the parent
 * directory is stored inside each extref entry rather than in the key.
 */
1651 static int process_inode_extref(struct extent_buffer *eb,
1652 int slot, struct btrfs_key *key,
1653 struct shared_node *active_node)
1662 struct cache_tree *inode_cache;
1663 struct btrfs_inode_extref *extref;
1664 char namebuf[BTRFS_NAME_LEN];
1666 inode_cache = &active_node->inode_cache;
1668 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1669 total = btrfs_item_size_nr(eb, slot);
1670 while (cur < total) {
1671 name_len = btrfs_inode_extref_name_len(eb, extref);
1672 index = btrfs_inode_extref_index(eb, extref);
1673 parent = btrfs_inode_extref_parent(eb, extref);
1674 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp the copy and remember the error. */
1678 len = BTRFS_NAME_LEN;
1679 error = REF_ERR_NAME_TOO_LONG;
1681 read_extent_buffer(eb, namebuf,
1682 (unsigned long)(extref + 1), len);
1683 add_inode_backref(inode_cache, key->objectid, parent,
1684 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed extref. */
1686 len = sizeof(*extref) + name_len;
1687 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the logical range [@start, @start + @len) are
 * covered by checksum items in the csum tree, accumulating into *@found.
 * The initial search backs up one slot so a csum item beginning before
 * @start but overlapping it is not missed.
 * NOTE(review): the loop advance, *found accumulation and start/len update
 * lines are elided from this listing — confirm the exact bookkeeping.
 */
1694 static int count_csum_range(struct btrfs_root *root, u64 start,
1695 u64 len, u64 *found)
1697 struct btrfs_key key;
1698 struct btrfs_path path;
1699 struct extent_buffer *leaf;
1704 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1706 btrfs_init_path(&path);
1708 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1710 key.type = BTRFS_EXTENT_CSUM_KEY;
1712 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* Step back one slot: the previous csum item may overlap @start. */
1716 if (ret > 0 && path.slots[0] > 0) {
1717 leaf = path.nodes[0];
1718 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1719 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1720 key.type == BTRFS_EXTENT_CSUM_KEY)
1725 leaf = path.nodes[0];
1726 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1727 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1732 leaf = path.nodes[0];
1735 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1736 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1737 key.type != BTRFS_EXTENT_CSUM_KEY)
1740 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1741 if (key.offset >= start + len)
1744 if (key.offset > start)
/* Each csum_size bytes of item data covers one sector. */
1747 size = btrfs_item_size_nr(leaf, path.slots[0]);
1748 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1749 if (csum_end > start) {
1750 size = min(csum_end - start, len);
1759 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item of the current inode record: track the
 * covered file range (detecting overlaps and holes), sanity-check inline /
 * regular / prealloc extent fields, account found_size, and verify csum
 * coverage for on-disk data extents.
 * NOTE(review): local declarations (num_bytes, extent_type, found, ret...)
 * and some braces are elided from this listing.
 */
1765 static int process_file_extent(struct btrfs_root *root,
1766 struct extent_buffer *eb,
1767 int slot, struct btrfs_key *key,
1768 struct shared_node *active_node)
1770 struct inode_record *rec;
1771 struct btrfs_file_extent_item *fi;
1773 u64 disk_bytenr = 0;
1774 u64 extent_offset = 0;
1775 u64 mask = root->sectorsize - 1;
1779 rec = active_node->current;
1780 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1781 rec->found_file_extent = 1;
/* First extent seen for this inode: start tracking the covered range. */
1783 if (rec->extent_start == (u64)-1) {
1784 rec->extent_start = key->offset;
1785 rec->extent_end = key->offset;
/* Overlap with the previous extent is an error; a gap is a hole. */
1788 if (rec->extent_end > key->offset)
1789 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1790 else if (rec->extent_end < key->offset) {
1791 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1792 key->offset - rec->extent_end);
1797 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1798 extent_type = btrfs_file_extent_type(eb, fi);
1800 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1801 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1803 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1804 rec->found_size += num_bytes;
/* Round inline length up to sector size for range tracking. */
1805 num_bytes = (num_bytes + mask) & ~mask;
1806 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1807 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1808 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1809 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1810 extent_offset = btrfs_file_extent_offset(eb, fi);
/* num_bytes must be non-zero and sector aligned. */
1811 if (num_bytes == 0 || (num_bytes & mask))
1812 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1813 if (num_bytes + extent_offset >
1814 btrfs_file_extent_ram_bytes(eb, fi))
1815 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Prealloc extents must not be compressed/encrypted/encoded. */
1816 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1817 (btrfs_file_extent_compression(eb, fi) ||
1818 btrfs_file_extent_encryption(eb, fi) ||
1819 btrfs_file_extent_other_encoding(eb, fi)))
1820 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Holes (disk_bytenr == 0) don't count toward the inode's size. */
1821 if (disk_bytenr > 0)
1822 rec->found_size += num_bytes;
1824 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1826 rec->extent_end = key->offset + num_bytes;
1829 * The data reloc tree will copy full extents into its inode and then
1830 * copy the corresponding csums. Because the extent it copied could be
1831 * a preallocated extent that hasn't been written to yet there may be no
1832 * csums to copy, ergo we won't have csums for our file extent. This is
1833 * ok so just don't bother checking csums if the inode belongs to the
1836 if (disk_bytenr > 0 &&
1837 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents are checksummed over the on-disk bytes. */
1839 if (btrfs_file_extent_compression(eb, fi))
1840 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1842 disk_bytenr += extent_offset;
1844 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* REG extents expect csums; PREALLOC extents must not have any. */
1847 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1849 rec->found_csum_item = 1;
1850 if (found < num_bytes)
1851 rec->some_csum_missing = 1;
1852 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1854 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Walk every item in leaf @eb and dispatch to the per-item-type processor
 * (dir items, inode refs/extrefs, inode items, file extents), switching the
 * active node's "current" inode record as the objectid advances.
 * NOTE(review): the switch statement's opening line, break statements and
 * closing braces are elided from this listing.
 */
1860 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1861 struct walk_control *wc)
1863 struct btrfs_key key;
1867 struct cache_tree *inode_cache;
1868 struct shared_node *active_node;
/* Dead root at the active level: nothing to collect. */
1870 if (wc->root_level == wc->active_node &&
1871 btrfs_root_refs(&root->root_item) == 0)
1874 active_node = wc->nodes[wc->active_node];
1875 inode_cache = &active_node->inode_cache;
1876 nritems = btrfs_header_nritems(eb);
1877 for (i = 0; i < nritems; i++) {
1878 btrfs_item_key_to_cpu(eb, &key, i);
/* Free-space cache and orphan items are not inode metadata; skip. */
1880 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1882 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Moved on to a new inode: finalize the previous record. */
1885 if (active_node->current == NULL ||
1886 active_node->current->ino < key.objectid) {
1887 if (active_node->current) {
1888 active_node->current->checked = 1;
1889 maybe_free_inode_rec(inode_cache,
1890 active_node->current);
1892 active_node->current = get_inode_rec(inode_cache,
1894 BUG_ON(IS_ERR(active_node->current));
1897 case BTRFS_DIR_ITEM_KEY:
1898 case BTRFS_DIR_INDEX_KEY:
1899 ret = process_dir_item(root, eb, i, &key, active_node);
1901 case BTRFS_INODE_REF_KEY:
1902 ret = process_inode_ref(eb, i, &key, active_node);
1904 case BTRFS_INODE_EXTREF_KEY:
1905 ret = process_inode_extref(eb, i, &key, active_node);
1907 case BTRFS_INODE_ITEM_KEY:
1908 ret = process_inode_item(eb, i, &key, active_node);
1910 case BTRFS_EXTENT_DATA_KEY:
1911 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for all child blocks of @node from @slot onward, so the
 * subsequent synchronous reads in walk_down_tree() hit warm caches.
 */
1921 static void reada_walk_down(struct btrfs_root *root,
1922 struct extent_buffer *node, int slot)
1931 level = btrfs_header_level(node);
/* Leaves have no children to prefetch (early-out; line elided here). */
1935 nritems = btrfs_header_nritems(node);
1936 blocksize = root->nodesize;
1937 for (i = slot; i < nritems; i++) {
1938 bytenr = btrfs_node_blockptr(node, i);
1939 ptr_gen = btrfs_node_ptr_generation(node, i);
1940 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1945 * Check the child node/leaf by the following condition:
1946 * 1. the first item key of the node/leaf should be the same with the one
1948 * 2. block in parent node should match the child node/leaf.
1949 * 3. generation of parent node and child's header should be consistent.
1951 * Or the child node/leaf pointed by the key in parent is not valid.
1953 * We hope to check leaf owner too, but since subvol may share leaves,
1954 * which makes leaf owner check not so strong, key check should be
1955 * sufficient enough for that case.
1957 static int check_child_node(struct btrfs_root *root,
1958 struct extent_buffer *parent, int slot,
1959 struct extent_buffer *child)
1961 struct btrfs_key parent_key;
1962 struct btrfs_key child_key;
/* Compare the parent's pointer key with the child's first key. */
1965 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1966 if (btrfs_header_level(child) == 0)
1967 btrfs_item_key_to_cpu(child, &child_key, 0);
1969 btrfs_node_key_to_cpu(child, &child_key, 0);
1971 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1974 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1975 parent_key.objectid, parent_key.type, parent_key.offset,
1976 child_key.objectid, child_key.type, child_key.offset);
/* The child header must record the bytenr the parent points at. */
1978 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1980 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1981 btrfs_node_blockptr(parent, slot),
1982 btrfs_header_bytenr(child));
/* Generation mismatch means the pointer is stale or corrupted. */
1984 if (btrfs_node_ptr_generation(parent, slot) !=
1985 btrfs_header_generation(child)) {
1987 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1988 btrfs_header_generation(child),
1989 btrfs_node_ptr_generation(parent, slot));
/*
 * Per-level cache of the last looked-up block and its reference count, used
 * by walk_down_tree() to avoid repeated btrfs_lookup_extent_info() calls.
 * NOTE(review): the `struct node_refs {` header line is elided here.
 */
1995 u64 bytenr[BTRFS_MAX_LEVEL];
1996 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend the tree from path->nodes[*level], processing each leaf through
 * process_one_leaf().  Shared blocks (refs > 1) are handed to
 * enter_shared_node() and skipped on later encounters; child blocks are
 * read, validated against the parent (check_child_node) and sanity-checked
 * (btrfs_check_leaf/node) before descending.  @nrefs caches refcounts per
 * level to avoid redundant extent-tree lookups.
 * NOTE(review): many control-flow lines (returns, error labels, braces and
 * some declarations) are elided from this listing.
 */
1999 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2000 struct walk_control *wc, int *level,
2001 struct node_refs *nrefs)
2003 enum btrfs_tree_block_status status;
2006 struct extent_buffer *next;
2007 struct extent_buffer *cur;
2012 WARN_ON(*level < 0);
2013 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached refcount when the starting block was seen before. */
2015 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2016 refs = nrefs->refs[*level];
2019 ret = btrfs_lookup_extent_info(NULL, root,
2020 path->nodes[*level]->start,
2021 *level, 1, &refs, NULL);
2026 nrefs->bytenr[*level] = path->nodes[*level]->start;
2027 nrefs->refs[*level] = refs;
/* refs > 1 means the block is shared between trees. */
2031 ret = enter_shared_node(root, path->nodes[*level]->start,
2039 while (*level >= 0) {
2040 WARN_ON(*level < 0);
2041 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2042 cur = path->nodes[*level];
2044 if (btrfs_header_level(cur) != *level)
/* Exhausted this node's slots: climb back up (break path elided). */
2047 if (path->slots[*level] >= btrfs_header_nritems(cur))
2050 ret = process_one_leaf(root, cur, wc);
2055 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2056 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2057 blocksize = root->nodesize;
/* Same per-level refcount caching for the child block. */
2059 if (bytenr == nrefs->bytenr[*level - 1]) {
2060 refs = nrefs->refs[*level - 1];
2062 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2063 *level - 1, 1, &refs, NULL);
2067 nrefs->bytenr[*level - 1] = bytenr;
2068 nrefs->refs[*level - 1] = refs;
/* Shared child already processed elsewhere: just advance the slot. */
2073 ret = enter_shared_node(root, bytenr, refs,
2076 path->slots[*level]++;
2081 next = btrfs_find_tree_block(root, bytenr, blocksize);
2082 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2083 free_extent_buffer(next);
/* Prefetch siblings before the blocking read. */
2084 reada_walk_down(root, cur, path->slots[*level]);
2085 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: record the parent as a corrupt extent. */
2087 if (!extent_buffer_uptodate(next)) {
2088 struct btrfs_key node_key;
2090 btrfs_node_key_to_cpu(path->nodes[*level],
2092 path->slots[*level]);
2093 btrfs_add_corrupt_extent_record(root->fs_info,
2095 path->nodes[*level]->start,
2096 root->nodesize, *level);
2102 ret = check_child_node(root, cur, path->slots[*level], next);
2108 if (btrfs_is_leaf(next))
2109 status = btrfs_check_leaf(root, NULL, next);
2111 status = btrfs_check_node(root, NULL, next);
2112 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2113 free_extent_buffer(next);
/* Descend: make the child the current node at the next lower level. */
2118 *level = *level - 1;
2119 free_extent_buffer(path->nodes[*level]);
2120 path->nodes[*level] = next;
2121 path->slots[*level] = 0;
2124 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Ascend from *level to the first ancestor with an unvisited sibling slot,
 * freeing the buffers of fully-walked levels and leaving any shared node
 * whose level we climb past.
 * NOTE(review): the slot-advance / *level update inside the loop and the
 * return lines are elided from this listing.
 */
2128 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2129 struct walk_control *wc, int *level)
2132 struct extent_buffer *leaf;
2134 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2135 leaf = path->nodes[i];
/* Found a level with remaining siblings: resume the walk there. */
2136 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2141 free_extent_buffer(path->nodes[*level]);
2142 path->nodes[*level] = NULL;
2143 BUG_ON(*level > wc->active_node);
2144 if (*level == wc->active_node)
2145 leave_shared_node(root, wc, *level);
/*
 * Verify the root directory inode of a tree: it must have a clean inode
 * item, nlink 1 with no counted links, and exactly one ".." inode-ref
 * backref with index 0 and no dir item/index.
 * NOTE(review): the return statements and final "ret = 0" lines are elided
 * — each failed check presumably jumps to a common exit.
 */
2152 static int check_root_dir(struct inode_record *rec)
2154 struct inode_backref *backref;
2157 if (!rec->found_inode_item || rec->errors)
2159 if (rec->nlink != 1 || rec->found_link != 0)
2161 if (list_empty(&rec->backrefs))
2163 backref = to_inode_backref(rec->backrefs.next);
2164 if (!backref->found_inode_ref)
2166 if (backref->index != 0 || backref->namelen != 2 ||
2167 memcmp(backref->name, "..", 2))
2169 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair a directory whose isize disagrees with the accumulated entry-name
 * sizes: locate its INODE_ITEM (search with offset -1 then step back one
 * slot) and rewrite the size to rec->found_size.
 */
2176 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2177 struct btrfs_root *root, struct btrfs_path *path,
2178 struct inode_record *rec)
2180 struct btrfs_inode_item *ei;
2181 struct btrfs_key key;
2184 key.objectid = rec->ino;
2185 key.type = BTRFS_INODE_ITEM_KEY;
/* offset -1 lands just past the item; the previous slot is inspected. */
2186 key.offset = (u64)-1;
2188 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2192 if (!path->slots[0]) {
2199 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2200 if (key.objectid != rec->ino) {
2205 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2206 struct btrfs_inode_item);
2207 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2208 btrfs_mark_buffer_dirty(path->nodes[0]);
2209 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2210 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2211 root->root_key.objectid);
2213 btrfs_release_path(path);
/*
 * Insert the missing orphan item for an unlinked inode and clear the
 * corresponding error flag on success.
 */
2217 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2218 struct btrfs_root *root,
2219 struct btrfs_path *path,
2220 struct inode_record *rec)
2224 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2225 btrfs_release_path(path);
2227 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair an inode whose nbytes disagrees with the summed file extents:
 * find its INODE_ITEM exactly and rewrite nbytes to rec->found_size.
 */
2231 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2232 struct btrfs_root *root,
2233 struct btrfs_path *path,
2234 struct inode_record *rec)
2236 struct btrfs_inode_item *ei;
2237 struct btrfs_key key;
2240 key.objectid = rec->ino;
2241 key.type = BTRFS_INODE_ITEM_KEY;
2244 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2251 /* Since ret == 0, no need to check anything */
2252 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2253 struct btrfs_inode_item);
2254 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2255 btrfs_mark_buffer_dirty(path->nodes[0]);
2256 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2257 printf("reset nbytes for ino %llu root %llu\n",
2258 rec->ino, root->root_key.objectid);
2260 btrfs_release_path(path);
/*
 * Rebuild a missing DIR_INDEX entry for @backref in its own transaction:
 * insert an empty item keyed (dir, DIR_INDEX, index), fill in the dir_item
 * header and name, then update the parent directory's record so its isize
 * accounting reflects the added name.
 */
2264 static int add_missing_dir_index(struct btrfs_root *root,
2265 struct cache_tree *inode_cache,
2266 struct inode_record *rec,
2267 struct inode_backref *backref)
2269 struct btrfs_path *path;
2270 struct btrfs_trans_handle *trans;
2271 struct btrfs_dir_item *dir_item;
2272 struct extent_buffer *leaf;
2273 struct btrfs_key key;
2274 struct btrfs_disk_key disk_key;
2275 struct inode_record *dir_rec;
2276 unsigned long name_ptr;
2277 u32 data_size = sizeof(*dir_item) + backref->namelen;
2280 path = btrfs_alloc_path();
2284 trans = btrfs_start_transaction(root, 1);
2285 if (IS_ERR(trans)) {
2286 btrfs_free_path(path);
2287 return PTR_ERR(trans);
2290 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2291 (unsigned long long)rec->ino);
2292 key.objectid = backref->dir;
2293 key.type = BTRFS_DIR_INDEX_KEY;
2294 key.offset = backref->index;
2296 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2299 leaf = path->nodes[0];
2300 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* The embedded location key points at the child inode's INODE_ITEM. */
2302 disk_key.objectid = cpu_to_le64(rec->ino);
2303 disk_key.type = BTRFS_INODE_ITEM_KEY;
2304 disk_key.offset = 0;
2306 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2307 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2308 btrfs_set_dir_data_len(leaf, dir_item, 0);
2309 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2310 name_ptr = (unsigned long)(dir_item + 1);
2311 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2312 btrfs_mark_buffer_dirty(leaf);
2313 btrfs_free_path(path);
2314 btrfs_commit_transaction(trans, root);
2316 backref->found_dir_index = 1;
2317 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2318 BUG_ON(IS_ERR(dir_rec));
/* The new name changes the parent dir's computed size; re-evaluate. */
2321 dir_rec->found_size += backref->namelen;
2322 if (dir_rec->found_size == dir_rec->isize &&
2323 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2324 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2325 if (dir_rec->found_size != dir_rec->isize)
2326 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete a bad DIR_INDEX entry described by @backref in its own
 * transaction.  If the item holds only this one name the whole item is
 * removed, otherwise just this name is deleted from it.
 */
2331 static int delete_dir_index(struct btrfs_root *root,
2332 struct cache_tree *inode_cache,
2333 struct inode_record *rec,
2334 struct inode_backref *backref)
2336 struct btrfs_trans_handle *trans;
2337 struct btrfs_dir_item *di;
2338 struct btrfs_path *path;
2341 path = btrfs_alloc_path();
2345 trans = btrfs_start_transaction(root, 1);
2346 if (IS_ERR(trans)) {
2347 btrfs_free_path(path);
2348 return PTR_ERR(trans);
2352 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2353 (unsigned long long)backref->dir,
2354 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2355 (unsigned long long)root->objectid);
/* mod -1 asks the lookup to prepare the path for deletion. */
2357 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2358 backref->name, backref->namelen,
2359 backref->index, -1);
2362 btrfs_free_path(path);
2363 btrfs_commit_transaction(trans, root);
/* Item not found (di NULL path, lines elided): delete whole item. */
2370 ret = btrfs_del_item(trans, root, path);
2372 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2374 btrfs_free_path(path);
2375 btrfs_commit_transaction(trans, root);
/*
 * Recreate a lost INODE_ITEM for @rec from whatever evidence survives:
 * dir items imply a directory (mode 0755), otherwise a regular file; sizes
 * come from the accumulated found_size / extent_end; timestamps are set to
 * "now".  The caller is warned the result may be incomplete.
 */
2379 static int create_inode_item(struct btrfs_root *root,
2380 struct inode_record *rec,
2381 struct inode_backref *backref, int root_dir)
2383 struct btrfs_trans_handle *trans;
2384 struct btrfs_inode_item inode_item;
2385 time_t now = time(NULL);
2388 trans = btrfs_start_transaction(root, 1);
2389 if (IS_ERR(trans)) {
2390 ret = PTR_ERR(trans);
2394 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2395 "be incomplete, please check permissions and content after "
2396 "the fsck completes.\n", (unsigned long long)root->objectid,
2397 (unsigned long long)rec->ino);
2399 memset(&inode_item, 0, sizeof(inode_item));
2400 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* Root dirs (elided branch) get nlink 1; others use the counted links. */
2402 btrfs_set_stack_inode_nlink(&inode_item, 1);
2404 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2405 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2406 if (rec->found_dir_item) {
2407 if (rec->found_file_extent)
2408 fprintf(stderr, "root %llu inode %llu has both a dir "
2409 "item and extents, unsure if it is a dir or a "
2410 "regular file so setting it as a directory\n",
2411 (unsigned long long)root->objectid,
2412 (unsigned long long)rec->ino);
2413 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2414 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2415 } else if (!rec->found_dir_item) {
2416 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2417 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2419 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2420 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2421 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2422 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2423 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2424 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
/* otime (creation time) is unknown; leave it zero. */
2425 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2426 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2428 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2430 btrfs_commit_transaction(trans, root);
/*
 * Walk all backrefs of @rec and repair each inconsistent combination:
 * delete orphaned/unmatched dir indexes, add missing dir indexes, drop
 * fully-consistent backrefs from the list, insert missing dir index/item
 * pairs for inode-ref-only backrefs, and recreate a lost inode item when
 * all three pieces of a backref exist.  Returns the repair count (or a
 * negative error); `delete` selects the destructive passes.
 */
2434 static int repair_inode_backrefs(struct btrfs_root *root,
2435 struct inode_record *rec,
2436 struct cache_tree *inode_cache,
2439 struct inode_backref *tmp, *backref;
2440 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2444 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* The tree's root dir gets its inode item recreated unconditionally. */
2445 if (!delete && rec->ino == root_dirid) {
2446 if (!rec->found_inode_item) {
2447 ret = create_inode_item(root, rec, backref, 1);
2454 /* Index 0 for root dir's are special, don't mess with it */
2455 if (rec->ino == root_dirid && backref->index == 0)
/* Dir index without matching inode ref (or mismatched): delete it. */
2459 ((backref->found_dir_index && !backref->found_inode_ref) ||
2460 (backref->found_dir_index && backref->found_inode_ref &&
2461 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2462 ret = delete_dir_index(root, inode_cache, rec, backref);
2466 list_del(&backref->list);
2470 if (!delete && !backref->found_dir_index &&
2471 backref->found_dir_item && backref->found_inode_ref) {
2472 ret = add_missing_dir_index(root, inode_cache, rec,
/*
 * NOTE(review): `found_dir_index` is tested twice below; the second
 * test was presumably meant to be a different flag — verify upstream.
 */
2477 if (backref->found_dir_item &&
2478 backref->found_dir_index &&
2479 backref->found_dir_index) {
2480 if (!backref->errors &&
2481 backref->found_inode_ref) {
2482 list_del(&backref->list);
/* Inode ref exists but both dir entries are missing: re-link it. */
2488 if (!delete && (!backref->found_dir_index &&
2489 !backref->found_dir_item &&
2490 backref->found_inode_ref)) {
2491 struct btrfs_trans_handle *trans;
2492 struct btrfs_key location;
2494 ret = check_dir_conflict(root, backref->name,
2500 * let nlink fixing routine to handle it,
2501 * which can do it better.
2506 location.objectid = rec->ino;
2507 location.type = BTRFS_INODE_ITEM_KEY;
2508 location.offset = 0;
2510 trans = btrfs_start_transaction(root, 1);
2511 if (IS_ERR(trans)) {
2512 ret = PTR_ERR(trans);
2515 fprintf(stderr, "adding missing dir index/item pair "
2517 (unsigned long long)rec->ino);
2518 ret = btrfs_insert_dir_item(trans, root, backref->name,
2520 backref->dir, &location,
2521 imode_to_type(rec->imode),
2524 btrfs_commit_transaction(trans, root);
/* Complete, consistent backref but no inode item: recreate it. */
2528 if (!delete && (backref->found_inode_ref &&
2529 backref->found_dir_index &&
2530 backref->found_dir_item &&
2531 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2532 !rec->found_inode_item)) {
2533 ret = create_inode_item(root, rec, backref, 0);
2540 return ret ? ret : repaired;
2544 * To determine the file type for nlink/inode_item repair
2546 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2547 * Return -ENOENT if file type is not found.
2549 static int find_file_type(struct inode_record *rec, u8 *type)
2551 struct inode_backref *backref;
2553 /* For inode item recovered case */
2554 if (rec->found_inode_item) {
/* The inode's own mode is the most authoritative source. */
2555 *type = imode_to_type(rec->imode);
/* Otherwise fall back to the filetype recorded in any dir entry. */
2559 list_for_each_entry(backref, &rec->backrefs, list) {
2560 if (backref->found_dir_index || backref->found_dir_item) {
2561 *type = backref->filetype;
2569 * To determine the file name for nlink repair
2571 * Return 0 if file name is found, set name and namelen.
2572 * Return -ENOENT if file name is not found.
2574 static int find_file_name(struct inode_record *rec,
2575 char *name, int *namelen)
2577 struct inode_backref *backref;
/* Any backref that carried a name (dir entry or inode ref) will do. */
2579 list_for_each_entry(backref, &rec->backrefs, list) {
2580 if (backref->found_dir_index || backref->found_dir_item ||
2581 backref->found_inode_ref) {
2582 memcpy(name, backref->name, backref->namelen);
2583 *namelen = backref->namelen;
2590 /* Reset the nlink of the inode to the correct one */
2591 static int reset_nlink(struct btrfs_trans_handle *trans,
2592 struct btrfs_root *root,
2593 struct btrfs_path *path,
2594 struct inode_record *rec)
2596 struct inode_backref *backref;
2597 struct inode_backref *tmp;
2598 struct btrfs_key key;
2599 struct btrfs_inode_item *inode_item;
2602 /* We don't believe this either, reset it and iterate backref */
2603 rec->found_link = 0;
2605 /* Remove all backref including the valid ones */
2606 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2607 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2608 backref->index, backref->name,
2609 backref->namelen, 0);
2613 /* remove invalid backref, so it won't be added back */
2614 if (!(backref->found_dir_index &&
2615 backref->found_dir_item &&
2616 backref->found_inode_ref)) {
2617 list_del(&backref->list);
2624 /* Set nlink to 0 */
2625 key.objectid = rec->ino;
2626 key.type = BTRFS_INODE_ITEM_KEY;
2628 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2635 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2636 struct btrfs_inode_item);
2637 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2638 btrfs_mark_buffer_dirty(path->nodes[0]);
2639 btrfs_release_path(path);
2642 * Add back valid inode_ref/dir_item/dir_index,
2643 * add_link() will handle the nlink inc, so new nlink must be correct
2645 list_for_each_entry(backref, &rec->backrefs, list) {
2646 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2647 backref->name, backref->namelen,
2648 backref->filetype, &backref->index, 1);
2653 btrfs_release_path(path);
/*
 * Fix a wrong link count: recover a usable name and type, reset nlink by
 * replaying the valid backrefs via reset_nlink(), and if no valid backref
 * remains, link the inode into a (possibly newly created) lost+found
 * directory, appending ".<ino>" suffixes to dodge name collisions.
 * Clears I_ERR_LINK_COUNT_WRONG on the way out so the record isn't
 * revisited forever.
 */
2657 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2658 struct btrfs_root *root,
2659 struct btrfs_path *path,
2660 struct inode_record *rec)
2662 char *dir_name = "lost+found";
2663 char namebuf[BTRFS_NAME_LEN] = {0};
2668 int name_recovered = 0;
2669 int type_recovered = 0;
2673 * Get file name and type first before these invalid inode ref
2674 * are deleted by remove_all_invalid_backref()
2676 name_recovered = !find_file_name(rec, namebuf, &namelen);
2677 type_recovered = !find_file_type(rec, &type);
/* Fall back to the inode number as the file name. */
2679 if (!name_recovered) {
2680 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2681 rec->ino, rec->ino);
2682 namelen = count_digits(rec->ino);
2683 sprintf(namebuf, "%llu", rec->ino);
/* Fall back to a regular file when no type evidence exists. */
2686 if (!type_recovered) {
2687 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2689 type = BTRFS_FT_REG_FILE;
2693 ret = reset_nlink(trans, root, path, rec);
2696 "Failed to reset nlink for inode %llu: %s\n",
2697 rec->ino, strerror(-ret));
/* No valid link survived: re-home the inode under lost+found. */
2701 if (rec->found_link == 0) {
2702 lost_found_ino = root->highest_inode;
2703 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2708 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2709 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2712 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2713 dir_name, strerror(-ret));
2716 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2717 namebuf, namelen, type, NULL, 1);
2719 * Add ".INO" suffix several times to handle case where
2720 * "FILENAME.INO" is already taken by another file.
2722 while (ret == -EEXIST) {
2724 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2726 if (namelen + count_digits(rec->ino) + 1 >
2731 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2733 namelen += count_digits(rec->ino) + 1;
2734 ret = btrfs_add_link(trans, root, rec->ino,
2735 lost_found_ino, namebuf,
2736 namelen, type, NULL, 1);
2740 "Failed to link the inode %llu to %s dir: %s\n",
2741 rec->ino, dir_name, strerror(-ret));
2745 * Just increase the found_link, don't actually add the
2746 * backref. This will make things easier and this inode
2747 * record will be freed after the repair is done.
2748 * So fsck will not report problem about this inode.
2751 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2752 namelen, namebuf, dir_name);
2754 printf("Fixed the nlink of inode %llu\n", rec->ino);
2757 * Clear the flag anyway, or we will loop forever for the same inode
2758 * as it will not be removed from the bad inode list and the dead loop
2761 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2762 btrfs_release_path(path);
2767 * Check if there is any normal(reg or prealloc) file extent for given
2769 * This is used to determine the file type when neither its dir_index/item nor
2770 * inode_item exists.
2772 * This will *NOT* report error, if any error happens, just consider it does
2773 * not have any normal file extent.
/*
 * Scan @root for any non-inline (regular/prealloc) file extent belonging
 * to @ino.  Used as a heuristic to guess the file type when neither the
 * dir entry nor the inode item survived.
 * NOTE(review): lines are missing from this excerpt (gaps in the embedded
 * numbering, e.g. 2784 -> 2788); the body below is incomplete.
 */
2775 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2777 struct btrfs_path *path;
2778 struct btrfs_key key;
2779 struct btrfs_key found_key;
2780 struct btrfs_file_extent_item *fi;
2784 path = btrfs_alloc_path();
/* Search from the first EXTENT_DATA key of @ino and walk forward */
2788 key.type = BTRFS_EXTENT_DATA_KEY;
2791 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2796 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2797 ret = btrfs_next_leaf(root, path);
2804 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
/* Stop once we leave @ino's EXTENT_DATA item range */
2806 if (found_key.objectid != ino ||
2807 found_key.type != BTRFS_EXTENT_DATA_KEY)
2809 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2810 struct btrfs_file_extent_item);
2811 type = btrfs_file_extent_type(path->nodes[0], fi);
/* Any non-inline extent is evidence of a normal (regular) file */
2812 if (type != BTRFS_FILE_EXTENT_INLINE) {
2818 btrfs_free_path(path);
/*
 * Map a BTRFS_FT_* directory-entry type to the matching S_IF* mode bits
 * via a designated-initializer lookup table.
 * NOTE(review): no bounds check on @type is visible in this excerpt —
 * an out-of-range type would read past the table; confirm upstream.
 */
2822 static u32 btrfs_type_to_imode(u8 type)
2824 static u32 imode_by_btrfs_type[] = {
2825 [BTRFS_FT_REG_FILE] = S_IFREG,
2826 [BTRFS_FT_DIR] = S_IFDIR,
2827 [BTRFS_FT_CHRDEV] = S_IFCHR,
2828 [BTRFS_FT_BLKDEV] = S_IFBLK,
2829 [BTRFS_FT_FIFO] = S_IFIFO,
2830 [BTRFS_FT_SOCK] = S_IFSOCK,
2831 [BTRFS_FT_SYMLINK] = S_IFLNK,
2834 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for @rec inside the given transaction.
 * Only the inode item itself is recreated; nlink/backref repair is left
 * to the later nlink-repair stage (flagged via I_ERR_LINK_COUNT_WRONG).
 * NOTE(review): excerpt is incomplete (numbering gaps); some declarations
 * (e.g. 'mode', 'filetype') are defined on lines not visible here.
 */
2837 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2838 struct btrfs_root *root,
2839 struct btrfs_path *path,
2840 struct inode_record *rec)
2844 int type_recovered = 0;
2847 printf("Trying to rebuild inode:%llu\n", rec->ino);
2849 type_recovered = !find_file_type(rec, &filetype);
2852 * Try to determine inode type if type not found.
2854 * For found regular file extent, it must be FILE.
2855 * For found dir_item/index, it must be DIR.
2857 * For undetermined one, use FILE as fallback.
2860 * 1. If found backref(inode_index/item is already handled) to it,
2862 * Need new inode-inode ref structure to allow search for that.
/* Fall back to heuristics when the recorded type could not be recovered */
2864 if (!type_recovered) {
2865 if (rec->found_file_extent &&
2866 find_normal_file_extent(root, rec->ino)) {
2868 filetype = BTRFS_FT_REG_FILE;
2869 } else if (rec->found_dir_item) {
2871 filetype = BTRFS_FT_DIR;
2872 } else if (!list_empty(&rec->orphan_extents)) {
2874 filetype = BTRFS_FT_REG_FILE;
2876 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2879 filetype = BTRFS_FT_REG_FILE;
2883 ret = btrfs_new_inode(trans, root, rec->ino,
2884 mode | btrfs_type_to_imode(filetype));
2889 * Here inode rebuild is done, we only rebuild the inode item,
2890 * don't repair the nlink(like move to lost+found).
2891 * That is the job of nlink repair.
2893 * We just fill the record and return
2895 rec->found_dir_item = 1;
2896 rec->imode = mode | btrfs_type_to_imode(filetype);
2898 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2899 /* Ensure the inode_nlinks repair function will be called */
2900 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach each orphan data extent recorded on @rec as a real file
 * extent item, or free the extent if it conflicts with an existing one.
 * Also updates the record's size/hole bookkeeping as extents come back.
 * NOTE(review): excerpt is incomplete (numbering gaps).
 */
2905 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2906 struct btrfs_root *root,
2907 struct btrfs_path *path,
2908 struct inode_record *rec)
2910 struct orphan_data_extent *orphan;
2911 struct orphan_data_extent *tmp;
2914 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2916 * Check for conflicting file extents
2918 * Here we don't know whether the extents is compressed or not,
2919 * so we can only assume it not compressed nor data offset,
2920 * and use its disk_len as extent length.
2922 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2923 orphan->offset, orphan->disk_len, 0);
2924 btrfs_release_path(path);
2929 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2930 orphan->disk_bytenr, orphan->disk_len);
/* Conflicting range already covered: drop the orphan's extent instead */
2931 ret = btrfs_free_extent(trans,
2932 root->fs_info->extent_root,
2933 orphan->disk_bytenr, orphan->disk_len,
2934 0, root->objectid, orphan->objectid,
2939 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2940 orphan->offset, orphan->disk_bytenr,
2941 orphan->disk_len, orphan->disk_len);
2945 /* Update file size info */
2946 rec->found_size += orphan->disk_len;
2947 if (rec->found_size == rec->nbytes)
2948 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2950 /* Update the file extent hole info too */
2951 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2955 if (RB_EMPTY_ROOT(&rec->holes))
2956 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2958 list_del(&orphan->list);
/* All orphans handled: clear the orphan-extent error flag */
2961 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill every recorded file-extent hole of @rec by punching hole extents,
 * so the file's extent layout becomes contiguous again.
 * NOTE(review): excerpt is incomplete (numbering gaps); loop structure
 * around the rb_first() walk is partially missing.
 */
2966 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2967 struct btrfs_root *root,
2968 struct btrfs_path *path,
2969 struct inode_record *rec)
2971 struct rb_node *node;
2972 struct file_extent_hole *hole;
2976 node = rb_first(&rec->holes);
2980 hole = rb_entry(node, struct file_extent_hole, node);
2981 ret = btrfs_punch_hole(trans, root, rec->ino,
2982 hole->start, hole->len);
2985 ret = del_file_extent_hole(&rec->holes, hole->start,
2989 if (RB_EMPTY_ROOT(&rec->holes))
2990 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2991 node = rb_first(&rec->holes);
2993 /* special case for a file losing all its file extent */
2995 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2996 round_up(rec->isize, root->sectorsize));
3000 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3001 rec->ino, root->objectid);
/*
 * Dispatch inode repairs for @rec inside a single transaction, in a fixed
 * order: rebuild missing inode item first, then orphan extents, holes,
 * isize, orphan item, nlinks and nbytes.  Earlier failures short-circuit
 * later repairs (each step is gated on !ret).
 * NOTE(review): excerpt is incomplete (numbering gaps).
 */
3006 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3008 struct btrfs_trans_handle *trans;
3009 struct btrfs_path *path;
/* Nothing to do unless the record carries a repairable error bit */
3012 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3013 I_ERR_NO_ORPHAN_ITEM |
3014 I_ERR_LINK_COUNT_WRONG |
3015 I_ERR_NO_INODE_ITEM |
3016 I_ERR_FILE_EXTENT_ORPHAN |
3017 I_ERR_FILE_EXTENT_DISCOUNT|
3018 I_ERR_FILE_NBYTES_WRONG)))
3021 path = btrfs_alloc_path();
3026 * For nlink repair, it may create a dir and add link, so
3027 * 2 for parent(256)'s dir_index and dir_item
3028 * 2 for lost+found dir's inode_item and inode_ref
3029 * 1 for the new inode_ref of the file
3030 * 2 for lost+found dir's dir_index and dir_item for the file
3032 trans = btrfs_start_transaction(root, 7);
3033 if (IS_ERR(trans)) {
3034 btrfs_free_path(path);
3035 return PTR_ERR(trans);
3038 if (rec->errors & I_ERR_NO_INODE_ITEM)
3039 ret = repair_inode_no_item(trans, root, path, rec);
3040 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3041 ret = repair_inode_orphan_extent(trans, root, path, rec);
3042 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3043 ret = repair_inode_discount_extent(trans, root, path, rec);
3044 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3045 ret = repair_inode_isize(trans, root, path, rec);
3046 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3047 ret = repair_inode_orphan_item(trans, root, path, rec);
3048 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3049 ret = repair_inode_nlinks(trans, root, path, rec);
3050 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3051 ret = repair_inode_nbytes(trans, root, path, rec);
/* NOTE(review): commit return value is not checked in this excerpt */
3052 btrfs_commit_transaction(trans, root);
3053 btrfs_free_path(path);
/*
 * Validate (and in repair mode, fix) every inode record collected for
 * @root: repair backrefs first, ensure the root dir exists, then walk the
 * remaining records flagging/printing errors and attempting per-inode
 * repairs.  Returns -1 when unresolved errors remain, 0 otherwise.
 * NOTE(review): excerpt is incomplete (numbering gaps); several loop
 * headers and error-path lines are missing from view.
 */
3057 static int check_inode_recs(struct btrfs_root *root,
3058 struct cache_tree *inode_cache)
3060 struct cache_extent *cache;
3061 struct ptr_node *node;
3062 struct inode_record *rec;
3063 struct inode_backref *backref;
3068 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A dead root (refs == 0) should have produced no inode records */
3070 if (btrfs_root_refs(&root->root_item) == 0) {
3071 if (!cache_tree_empty(inode_cache))
3072 fprintf(stderr, "warning line %d\n", __LINE__);
3077 * We need to record the highest inode number for later 'lost+found'
3079 * We must select an ino not used/referred by any existing inode, or
3080 * 'lost+found' ino may be a missing ino in a corrupted leaf,
3081 * this may cause 'lost+found' dir has wrong nlinks.
3083 cache = last_cache_extent(inode_cache);
3085 node = container_of(cache, struct ptr_node, cache);
3087 if (rec->ino > root->highest_inode)
3088 root->highest_inode = rec->ino;
3092 * We need to repair backrefs first because we could change some of the
3093 * errors in the inode recs.
3095 * We also need to go through and delete invalid backrefs first and then
3096 * add the correct ones second. We do this because we may get EEXIST
3097 * when adding back the correct index because we hadn't yet deleted the
3100 * For example, if we were missing a dir index then the directories
3101 * isize would be wrong, so if we fixed the isize to what we thought it
3102 * would be and then fixed the backref we'd still have a invalid fs, so
3103 * we need to add back the dir index and then check to see if the isize
3108 if (stage == 3 && !err)
3111 cache = search_cache_extent(inode_cache, 0);
3112 while (repair && cache) {
3113 node = container_of(cache, struct ptr_node, cache);
3115 cache = next_cache_extent(cache);
3117 /* Need to free everything up and rescan */
3119 remove_cache_extent(inode_cache, &node->cache);
3121 free_inode_rec(rec);
3125 if (list_empty(&rec->backrefs))
3128 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Verify the root directory record (ino == root_dirid) first */
3142 rec = get_inode_rec(inode_cache, root_dirid, 0);
3143 BUG_ON(IS_ERR(rec));
3145 ret = check_root_dir(rec);
3147 fprintf(stderr, "root %llu root dir %llu error\n",
3148 (unsigned long long)root->root_key.objectid,
3149 (unsigned long long)root_dirid);
3150 print_inode_error(root, rec);
3155 struct btrfs_trans_handle *trans;
3157 trans = btrfs_start_transaction(root, 1);
3158 if (IS_ERR(trans)) {
3159 err = PTR_ERR(trans);
3164 "root %llu missing its root dir, recreating\n",
3165 (unsigned long long)root->objectid);
3167 ret = btrfs_make_root_dir(trans, root, root_dirid);
3170 btrfs_commit_transaction(trans, root);
3174 fprintf(stderr, "root %llu root dir %llu not found\n",
3175 (unsigned long long)root->root_key.objectid,
3176 (unsigned long long)root_dirid);
/* Main pass over the remaining inode records */
3180 cache = search_cache_extent(inode_cache, 0);
3183 node = container_of(cache, struct ptr_node, cache);
3185 remove_cache_extent(inode_cache, &node->cache);
3187 if (rec->ino == root_dirid ||
3188 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3189 free_inode_rec(rec);
3193 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3194 ret = check_orphan_item(root, rec->ino);
3196 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3197 if (can_free_inode_rec(rec)) {
3198 free_inode_rec(rec);
3203 if (!rec->found_inode_item)
3204 rec->errors |= I_ERR_NO_INODE_ITEM;
3205 if (rec->found_link != rec->nlink)
3206 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3208 ret = try_repair_inode(root, rec);
3209 if (ret == 0 && can_free_inode_rec(rec)) {
3210 free_inode_rec(rec);
3216 if (!(repair && ret == 0))
3218 print_inode_error(root, rec);
/* Report every unresolved backref of a bad inode record */
3219 list_for_each_entry(backref, &rec->backrefs, list) {
3220 if (!backref->found_dir_item)
3221 backref->errors |= REF_ERR_NO_DIR_ITEM;
3222 if (!backref->found_dir_index)
3223 backref->errors |= REF_ERR_NO_DIR_INDEX;
3224 if (!backref->found_inode_ref)
3225 backref->errors |= REF_ERR_NO_INODE_REF;
3226 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3227 " namelen %u name %s filetype %d errors %x",
3228 (unsigned long long)backref->dir,
3229 (unsigned long long)backref->index,
3230 backref->namelen, backref->name,
3231 backref->filetype, backref->errors);
3232 print_ref_error(backref->errors);
3234 free_inode_rec(rec);
3236 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a fresh zeroed record if none exists.  Returns ERR_PTR on
 * allocation or insertion failure.
 * NOTE(review): excerpt is incomplete (numbering gaps).
 */
3239 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3242 struct cache_extent *cache;
3243 struct root_record *rec = NULL;
3246 cache = lookup_cache_extent(root_cache, objectid, 1);
3248 rec = container_of(cache, struct root_record, cache);
3250 rec = calloc(1, sizeof(*rec));
3252 return ERR_PTR(-ENOMEM);
3253 rec->objectid = objectid;
3254 INIT_LIST_HEAD(&rec->backrefs);
/* Root records are keyed by objectid with a nominal size of 1 */
3255 rec->cache.start = objectid;
3256 rec->cache.size = 1;
3258 ret = insert_cache_extent(root_cache, &rec->cache);
3260 return ERR_PTR(-EEXIST);
/*
 * Find an existing root_backref on @rec matching (ref_root, dir, name),
 * or allocate a new one (name stored inline after the struct, always
 * NUL-terminated) and append it to the record's backref list.
 * NOTE(review): excerpt is incomplete (numbering gaps).
 */
3265 static struct root_backref *get_root_backref(struct root_record *rec,
3266 u64 ref_root, u64 dir, u64 index,
3267 const char *name, int namelen)
3269 struct root_backref *backref;
3271 list_for_each_entry(backref, &rec->backrefs, list) {
3272 if (backref->ref_root != ref_root || backref->dir != dir ||
3273 backref->namelen != namelen)
3275 if (memcmp(name, backref->name, namelen))
/* No match: allocate struct plus inline name buffer (+1 for NUL) */
3280 backref = calloc(1, sizeof(*backref) + namelen + 1);
3283 backref->ref_root = ref_root;
3285 backref->index = index;
3286 backref->namelen = namelen;
3287 memcpy(backref->name, name, namelen);
3288 backref->name[namelen] = '\0';
3289 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for a root_record cache entry: drain and free its backref
 * list.  Used by the generated free_root_recs_tree() below.
 * NOTE(review): excerpt is incomplete (the free() calls are on lines not
 * visible here).
 */
3293 static void free_root_record(struct cache_extent *cache)
3295 struct root_record *rec;
3296 struct root_backref *backref;
3298 rec = container_of(cache, struct root_record, cache);
3299 while (!list_empty(&rec->backrefs)) {
3300 backref = to_root_backref(rec->backrefs.next);
3301 list_del(&backref->list);
/* Generates free_root_recs_tree() over a cache_tree of root_records */
3308 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence (dir item/index, ROOT_REF or ROOT_BACKREF)
 * that root @ref_root references root @root_id, merging it into the
 * root_backref and flagging duplicate/mismatch errors.  A backref becomes
 * "reachable" once both a forward ref and a dir item were seen.
 * NOTE(review): excerpt is incomplete (numbering gaps).
 */
3310 static int add_root_backref(struct cache_tree *root_cache,
3311 u64 root_id, u64 ref_root, u64 dir, u64 index,
3312 const char *name, int namelen,
3313 int item_type, int errors)
3315 struct root_record *rec;
3316 struct root_backref *backref;
3318 rec = get_root_rec(root_cache, root_id);
3319 BUG_ON(IS_ERR(rec));
3320 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3323 backref->errors |= errors;
/* DIR_ITEM carries no index; all other item types must agree on it */
3325 if (item_type != BTRFS_DIR_ITEM_KEY) {
3326 if (backref->found_dir_index || backref->found_back_ref ||
3327 backref->found_forward_ref) {
3328 if (backref->index != index)
3329 backref->errors |= REF_ERR_INDEX_UNMATCH;
3331 backref->index = index;
3335 if (item_type == BTRFS_DIR_ITEM_KEY) {
3336 if (backref->found_forward_ref)
3338 backref->found_dir_item = 1;
3339 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3340 backref->found_dir_index = 1;
3341 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3342 if (backref->found_forward_ref)
3343 backref->errors |= REF_ERR_DUP_ROOT_REF;
3344 else if (backref->found_dir_item)
3346 backref->found_forward_ref = 1;
3347 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3348 if (backref->found_back_ref)
3349 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3350 backref->found_back_ref = 1;
3355 if (backref->found_forward_ref && backref->found_dir_item)
3356 backref->reachable = 1;
/*
 * Move per-subvolume inode records from @src_cache into the global root
 * cache, converting dir item/index backrefs of child-root entries into
 * root backrefs.  Reloc-tree records are simply discarded.
 * NOTE(review): excerpt is incomplete (numbering gaps).
 */
3360 static int merge_root_recs(struct btrfs_root *root,
3361 struct cache_tree *src_cache,
3362 struct cache_tree *dst_cache)
3364 struct cache_extent *cache;
3365 struct ptr_node *node;
3366 struct inode_record *rec;
3367 struct inode_backref *backref;
/* Relocation trees carry no root refs worth merging */
3370 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3371 free_inode_recs_tree(src_cache);
3376 cache = search_cache_extent(src_cache, 0);
3379 node = container_of(cache, struct ptr_node, cache);
3381 remove_cache_extent(src_cache, &node->cache);
3384 ret = is_child_root(root, root->objectid, rec->ino);
3390 list_for_each_entry(backref, &rec->backrefs, list) {
3391 BUG_ON(backref->found_inode_ref);
3392 if (backref->found_dir_item)
3393 add_root_backref(dst_cache, rec->ino,
3394 root->root_key.objectid, backref->dir,
3395 backref->index, backref->name,
3396 backref->namelen, BTRFS_DIR_ITEM_KEY,
3398 if (backref->found_dir_index)
3399 add_root_backref(dst_cache, rec->ino,
3400 root->root_key.objectid, backref->dir,
3401 backref->index, backref->name,
3402 backref->namelen, BTRFS_DIR_INDEX_KEY,
3406 free_inode_rec(rec);
/*
 * Verify reachability of every fs tree: iteratively clear 'reachable' on
 * backrefs whose referencing root is itself unreferenced, then report any
 * root left with no refs (unless it only appears as an orphan or lacks a
 * root item entirely) and any backref with errors.
 * Returns 1 if errors were found, 0 otherwise.
 * NOTE(review): excerpt is incomplete (numbering gaps); the fixed-point
 * loop structure around the first pass is partially missing.
 */
3413 static int check_root_refs(struct btrfs_root *root,
3414 struct cache_tree *root_cache)
3416 struct root_record *rec;
3417 struct root_record *ref_root;
3418 struct root_backref *backref;
3419 struct cache_extent *cache;
/* The top-level fs tree is always considered referenced */
3425 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3426 BUG_ON(IS_ERR(rec));
3429 /* fixme: this can not detect circular references */
3432 cache = search_cache_extent(root_cache, 0);
3436 rec = container_of(cache, struct root_record, cache);
3437 cache = next_cache_extent(cache);
3439 if (rec->found_ref == 0)
3442 list_for_each_entry(backref, &rec->backrefs, list) {
3443 if (!backref->reachable)
3446 ref_root = get_root_rec(root_cache,
3448 BUG_ON(IS_ERR(ref_root));
3449 if (ref_root->found_ref > 0)
/* Referencing root is dead: this backref cannot make @rec reachable */
3452 backref->reachable = 0;
3454 if (rec->found_ref == 0)
/* Second pass: report the remaining problems */
3460 cache = search_cache_extent(root_cache, 0);
3464 rec = container_of(cache, struct root_record, cache);
3465 cache = next_cache_extent(cache);
3467 if (rec->found_ref == 0 &&
3468 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3469 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3470 ret = check_orphan_item(root->fs_info->tree_root,
3476 * If we don't have a root item then we likely just have
3477 * a dir item in a snapshot for this root but no actual
3478 * ref key or anything so it's meaningless.
3480 if (!rec->found_root_item)
3483 fprintf(stderr, "fs tree %llu not referenced\n",
3484 (unsigned long long)rec->objectid);
3488 if (rec->found_ref > 0 && !rec->found_root_item)
3490 list_for_each_entry(backref, &rec->backrefs, list) {
3491 if (!backref->found_dir_item)
3492 backref->errors |= REF_ERR_NO_DIR_ITEM;
3493 if (!backref->found_dir_index)
3494 backref->errors |= REF_ERR_NO_DIR_INDEX;
3495 if (!backref->found_back_ref)
3496 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3497 if (!backref->found_forward_ref)
3498 backref->errors |= REF_ERR_NO_ROOT_REF;
3499 if (backref->reachable && backref->errors)
3506 fprintf(stderr, "fs tree %llu refs %u %s\n",
3507 (unsigned long long)rec->objectid, rec->found_ref,
3508 rec->found_root_item ? "" : "not found");
3510 list_for_each_entry(backref, &rec->backrefs, list) {
3511 if (!backref->reachable)
3513 if (!backref->errors && rec->found_root_item)
3515 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3516 " index %llu namelen %u name %s errors %x\n",
3517 (unsigned long long)backref->ref_root,
3518 (unsigned long long)backref->dir,
3519 (unsigned long long)backref->index,
3520 backref->namelen, backref->name,
3522 print_ref_error(backref->errors);
3525 return errors > 0 ? 1 : 0;
/*
 * Decode a ROOT_REF/ROOT_BACKREF item at @slot of @eb and feed it into
 * the root cache.  For ROOT_REF the key is (parent, REF, child) so the
 * referenced root is key->offset; for ROOT_BACKREF it is key->objectid.
 * Over-long names are truncated to BTRFS_NAME_LEN and flagged.
 * NOTE(review): excerpt is incomplete (numbering gaps).
 */
3528 static int process_root_ref(struct extent_buffer *eb, int slot,
3529 struct btrfs_key *key,
3530 struct cache_tree *root_cache)
3536 struct btrfs_root_ref *ref;
3537 char namebuf[BTRFS_NAME_LEN];
3540 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3542 dirid = btrfs_root_ref_dirid(eb, ref);
3543 index = btrfs_root_ref_sequence(eb, ref);
3544 name_len = btrfs_root_ref_name_len(eb, ref);
3546 if (name_len <= BTRFS_NAME_LEN) {
3550 len = BTRFS_NAME_LEN;
3551 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored immediately after the root_ref item */
3553 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3555 if (key->type == BTRFS_ROOT_REF_KEY) {
3556 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3557 index, namebuf, len, key->type, error);
3559 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3560 index, namebuf, len, key->type, error);
/*
 * Destructor for a btrfs_corrupt_block cache entry; used by the generated
 * free_corrupt_blocks_tree() below.
 * NOTE(review): the free() call is on a line not visible in this excerpt.
 */
3565 static void free_corrupt_block(struct cache_extent *cache)
3567 struct btrfs_corrupt_block *corrupt;
3569 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* Generates free_corrupt_blocks_tree() over a tree of corrupt blocks */
3573 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3576 * Repair the btree of the given root.
3578 * The fix is to remove the node key in the corrupt_blocks cache_tree
3579 * and rebalance the tree.
3580 * After the fix, the btree should be writeable.
/*
 * Repair @root's btree given the set of corrupted blocks: first delete
 * the node pointer to each corrupt block and free its extent (without
 * rebalancing, to avoid touching corrupt siblings), then re-run
 * btrfs_search_slot() with ins_len=-1 on each key to trigger balancing.
 * NOTE(review): excerpt is incomplete (numbering gaps); loop conditions
 * and some error paths are missing from view.
 */
3582 static int repair_btree(struct btrfs_root *root,
3583 struct cache_tree *corrupt_blocks)
3585 struct btrfs_trans_handle *trans;
3586 struct btrfs_path *path;
3587 struct btrfs_corrupt_block *corrupt;
3588 struct cache_extent *cache;
3589 struct btrfs_key key;
3594 if (cache_tree_empty(corrupt_blocks))
3597 path = btrfs_alloc_path();
3601 trans = btrfs_start_transaction(root, 1);
3602 if (IS_ERR(trans)) {
3603 ret = PTR_ERR(trans);
3604 fprintf(stderr, "Error starting transaction: %s\n",
3608 cache = first_cache_extent(corrupt_blocks);
3610 corrupt = container_of(cache, struct btrfs_corrupt_block,
/* Search only down to the corrupt block's own level */
3612 level = corrupt->level;
3613 path->lowest_level = level;
3614 key.objectid = corrupt->key.objectid;
3615 key.type = corrupt->key.type;
3616 key.offset = corrupt->key.offset;
3619 * Here we don't want to do any tree balance, since it may
3620 * cause a balance with corrupted brother leaf/node,
3621 * so ins_len set to 0 here.
3622 * Balance will be done after all corrupt node/leaf is deleted.
3624 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3627 offset = btrfs_node_blockptr(path->nodes[level],
3628 path->slots[level]);
3630 /* Remove the ptr */
3631 ret = btrfs_del_ptr(trans, root, path, level,
3632 path->slots[level]);
3636 * Remove the corresponding extent
3637 * return value is not concerned.
3639 btrfs_release_path(path);
3640 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3641 0, root->root_key.objectid,
3643 cache = next_cache_extent(cache);
3646 /* Balance the btree using btrfs_search_slot() */
3647 cache = first_cache_extent(corrupt_blocks);
3649 corrupt = container_of(cache, struct btrfs_corrupt_block,
3651 memcpy(&key, &corrupt->key, sizeof(key));
3652 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3655 /* return will always >0 since it won't find the item */
3657 btrfs_release_path(path);
3658 cache = next_cache_extent(cache);
3661 btrfs_commit_transaction(trans, root);
3663 btrfs_free_path(path);
/*
 * Check a single fs/subvol tree: collect its root record, move orphan
 * data extents onto their inode records, walk the whole tree gathering
 * inode records, then (in repair mode) try to fix corrupted tree blocks
 * and finally validate the collected inode records.
 * NOTE(review): excerpt is incomplete (numbering gaps); the walk loop and
 * several error paths are only partially visible.
 */
3667 static int check_fs_root(struct btrfs_root *root,
3668 struct cache_tree *root_cache,
3669 struct walk_control *wc)
3675 struct btrfs_path path;
3676 struct shared_node root_node;
3677 struct root_record *rec;
3678 struct btrfs_root_item *root_item = &root->root_item;
3679 struct cache_tree corrupt_blocks;
3680 struct orphan_data_extent *orphan;
3681 struct orphan_data_extent *tmp;
3682 enum btrfs_tree_block_status status;
3683 struct node_refs nrefs;
3686 * Reuse the corrupt_block cache tree to record corrupted tree block
3688 * Unlike the usage in extent tree check, here we do it in a per
3689 * fs/subvol tree base.
3691 cache_tree_init(&corrupt_blocks);
3692 root->fs_info->corrupt_blocks = &corrupt_blocks;
3694 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3695 rec = get_root_rec(root_cache, root->root_key.objectid);
3696 BUG_ON(IS_ERR(rec));
3697 if (btrfs_root_refs(root_item) > 0)
3698 rec->found_root_item = 1;
3701 btrfs_init_path(&path);
3702 memset(&root_node, 0, sizeof(root_node));
3703 cache_tree_init(&root_node.root_cache);
3704 cache_tree_init(&root_node.inode_cache);
3705 memset(&nrefs, 0, sizeof(nrefs));
3707 /* Move the orphan extent record to corresponding inode_record */
3708 list_for_each_entry_safe(orphan, tmp,
3709 &root->orphan_data_extents, list) {
3710 struct inode_record *inode;
3712 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3714 BUG_ON(IS_ERR(inode));
3715 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3716 list_move(&orphan->list, &inode->orphan_extents);
3719 level = btrfs_header_level(root->node);
3720 memset(wc->nodes, 0, sizeof(wc->nodes));
3721 wc->nodes[level] = &root_node;
3722 wc->active_node = level;
3723 wc->root_level = level;
3725 /* We may not have checked the root block, lets do that now */
3726 if (btrfs_is_leaf(root->node))
3727 status = btrfs_check_leaf(root, NULL, root->node);
3729 status = btrfs_check_node(root, NULL, root->node);
3730 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Start from the tree top, or resume from drop_progress if snapshotted */
3733 if (btrfs_root_refs(root_item) > 0 ||
3734 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3735 path.nodes[level] = root->node;
3736 extent_buffer_get(root->node);
3737 path.slots[level] = 0;
3739 struct btrfs_key key;
3740 struct btrfs_disk_key found_key;
3742 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3743 level = root_item->drop_level;
3744 path.lowest_level = level;
3745 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3748 btrfs_node_key(path.nodes[level], &found_key,
3750 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3751 sizeof(found_key)));
3755 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3761 wret = walk_up_tree(root, &path, wc, &level);
3768 btrfs_release_path(&path);
3770 if (!cache_tree_empty(&corrupt_blocks)) {
3771 struct cache_extent *cache;
3772 struct btrfs_corrupt_block *corrupt;
3774 printf("The following tree block(s) is corrupted in tree %llu:\n",
3775 root->root_key.objectid);
3776 cache = first_cache_extent(&corrupt_blocks);
3778 corrupt = container_of(cache,
3779 struct btrfs_corrupt_block,
3781 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3782 cache->start, corrupt->level,
3783 corrupt->key.objectid, corrupt->key.type,
3784 corrupt->key.offset);
3785 cache = next_cache_extent(cache);
3788 printf("Try to repair the btree for root %llu\n",
3789 root->root_key.objectid);
3790 ret = repair_btree(root, &corrupt_blocks);
3792 fprintf(stderr, "Failed to repair btree: %s\n",
3795 printf("Btree for root %llu is fixed\n",
3796 root->root_key.objectid);
3800 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3804 if (root_node.current) {
3805 root_node.current->checked = 1;
3806 maybe_free_inode_rec(&root_node.inode_cache,
3810 err = check_inode_recs(root, &root_node.inode_cache);
/* Tear down per-root state before returning */
3814 free_corrupt_blocks_tree(&corrupt_blocks);
3815 root->fs_info->corrupt_blocks = NULL;
3816 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Return non-zero when @objectid names a tree whose contents should be
 * checked as an fs tree: reloc/data-reloc trees and regular fs trees.
 * NOTE(review): the early-return body of the if is on a line not visible
 * in this excerpt.
 */
3820 static int fs_root_objectid(u64 objectid)
3822 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3823 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3825 return is_fstree(objectid);
/*
 * Iterate every ROOT_ITEM in the tree root and run check_fs_root() on
 * each fs/reloc tree; also feed ROOT_REF/ROOT_BACKREF items into the
 * root cache.  Restarts the whole scan when the tree root node changes
 * underneath us or a sub-check returns -EAGAIN.
 * NOTE(review): excerpt is incomplete (numbering gaps); the outer loop
 * structure and restart labels are partially missing.
 */
3828 static int check_fs_roots(struct btrfs_root *root,
3829 struct cache_tree *root_cache)
3831 struct btrfs_path path;
3832 struct btrfs_key key;
3833 struct walk_control wc;
3834 struct extent_buffer *leaf, *tree_node;
3835 struct btrfs_root *tmp_root;
3836 struct btrfs_root *tree_root = root->fs_info->tree_root;
3840 if (ctx.progress_enabled) {
3841 ctx.tp = TASK_FS_ROOTS;
3842 task_start(ctx.info);
3846 * Just in case we made any changes to the extent tree that weren't
3847 * reflected into the free space cache yet.
3850 reset_cached_block_groups(root->fs_info);
3851 memset(&wc, 0, sizeof(wc));
3852 cache_tree_init(&wc.shared);
3853 btrfs_init_path(&path);
3858 key.type = BTRFS_ROOT_ITEM_KEY;
3859 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3864 tree_node = tree_root->node;
/* Root node changed since we started: drop everything and rescan */
3866 if (tree_node != tree_root->node) {
3867 free_root_recs_tree(root_cache);
3868 btrfs_release_path(&path);
3871 leaf = path.nodes[0];
3872 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3873 ret = btrfs_next_leaf(tree_root, &path);
3879 leaf = path.nodes[0];
3881 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3882 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3883 fs_root_objectid(key.objectid)) {
3884 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3885 tmp_root = btrfs_read_fs_root_no_cache(
3886 root->fs_info, &key);
3888 key.offset = (u64)-1;
3889 tmp_root = btrfs_read_fs_root(
3890 root->fs_info, &key);
3892 if (IS_ERR(tmp_root)) {
3896 ret = check_fs_root(tmp_root, root_cache, &wc);
3897 if (ret == -EAGAIN) {
3898 free_root_recs_tree(root_cache);
3899 btrfs_release_path(&path);
/* Reloc roots were read uncached, so free them explicitly */
3904 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3905 btrfs_free_fs_root(tmp_root);
3906 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3907 key.type == BTRFS_ROOT_BACKREF_KEY) {
3908 process_root_ref(leaf, path.slots[0], &key,
3915 btrfs_release_path(&path);
3917 free_extent_cache_tree(&wc.shared);
3918 if (!cache_tree_empty(&wc.shared))
3919 fprintf(stderr, "warning line %d\n", __LINE__);
3921 task_stop(ctx.info);
/*
 * Cross-check every backref recorded for extent @rec against what was
 * found in the extent tree: missing entries, unreferenced tree backrefs,
 * per-backref data ref counts, bytenr/length mismatches, and finally the
 * global ref count.  When @print_errs is set each mismatch is reported.
 * NOTE(review): excerpt is incomplete (numbering gaps); the early-exit
 * paths when !print_errs are not fully visible.
 */
3926 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3929 struct extent_backref *back;
3930 struct tree_backref *tback;
3931 struct data_backref *dback;
3935 for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3936 back = rb_node_to_extent_backref(n);
3937 if (!back->found_extent_tree) {
3941 if (back->is_data) {
3942 dback = to_data_backref(back);
3943 fprintf(stderr, "Backref %llu %s %llu"
3944 " owner %llu offset %llu num_refs %lu"
3945 " not found in extent tree\n",
3946 (unsigned long long)rec->start,
3947 back->full_backref ?
3949 back->full_backref ?
3950 (unsigned long long)dback->parent:
3951 (unsigned long long)dback->root,
3952 (unsigned long long)dback->owner,
3953 (unsigned long long)dback->offset,
3954 (unsigned long)dback->num_refs);
3956 tback = to_tree_backref(back);
3957 fprintf(stderr, "Backref %llu parent %llu"
3958 " root %llu not found in extent tree\n",
3959 (unsigned long long)rec->start,
3960 (unsigned long long)tback->parent,
3961 (unsigned long long)tback->root);
3964 if (!back->is_data && !back->found_ref) {
3968 tback = to_tree_backref(back);
3969 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3970 (unsigned long long)rec->start,
3971 back->full_backref ? "parent" : "root",
3972 back->full_backref ?
3973 (unsigned long long)tback->parent :
3974 (unsigned long long)tback->root, back);
3976 if (back->is_data) {
3977 dback = to_data_backref(back);
/* Per-backref count must match the refs claimed in the extent item */
3978 if (dback->found_ref != dback->num_refs) {
3982 fprintf(stderr, "Incorrect local backref count"
3983 " on %llu %s %llu owner %llu"
3984 " offset %llu found %u wanted %u back %p\n",
3985 (unsigned long long)rec->start,
3986 back->full_backref ?
3988 back->full_backref ?
3989 (unsigned long long)dback->parent:
3990 (unsigned long long)dback->root,
3991 (unsigned long long)dback->owner,
3992 (unsigned long long)dback->offset,
3993 dback->found_ref, dback->num_refs, back);
3995 if (dback->disk_bytenr != rec->start) {
3999 fprintf(stderr, "Backref disk bytenr does not"
4000 " match extent record, bytenr=%llu, "
4001 "ref bytenr=%llu\n",
4002 (unsigned long long)rec->start,
4003 (unsigned long long)dback->disk_bytenr);
4006 if (dback->bytes != rec->nr) {
4010 fprintf(stderr, "Backref bytes do not match "
4011 "extent backref, bytenr=%llu, ref "
4012 "bytes=%llu, backref bytes=%llu\n",
4013 (unsigned long long)rec->start,
4014 (unsigned long long)rec->nr,
4015 (unsigned long long)dback->bytes);
4018 if (!back->is_data) {
4021 dback = to_data_backref(back);
4022 found += dback->found_ref;
/* Global ref count across all backrefs must equal the extent's refs */
4025 if (found != rec->refs) {
4029 fprintf(stderr, "Incorrect global backref count "
4030 "on %llu found %llu wanted %llu\n",
4031 (unsigned long long)rec->start,
4032 (unsigned long long)found,
4033 (unsigned long long)rec->refs);
/*
 * rb-tree node destructor used by free_all_extent_backrefs().
 * NOTE(review): the free() call is on a line not visible here; also the
 * leading double-underscore name is reserved to the implementation
 * (CERT DCL37-C) — kept as-is to match the file's convention.
 */
4039 static void __free_one_backref(struct rb_node *node)
4041 struct extent_backref *back = rb_node_to_extent_backref(node);
/* Free every backref node hanging off @rec's backref rb-tree. */
4046 static void free_all_extent_backrefs(struct extent_record *rec)
4048 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Drain @extent_cache, freeing each extent record and all of its
 * backrefs.  @fs_info is unused in the visible portion of this body.
 * NOTE(review): excerpt is incomplete (numbering gaps); the loop header
 * and the free of @rec itself are not visible.
 */
4051 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4052 struct cache_tree *extent_cache)
4054 struct cache_extent *cache;
4055 struct extent_record *rec;
4058 cache = first_cache_extent(extent_cache);
4061 rec = container_of(cache, struct extent_record, cache);
4062 remove_cache_extent(extent_cache, cache);
4063 free_all_extent_backrefs(rec);
/*
 * Release @rec from @extent_cache once it is fully verified: content and
 * owner checked, ref counts consistent, no duplicates, and none of the
 * bad-backref/crossing-stripe/wrong-chunk flags set.
 * NOTE(review): the final free of @rec and the return statements are on
 * lines not visible in this excerpt.
 */
4068 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4069 struct extent_record *rec)
4071 if (rec->content_checked && rec->owner_ref_checked &&
4072 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4073 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4074 !rec->bad_full_backref && !rec->crossing_stripes &&
4075 !rec->wrong_chunk_type) {
4076 remove_cache_extent(extent_cache, &rec->cache);
4077 free_all_extent_backrefs(rec);
4078 list_del_init(&rec->list);
/*
 * Verify that the owner recorded in tree block @buf really references it:
 * first look for a matching non-full tree backref on @rec, otherwise
 * search the owner's fs tree and confirm the parent node points at
 * @buf->start.  Returns 0 when the owner ref checks out, 1 otherwise.
 * NOTE(review): excerpt is incomplete (numbering gaps).
 */
4084 static int check_owner_ref(struct btrfs_root *root,
4085 struct extent_record *rec,
4086 struct extent_buffer *buf)
4088 struct extent_backref *node, *tmp;
4089 struct tree_backref *back;
4090 struct btrfs_root *ref_root;
4091 struct btrfs_key key;
4092 struct btrfs_path path;
4093 struct extent_buffer *parent;
4098 rbtree_postorder_for_each_entry_safe(node, tmp,
4099 &rec->backref_tree, node) {
4102 if (!node->found_ref)
4104 if (node->full_backref)
4106 back = to_tree_backref(node);
4107 if (btrfs_header_owner(buf) == back->root)
4110 BUG_ON(rec->is_root);
4112 /* try to find the block by search corresponding fs tree */
4113 key.objectid = btrfs_header_owner(buf);
4114 key.type = BTRFS_ROOT_ITEM_KEY;
4115 key.offset = (u64)-1;
4117 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4118 if (IS_ERR(ref_root))
4121 level = btrfs_header_level(buf);
/* Use @buf's first key to position the search in the owner tree */
4123 btrfs_item_key_to_cpu(buf, &key, 0);
4125 btrfs_node_key_to_cpu(buf, &key, 0);
4127 btrfs_init_path(&path);
4128 path.lowest_level = level + 1;
4129 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4133 parent = path.nodes[level + 1];
4134 if (parent && buf->start == btrfs_node_blockptr(parent,
4135 path.slots[level + 1]))
4138 btrfs_release_path(&path);
4139 return found ? 0 : 1;
/*
 * Return non-zero if @rec has a keyed tree backref owned by the extent
 * tree, i.e. the block belongs to the extent tree itself.
 */
4142 static int is_extent_tree_record(struct extent_record *rec)
4144 struct extent_backref *ref, *tmp;
4145 struct tree_backref *back;
4148 rbtree_postorder_for_each_entry_safe(ref, tmp,
4149 &rec->backref_tree, node) {
4152 back = to_tree_backref(ref);
4153 if (ref->full_backref)
4155 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * An I/O error was hit reading [start, start+len).  If the range is a
 * cached extent-tree block, record it as a corrupt extent (keyed by its
 * parent key) so later repair passes know about it.
 */
4162 static int record_bad_block_io(struct btrfs_fs_info *info,
4163 struct cache_tree *extent_cache,
4166 struct extent_record *rec;
4167 struct cache_extent *cache;
4168 struct btrfs_key key;
4170 cache = lookup_cache_extent(extent_cache, start, len);
4174 rec = container_of(cache, struct extent_record, cache);
4175 if (!is_extent_tree_record(rec))
4178 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4179 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot+1 in @buf to repair bad key order.
 *
 * For a node: swap the two key_ptrs wholesale; if slot 0 changed, push
 * the new lowest key up to the parent with btrfs_fixup_low_keys().
 * For a leaf: swap the item data payloads, then swap the item headers'
 * offset/size fields, and finally rewrite both item keys via
 * btrfs_set_item_key_unsafe() (which also fixes up parent keys).
 */
4182 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4183 struct extent_buffer *buf, int slot)
4185 if (btrfs_header_level(buf)) {
4186 struct btrfs_key_ptr ptr1, ptr2;
4188 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4189 sizeof(struct btrfs_key_ptr));
4190 read_extent_buffer(buf, &ptr2,
4191 btrfs_node_key_ptr_offset(slot + 1),
4192 sizeof(struct btrfs_key_ptr));
4193 write_extent_buffer(buf, &ptr1,
4194 btrfs_node_key_ptr_offset(slot + 1),
4195 sizeof(struct btrfs_key_ptr));
4196 write_extent_buffer(buf, &ptr2,
4197 btrfs_node_key_ptr_offset(slot),
4198 sizeof(struct btrfs_key_ptr));
4200 struct btrfs_disk_key key;
4201 btrfs_node_key(buf, &key, 0);
4202 btrfs_fixup_low_keys(root, path, &key,
4203 btrfs_header_level(buf) + 1);
4206 struct btrfs_item *item1, *item2;
4207 struct btrfs_key k1, k2;
4208 char *item1_data, *item2_data;
4209 u32 item1_offset, item2_offset, item1_size, item2_size;
4211 item1 = btrfs_item_nr(slot);
4212 item2 = btrfs_item_nr(slot + 1);
4213 btrfs_item_key_to_cpu(buf, &k1, slot);
4214 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4215 item1_offset = btrfs_item_offset(buf, item1);
4216 item2_offset = btrfs_item_offset(buf, item2);
4217 item1_size = btrfs_item_size(buf, item1);
4218 item2_size = btrfs_item_size(buf, item2);
4220 item1_data = malloc(item1_size);
4223 item2_data = malloc(item2_size);
/* Cross-copy the payloads into each other's data area. */
4229 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4230 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4232 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4233 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Swap the headers so each key keeps pointing at its own payload. */
4237 btrfs_set_item_offset(buf, item1, item2_offset);
4238 btrfs_set_item_offset(buf, item2, item1_offset);
4239 btrfs_set_item_size(buf, item1, item2_size);
4240 btrfs_set_item_size(buf, item2, item1_size);
4242 path->slots[0] = slot;
4243 btrfs_set_item_key_unsafe(root, path, &k2);
4244 path->slots[0] = slot + 1;
4245 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Walk the block at path->lowest_level and swap any adjacent pair of
 * keys that is out of order (node key-ptrs or leaf item keys as
 * appropriate), marking the buffer dirty after each fix.
 */
4250 static int fix_key_order(struct btrfs_trans_handle *trans,
4251 struct btrfs_root *root,
4252 struct btrfs_path *path)
4254 struct extent_buffer *buf;
4255 struct btrfs_key k1, k2;
4257 int level = path->lowest_level;
4260 buf = path->nodes[level];
4261 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4263 btrfs_node_key_to_cpu(buf, &k1, i);
4264 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4266 btrfs_item_key_to_cpu(buf, &k1, i);
4267 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* Already ordered -- nothing to repair for this pair. */
4269 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4271 ret = swap_values(root, path, buf, i);
4274 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item at @slot from leaf @buf when it is of a type whose
 * loss a later fsck pass can detect and recover (dir index, extent
 * item/backref types).  Anything else is refused so we do not silently
 * destroy data.  The item headers after @slot are shifted down and, if
 * slot 0 was removed, the new low key is propagated to the parent.
 */
4280 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4281 struct btrfs_root *root,
4282 struct btrfs_path *path,
4283 struct extent_buffer *buf, int slot)
4285 struct btrfs_key key;
4286 int nritems = btrfs_header_nritems(buf);
4288 btrfs_item_key_to_cpu(buf, &key, slot);
4290 /* These are all the keys we can deal with missing. */
4291 if (key.type != BTRFS_DIR_INDEX_KEY &&
4292 key.type != BTRFS_EXTENT_ITEM_KEY &&
4293 key.type != BTRFS_METADATA_ITEM_KEY &&
4294 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4295 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4298 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4299 (unsigned long long)key.objectid, key.type,
4300 (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array (payload left in place). */
4301 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4302 btrfs_item_nr_offset(slot + 1),
4303 sizeof(struct btrfs_item) *
4304 (nritems - slot - 1));
4305 btrfs_set_header_nritems(buf, nritems - 1);
4307 struct btrfs_disk_key disk_key;
4309 btrfs_item_key(buf, &disk_key, 0);
4310 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4312 btrfs_mark_buffer_dirty(buf);
/*
 * Repair bad item data offsets in a leaf.  Each item's data must end
 * where the previous item's data begins (the first item ends at the
 * leaf data size).  Items whose data runs past that boundary are
 * deleted via delete_bogus_item() when possible; items that leave a
 * gap are shifted up by the gap size.
 */
4316 static int fix_item_offset(struct btrfs_trans_handle *trans,
4317 struct btrfs_root *root,
4318 struct btrfs_path *path)
4320 struct extent_buffer *buf;
4324 /* We should only get this for leaves */
4325 BUG_ON(path->lowest_level);
4326 buf = path->nodes[0];
4328 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4329 unsigned int shift = 0, offset;
4331 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4332 BTRFS_LEAF_DATA_SIZE(root)) {
4333 if (btrfs_item_end_nr(buf, i) >
4334 BTRFS_LEAF_DATA_SIZE(root)) {
4335 ret = delete_bogus_item(trans, root, path,
4339 fprintf(stderr, "item is off the end of the "
4340 "leaf, can't fix\n");
4344 shift = BTRFS_LEAF_DATA_SIZE(root) -
4345 btrfs_item_end_nr(buf, i);
4346 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4347 btrfs_item_offset_nr(buf, i - 1)) {
4348 if (btrfs_item_end_nr(buf, i) >
4349 btrfs_item_offset_nr(buf, i - 1)) {
4350 ret = delete_bogus_item(trans, root, path,
4354 fprintf(stderr, "items overlap, can't fix\n");
4358 shift = btrfs_item_offset_nr(buf, i - 1) -
4359 btrfs_item_end_nr(buf, i);
4364 printf("Shifting item nr %d by %u bytes in block %llu\n",
4365 i, shift, (unsigned long long)buf->start);
4366 offset = btrfs_item_offset_nr(buf, i);
/* Move the payload up and record its new offset in the header. */
4367 memmove_extent_buffer(buf,
4368 btrfs_leaf_data(buf) + offset + shift,
4369 btrfs_leaf_data(buf) + offset,
4370 btrfs_item_size_nr(buf, i));
4371 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4373 btrfs_mark_buffer_dirty(buf);
4377 * We may have moved things, in which case we want to exit so we don't
4378 * write those changes out. Once we have proper abort functionality in
4379 * progs this can be changed to something nicer.
4386 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4387 * then just return -EIO.
/*
 * Only BAD_KEY_ORDER and INVALID_OFFSETS are repairable here.  For each
 * root that references the block (found via btrfs_find_all_roots), a
 * transaction is started, the block is COWed into place by searching
 * with skip_check_block set, and the matching fixer is applied.
 */
4389 static int try_to_fix_bad_block(struct btrfs_root *root,
4390 struct extent_buffer *buf,
4391 enum btrfs_tree_block_status status)
4393 struct btrfs_trans_handle *trans;
4394 struct ulist *roots;
4395 struct ulist_node *node;
4396 struct btrfs_root *search_root;
4397 struct btrfs_path *path;
4398 struct ulist_iterator iter;
4399 struct btrfs_key root_key, key;
4402 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4403 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4406 path = btrfs_alloc_path();
4410 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4413 btrfs_free_path(path);
4417 ULIST_ITER_INIT(&iter);
4418 while ((node = ulist_next(roots, &iter))) {
4419 root_key.objectid = node->val;
4420 root_key.type = BTRFS_ROOT_ITEM_KEY;
4421 root_key.offset = (u64)-1;
4423 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4430 trans = btrfs_start_transaction(search_root, 0);
4431 if (IS_ERR(trans)) {
4432 ret = PTR_ERR(trans);
/* COW down to the level of @buf; bypass the block sanity check. */
4436 path->lowest_level = btrfs_header_level(buf);
4437 path->skip_check_block = 1;
4438 if (path->lowest_level)
4439 btrfs_node_key_to_cpu(buf, &key, 0);
4441 btrfs_item_key_to_cpu(buf, &key, 0);
4442 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4445 btrfs_commit_transaction(trans, search_root);
4448 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4449 ret = fix_key_order(trans, search_root, path);
4450 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4451 ret = fix_item_offset(trans, search_root, path);
4453 btrfs_commit_transaction(trans, search_root);
4456 btrfs_release_path(path);
4457 btrfs_commit_transaction(trans, search_root);
4460 btrfs_free_path(path);
/*
 * Validate tree block @buf against its cached extent record: record its
 * generation, first key and level, then run btrfs_check_leaf()/node().
 * On failure, attempt try_to_fix_bad_block(); a successful fix means
 * callers must restart the scan (blocks were COWed).  Owner refs are
 * implicitly OK for FULL_BACKREF blocks, otherwise check_owner_ref()
 * decides.  Finally the record is freed if everything now checks out.
 */
4464 static int check_block(struct btrfs_root *root,
4465 struct cache_tree *extent_cache,
4466 struct extent_buffer *buf, u64 flags)
4468 struct extent_record *rec;
4469 struct cache_extent *cache;
4470 struct btrfs_key key;
4471 enum btrfs_tree_block_status status;
4475 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4478 rec = container_of(cache, struct extent_record, cache);
4479 rec->generation = btrfs_header_generation(buf);
4481 level = btrfs_header_level(buf);
4482 if (btrfs_header_nritems(buf) > 0) {
4485 btrfs_item_key_to_cpu(buf, &key, 0);
4487 btrfs_node_key_to_cpu(buf, &key, 0);
4489 rec->info_objectid = key.objectid;
4491 rec->info_level = level;
4493 if (btrfs_is_leaf(buf))
4494 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4496 status = btrfs_check_node(root, &rec->parent_key, buf);
4498 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4500 status = try_to_fix_bad_block(root, buf, status);
4501 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4503 fprintf(stderr, "bad block %llu\n",
4504 (unsigned long long)buf->start);
4507 * Signal to callers we need to start the scan over
4508 * again since we'll have cowed blocks.
4513 rec->content_checked = 1;
4514 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4515 rec->owner_ref_checked = 1;
4517 ret = check_owner_ref(root, rec, buf);
4519 rec->owner_ref_checked = 1;
4523 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look up a tree backref of @rec.  A non-zero @parent selects a shared
 * (full) backref keyed by parent bytenr; otherwise a keyed backref by
 * @root is matched.  Returns NULL when no such backref exists.
 */
4528 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4529 u64 parent, u64 root)
4531 struct rb_node *node;
4532 struct tree_backref *back = NULL;
4533 struct tree_backref match = {
4540 match.parent = parent;
4541 match.node.full_backref = 1;
4546 node = rb_search(&rec->backref_tree, &match.node.node,
4547 (rb_compare_keys)compare_extent_backref, NULL);
4549 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Allocate a new tree backref for @rec and insert it into the backref
 * rb-tree.  Shared (parent != 0) vs keyed (root) form mirrors
 * find_tree_backref().  Returns the new backref (NULL check elided
 * here -- presumably returns NULL on allocation failure).
 */
4554 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4555 u64 parent, u64 root)
4557 struct tree_backref *ref = malloc(sizeof(*ref));
4561 memset(&ref->node, 0, sizeof(ref->node));
4563 ref->parent = parent;
4564 ref->node.full_backref = 1;
4567 ref->node.full_backref = 0;
4569 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
/*
 * Look up a data backref of @rec matching either the shared form
 * (@parent) or the keyed form (@root/@owner/@offset), further
 * disambiguated by @found_ref/@disk_bytenr/@bytes (needed because the
 * same logical extent can carry multiple partially-matching refs).
 * Returns NULL when not found.
 */
4574 static struct data_backref *find_data_backref(struct extent_record *rec,
4575 u64 parent, u64 root,
4576 u64 owner, u64 offset,
4578 u64 disk_bytenr, u64 bytes)
4580 struct rb_node *node;
4581 struct data_backref *back = NULL;
4582 struct data_backref match = {
4589 .found_ref = found_ref,
4590 .disk_bytenr = disk_bytenr,
4594 match.parent = parent;
4595 match.node.full_backref = 1;
4600 node = rb_search(&rec->backref_tree, &match.node.node,
4601 (rb_compare_keys)compare_extent_backref, NULL);
4603 back = to_data_backref(rb_node_to_extent_backref(node));
/*
 * Allocate and insert a new data backref for @rec; also bumps the
 * record's max_size when @max_size exceeds it.  Shared vs keyed form
 * follows the same parent/root convention as the tree backrefs.
 */
4608 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4609 u64 parent, u64 root,
4610 u64 owner, u64 offset,
4613 struct data_backref *ref = malloc(sizeof(*ref));
4617 memset(&ref->node, 0, sizeof(ref->node));
4618 ref->node.is_data = 1;
4621 ref->parent = parent;
4624 ref->node.full_backref = 1;
4628 ref->offset = offset;
4629 ref->node.full_backref = 0;
4631 ref->bytes = max_size;
4634 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4635 if (max_size > rec->max_size)
4636 rec->max_size = max_size;
4640 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent's kind (data vs metadata,
 * and SYSTEM vs METADATA for tree blocks, inferred from the first
 * backref's root) disagrees with the flags of the block group that
 * contains it.
 */
4641 static void check_extent_type(struct extent_record *rec)
4643 struct btrfs_block_group_cache *bg_cache;
4645 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4649 /* data extent, check chunk directly*/
4650 if (!rec->metadata) {
4651 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4652 rec->wrong_chunk_type = 1;
4656 /* metadata extent, check the obvious case first */
4657 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4658 BTRFS_BLOCK_GROUP_METADATA))) {
4659 rec->wrong_chunk_type = 1;
4664 * Check SYSTEM extent, as it's also marked as metadata, we can only
4665 * make sure it's a SYSTEM extent by its backref
4667 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4668 struct extent_backref *node;
4669 struct tree_backref *tback;
4672 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4673 if (node->is_data) {
4674 /* tree block shouldn't have data backref */
4675 rec->wrong_chunk_type = 1;
4678 tback = container_of(node, struct tree_backref, node);
/* Chunk-tree-owned blocks must live in a SYSTEM block group. */
4680 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4681 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4683 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4684 if (!(bg_cache->flags & bg_type))
4685 rec->wrong_chunk_type = 1;
4690 * Allocate a new extent record, fill default values from @tmpl and insert int
4691 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4692 * the cache, otherwise it fails.
/*
 * On success also updates the global bytes_used accounting, computes
 * the crossing_stripes flag for metadata and validates the chunk type.
 */
4694 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4695 struct extent_record *tmpl)
4697 struct extent_record *rec;
4700 rec = malloc(sizeof(*rec));
4703 rec->start = tmpl->start;
4704 rec->max_size = tmpl->max_size;
/* nr may be 0 in the template; never let it undercut max_size. */
4705 rec->nr = max(tmpl->nr, tmpl->max_size);
4706 rec->found_rec = tmpl->found_rec;
4707 rec->content_checked = tmpl->content_checked;
4708 rec->owner_ref_checked = tmpl->owner_ref_checked;
4709 rec->num_duplicates = 0;
4710 rec->metadata = tmpl->metadata;
4711 rec->flag_block_full_backref = FLAG_UNSET;
4712 rec->bad_full_backref = 0;
4713 rec->crossing_stripes = 0;
4714 rec->wrong_chunk_type = 0;
4715 rec->is_root = tmpl->is_root;
4716 rec->refs = tmpl->refs;
4717 rec->extent_item_refs = tmpl->extent_item_refs;
4718 rec->parent_generation = tmpl->parent_generation;
4719 INIT_LIST_HEAD(&rec->backrefs);
4720 INIT_LIST_HEAD(&rec->dups);
4721 INIT_LIST_HEAD(&rec->list);
4722 rec->backref_tree = RB_ROOT;
4723 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4724 rec->cache.start = tmpl->start;
4725 rec->cache.size = tmpl->nr;
4726 ret = insert_cache_extent(extent_cache, &rec->cache);
4728 bytes_used += rec->nr;
4731 rec->crossing_stripes = check_crossing_stripes(rec->start,
4732 global_info->tree_root->nodesize);
4733 check_extent_type(rec);
4738 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4740 * - refs - if found, increase refs
4741 * - is_root - if found, set
4742 * - content_checked - if found, set
4743 * - owner_ref_checked - if found, set
4745 * If not found, create a new one, initialize and insert.
4747 static int add_extent_rec(struct cache_tree *extent_cache,
4748 struct extent_record *tmpl)
4750 struct extent_record *rec;
4751 struct cache_extent *cache;
4755 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4757 rec = container_of(cache, struct extent_record, cache);
4761 rec->nr = max(tmpl->nr, tmpl->max_size);
4764 * We need to make sure to reset nr to whatever the extent
4765 * record says was the real size, this way we can compare it to
4768 if (tmpl->found_rec) {
/*
 * A second EXTENT_ITEM overlapping this record: remember it as
 * a duplicate on rec->dups and queue rec for dedup processing.
 */
4769 if (tmpl->start != rec->start || rec->found_rec) {
4770 struct extent_record *tmp;
4773 if (list_empty(&rec->list))
4774 list_add_tail(&rec->list,
4775 &duplicate_extents);
4778 * We have to do this song and dance in case we
4779 * find an extent record that falls inside of
4780 * our current extent record but does not have
4781 * the same objectid.
4783 tmp = malloc(sizeof(*tmp));
4786 tmp->start = tmpl->start;
4787 tmp->max_size = tmpl->max_size;
4790 tmp->metadata = tmpl->metadata;
4791 tmp->extent_item_refs = tmpl->extent_item_refs;
4792 INIT_LIST_HEAD(&tmp->list);
4793 list_add_tail(&tmp->list, &rec->dups);
4794 rec->num_duplicates++;
4801 if (tmpl->extent_item_refs && !dup) {
4802 if (rec->extent_item_refs) {
4803 fprintf(stderr, "block %llu rec "
4804 "extent_item_refs %llu, passed %llu\n",
4805 (unsigned long long)tmpl->start,
4806 (unsigned long long)
4807 rec->extent_item_refs,
4808 (unsigned long long)tmpl->extent_item_refs);
4810 rec->extent_item_refs = tmpl->extent_item_refs;
4814 if (tmpl->content_checked)
4815 rec->content_checked = 1;
4816 if (tmpl->owner_ref_checked)
4817 rec->owner_ref_checked = 1;
4818 memcpy(&rec->parent_key, &tmpl->parent_key,
4819 sizeof(tmpl->parent_key));
4820 if (tmpl->parent_generation)
4821 rec->parent_generation = tmpl->parent_generation;
4822 if (rec->max_size < tmpl->max_size)
4823 rec->max_size = tmpl->max_size;
4826 * A metadata extent can't cross stripe_len boundary, otherwise
4827 * kernel scrub won't be able to handle it.
4828 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4832 rec->crossing_stripes = check_crossing_stripes(
4833 rec->start, global_info->tree_root->nodesize);
4834 check_extent_type(rec);
4835 maybe_free_extent_rec(extent_cache, rec);
4839 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree backref (shared if @parent != 0, keyed by @root
 * otherwise) for block @bytenr.  A placeholder extent record is
 * created on demand.  @found_ref distinguishes refs found by walking
 * the trees from refs found in the extent tree; duplicates in either
 * category are reported as errors.
 */
4844 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4845 u64 parent, u64 root, int found_ref)
4847 struct extent_record *rec;
4848 struct tree_backref *back;
4849 struct cache_extent *cache;
4851 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4853 struct extent_record tmpl;
4855 memset(&tmpl, 0, sizeof(tmpl));
4856 tmpl.start = bytenr;
4860 add_extent_rec_nolookup(extent_cache, &tmpl);
/* Re-lookup: the nolookup insert should have cached it. */
4862 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4867 rec = container_of(cache, struct extent_record, cache);
4868 if (rec->start != bytenr) {
4872 back = find_tree_backref(rec, parent, root);
4874 back = alloc_tree_backref(rec, parent, root);
4879 if (back->node.found_ref) {
4880 fprintf(stderr, "Extent back ref already exists "
4881 "for %llu parent %llu root %llu \n",
4882 (unsigned long long)bytenr,
4883 (unsigned long long)parent,
4884 (unsigned long long)root);
4886 back->node.found_ref = 1;
4888 if (back->node.found_extent_tree) {
4889 fprintf(stderr, "Extent back ref already exists "
4890 "for %llu parent %llu root %llu \n",
4891 (unsigned long long)bytenr,
4892 (unsigned long long)parent,
4893 (unsigned long long)root);
4895 back->node.found_extent_tree = 1;
4897 check_extent_type(rec);
4898 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for extent @bytenr; creates a placeholder
 * extent record when none is cached yet.  When @found_ref is set the
 * ref came from an actual file extent item (max_size is its real
 * length); otherwise it came from the extent tree and carries
 * @num_refs.  Mismatched duplicates are reported instead of merged.
 */
4902 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4903 u64 parent, u64 root, u64 owner, u64 offset,
4904 u32 num_refs, int found_ref, u64 max_size)
4906 struct extent_record *rec;
4907 struct data_backref *back;
4908 struct cache_extent *cache;
4910 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4912 struct extent_record tmpl;
4914 memset(&tmpl, 0, sizeof(tmpl));
4915 tmpl.start = bytenr;
4917 tmpl.max_size = max_size;
4919 add_extent_rec_nolookup(extent_cache, &tmpl);
4921 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4926 rec = container_of(cache, struct extent_record, cache);
4927 if (rec->max_size < max_size)
4928 rec->max_size = max_size;
4931 * If found_ref is set then max_size is the real size and must match the
4932 * existing refs. So if we have already found a ref then we need to
4933 * make sure that this ref matches the existing one, otherwise we need
4934 * to add a new backref so we can notice that the backrefs don't match
4935 * and we need to figure out who is telling the truth. This is to
4936 * account for that awful fsync bug I introduced where we'd end up with
4937 * a btrfs_file_extent_item that would have its length include multiple
4938 * prealloc extents or point inside of a prealloc extent.
4940 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4943 back = alloc_data_backref(rec, parent, root, owner, offset,
4949 BUG_ON(num_refs != 1);
4950 if (back->node.found_ref)
4951 BUG_ON(back->bytes != max_size);
4952 back->node.found_ref = 1;
4953 back->found_ref += 1;
4954 back->bytes = max_size;
4955 back->disk_bytenr = bytenr;
/* Data extents have no owner/content to validate beyond the refs. */
4957 rec->content_checked = 1;
4958 rec->owner_ref_checked = 1;
4960 if (back->node.found_extent_tree) {
4961 fprintf(stderr, "Extent back ref already exists "
4962 "for %llu parent %llu root %llu "
4963 "owner %llu offset %llu num_refs %lu\n",
4964 (unsigned long long)bytenr,
4965 (unsigned long long)parent,
4966 (unsigned long long)root,
4967 (unsigned long long)owner,
4968 (unsigned long long)offset,
4969 (unsigned long)num_refs);
4971 back->num_refs = num_refs;
4972 back->node.found_extent_tree = 1;
4974 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, bytenr+size) for scanning: mark it @seen first so the
 * same range is never queued twice, then add it to @pending.
 */
4978 static int add_pending(struct cache_tree *pending,
4979 struct cache_tree *seen, u64 bytenr, u32 size)
4982 ret = add_cache_extent(seen, bytenr, size);
4985 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits with up to @bits_nr block ranges to process next.
 * Priority: explicit readahead requests first, then cached tree nodes
 * near @last (backing up 32K to keep reads roughly sequential, with
 * wrap-around to the start), then plain pending blocks.  When spare
 * slots remain, nearby pending extents (within 32K gaps) are batched
 * in as opportunistic readahead.  Returns the number of entries
 * placed in @bits.
 */
4989 static int pick_next_pending(struct cache_tree *pending,
4990 struct cache_tree *reada,
4991 struct cache_tree *nodes,
4992 u64 last, struct block_info *bits, int bits_nr,
4995 unsigned long node_start = last;
4996 struct cache_extent *cache;
4999 cache = search_cache_extent(reada, 0);
5001 bits[0].start = cache->start;
5002 bits[0].size = cache->size;
5007 if (node_start > 32768)
5008 node_start -= 32768;
5010 cache = search_cache_extent(nodes, node_start);
5012 cache = search_cache_extent(nodes, 0);
5015 cache = search_cache_extent(pending, 0);
5020 bits[ret].start = cache->start;
5021 bits[ret].size = cache->size;
5022 cache = next_cache_extent(cache);
5024 } while (cache && ret < bits_nr);
5030 bits[ret].start = cache->start;
5031 bits[ret].size = cache->size;
5032 cache = next_cache_extent(cache);
5034 } while (cache && ret < bits_nr);
/* Opportunistically batch in nearby pending ranges as readahead. */
5036 if (bits_nr - ret > 8) {
5037 u64 lookup = bits[0].start + bits[0].size;
5038 struct cache_extent *next;
5039 next = search_cache_extent(pending, lookup);
5041 if (next->start - lookup > 32768)
5043 bits[ret].start = next->start;
5044 bits[ret].size = next->size;
5045 lookup = next->start + next->size;
5049 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback: unlink a chunk_record from its
 * lists before it is released.
 */
5057 static void free_chunk_record(struct cache_extent *cache)
5059 struct chunk_record *rec;
5061 rec = container_of(cache, struct chunk_record, cache);
5062 list_del_init(&rec->list);
5063 list_del_init(&rec->dextents);
/* Free every chunk_record in @chunk_cache. */
5067 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5069 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * FREE_RB_BASED_TREE callback: recover and free a device_record from
 * its rb_node.  (The free itself is elided in this view.)
 */
5072 static void free_device_record(struct rb_node *node)
5074 struct device_record *rec;
5076 rec = container_of(node, struct device_record, node);
/* NOTE(review): line below belongs to the enclosing free_device_cache(). */
5080 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into @tree's cache (fails on overlap) and append it to
 * the tree-wide block_groups list.
 */
5082 int insert_block_group_record(struct block_group_tree *tree,
5083 struct block_group_record *bg_rec)
5087 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5091 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback: unlink a block_group_record
 * before release.
 */
5095 static void free_block_group_record(struct cache_extent *cache)
5097 struct block_group_record *rec;
5099 rec = container_of(cache, struct block_group_record, cache);
5100 list_del_init(&rec->list);
/* Free every block_group_record held by @tree. */
5104 void free_block_group_tree(struct block_group_tree *tree)
5106 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into @tree and put it on both orphan lists; it is
 * removed from them once a matching chunk/device is found.
 */
5109 int insert_device_extent_record(struct device_extent_tree *tree,
5110 struct device_extent_record *de_rec)
5115 * Device extent is a bit different from the other extents, because
5116 * the extents which belong to the different devices may have the
5117 * same start and size, so we need use the special extent cache
5118 * search/insert functions.
5120 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5124 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5125 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback: detach a device_extent_record
 * from any orphan lists it is still on, then (elided) free it.
 */
5129 static void free_device_extent_record(struct cache_extent *cache)
5131 struct device_extent_record *rec;
5133 rec = container_of(cache, struct device_extent_record, cache);
5134 if (!list_empty(&rec->chunk_list))
5135 list_del_init(&rec->chunk_list);
5136 if (!list_empty(&rec->device_list))
5137 list_del_init(&rec->device_list);
/* Free every device_extent_record held by @tree. */
5141 void free_device_extent_tree(struct device_extent_tree *tree)
5143 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5146 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Legacy (v0) extent ref item: objectids below FIRST_FREE_OBJECTID are
 * internal trees, so the ref is a tree backref; otherwise it is a data
 * backref carrying the v0 ref count.  key.offset is the parent bytenr.
 */
5147 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5148 struct extent_buffer *leaf, int slot)
5150 struct btrfs_extent_ref_v0 *ref0;
5151 struct btrfs_key key;
5153 btrfs_item_key_to_cpu(leaf, &key, slot);
5154 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5155 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5156 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5158 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5159 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build a chunk_record from the CHUNK_ITEM at @slot of @leaf, including
 * one stripe entry per on-disk stripe (the record is allocated with a
 * flexible size via btrfs_chunk_record_size()).  Exits the program on
 * allocation failure.
 */
5165 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5166 struct btrfs_key *key,
5169 struct btrfs_chunk *ptr;
5170 struct chunk_record *rec;
5173 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5174 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5176 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5178 fprintf(stderr, "memory allocation failed\n");
5182 INIT_LIST_HEAD(&rec->list);
5183 INIT_LIST_HEAD(&rec->dextents);
/* Chunks are cached by logical start (key->offset) and length. */
5186 rec->cache.start = key->offset;
5187 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5189 rec->generation = btrfs_header_generation(leaf);
5191 rec->objectid = key->objectid;
5192 rec->type = key->type;
5193 rec->offset = key->offset;
5195 rec->length = rec->cache.size;
5196 rec->owner = btrfs_chunk_owner(leaf, ptr);
5197 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5198 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5199 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5200 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5201 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5202 rec->num_stripes = num_stripes;
5203 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5205 for (i = 0; i < rec->num_stripes; ++i) {
5206 rec->stripes[i].devid =
5207 btrfs_stripe_devid_nr(leaf, ptr, i);
5208 rec->stripes[i].offset =
5209 btrfs_stripe_offset_nr(leaf, ptr, i);
5210 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5211 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the CHUNK_ITEM at @slot and, when sane, cache it as a
 * chunk_record.  Invalid chunks are reported and skipped rather than
 * cached; overlapping/duplicate chunks are reported on insert.
 */
5218 static int process_chunk_item(struct cache_tree *chunk_cache,
5219 struct btrfs_key *key, struct extent_buffer *eb,
5222 struct chunk_record *rec;
5223 struct btrfs_chunk *chunk;
5226 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5228 * Do extra check for this chunk item,
5230 * It's still possible one can craft a leaf with CHUNK_ITEM, with
5231 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5232 * and owner<->key_type check.
5234 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5237 error("chunk(%llu, %llu) is not valid, ignore it",
5238 key->offset, btrfs_chunk_length(eb, chunk));
5241 rec = btrfs_new_chunk_record(eb, key, slot);
5242 ret = insert_cache_extent(chunk_cache, &rec->cache);
5244 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5245 rec->offset, rec->length);
/*
 * Build a device_record from the DEV_ITEM at @slot and insert it into
 * the devid-keyed rb-tree @dev_cache.  Duplicated devids are reported.
 * Exits the program on allocation failure.
 */
5252 static int process_device_item(struct rb_root *dev_cache,
5253 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5255 struct btrfs_dev_item *ptr;
5256 struct device_record *rec;
5259 ptr = btrfs_item_ptr(eb,
5260 slot, struct btrfs_dev_item);
5262 rec = malloc(sizeof(*rec));
5264 fprintf(stderr, "memory allocation failed\n");
5268 rec->devid = key->offset;
5269 rec->generation = btrfs_header_generation(eb);
5271 rec->objectid = key->objectid;
5272 rec->type = key->type;
5273 rec->offset = key->offset;
/* The item body is authoritative for the devid, not key->offset. */
5275 rec->devid = btrfs_device_id(eb, ptr);
5276 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5277 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5279 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5281 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the BLOCK_GROUP_ITEM at @slot; cached
 * by [objectid, offset) = [start, length).  Exits the program on
 * allocation failure.
 */
5288 struct block_group_record *
5289 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5292 struct btrfs_block_group_item *ptr;
5293 struct block_group_record *rec;
5295 rec = calloc(1, sizeof(*rec));
5297 fprintf(stderr, "memory allocation failed\n");
5301 rec->cache.start = key->objectid;
5302 rec->cache.size = key->offset;
5304 rec->generation = btrfs_header_generation(leaf);
5306 rec->objectid = key->objectid;
5307 rec->type = key->type;
5308 rec->offset = key->offset;
5310 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5311 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5313 INIT_LIST_HEAD(&rec->list);
/*
 * Cache the BLOCK_GROUP_ITEM at @slot as a block_group_record; a
 * duplicate/overlapping block group is reported on insert.
 */
5318 static int process_block_group_item(struct block_group_tree *block_group_cache,
5319 struct btrfs_key *key,
5320 struct extent_buffer *eb, int slot)
5322 struct block_group_record *rec;
5325 rec = btrfs_new_block_group_record(eb, key, slot);
5326 ret = insert_block_group_record(block_group_cache, rec);
5328 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5329 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the DEV_EXTENT item at @slot.
 * Cached by (devid = objectid, physical start = offset, length); the
 * chunk it maps back to is recorded via chunk_objecteid/chunk_offset.
 * Exits the program on allocation failure.
 */
5336 struct device_extent_record *
5337 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5338 struct btrfs_key *key, int slot)
5340 struct device_extent_record *rec;
5341 struct btrfs_dev_extent *ptr;
5343 rec = calloc(1, sizeof(*rec));
5345 fprintf(stderr, "memory allocation failed\n");
5349 rec->cache.objectid = key->objectid;
5350 rec->cache.start = key->offset;
5352 rec->generation = btrfs_header_generation(leaf);
5354 rec->objectid = key->objectid;
5355 rec->type = key->type;
5356 rec->offset = key->offset;
5358 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5359 rec->chunk_objecteid =
5360 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5362 btrfs_dev_extent_chunk_offset(leaf, ptr);
5363 rec->length = btrfs_dev_extent_length(leaf, ptr);
5364 rec->cache.size = rec->length;
5366 INIT_LIST_HEAD(&rec->chunk_list);
5367 INIT_LIST_HEAD(&rec->device_list);
/*
 * Cache the DEV_EXTENT item at @slot as a device_extent_record;
 * duplicates are reported on insert.
 */
5373 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5374 struct btrfs_key *key, struct extent_buffer *eb,
5377 struct device_extent_record *rec;
5380 rec = btrfs_new_device_extent_record(eb, key, slot);
5381 ret = insert_device_extent_record(dev_extent_cache, rec);
5384 "Device extent[%llu, %llu, %llu] existed.\n",
5385 rec->objectid, rec->offset, rec->length);
/*
 * Parse an EXTENT_ITEM / METADATA_ITEM at @slot and feed the extent
 * cache: register the extent record (size is nodesize for metadata
 * items, key.offset for extent items), then walk the inline refs and
 * add a tree or data backref for each.  Handles the pre-ei v1 format
 * under BTRFS_COMPAT_EXTENT_TREE_V0.
 */
5392 static int process_extent_item(struct btrfs_root *root,
5393 struct cache_tree *extent_cache,
5394 struct extent_buffer *eb, int slot)
5396 struct btrfs_extent_item *ei;
5397 struct btrfs_extent_inline_ref *iref;
5398 struct btrfs_extent_data_ref *dref;
5399 struct btrfs_shared_data_ref *sref;
5400 struct btrfs_key key;
5401 struct extent_record tmpl;
5405 u32 item_size = btrfs_item_size_nr(eb, slot);
5411 btrfs_item_key_to_cpu(eb, &key, slot);
5413 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5415 num_bytes = root->nodesize;
5417 num_bytes = key.offset;
/* Items smaller than btrfs_extent_item are the old v0 format. */
5420 if (item_size < sizeof(*ei)) {
5421 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5422 struct btrfs_extent_item_v0 *ei0;
5423 BUG_ON(item_size != sizeof(*ei0));
5424 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5425 refs = btrfs_extent_refs_v0(eb, ei0);
5429 memset(&tmpl, 0, sizeof(tmpl));
5430 tmpl.start = key.objectid;
5431 tmpl.nr = num_bytes;
5432 tmpl.extent_item_refs = refs;
5433 tmpl.metadata = metadata;
5435 tmpl.max_size = num_bytes;
5437 return add_extent_rec(extent_cache, &tmpl);
5440 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5441 refs = btrfs_extent_refs(eb, ei);
5442 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5447 memset(&tmpl, 0, sizeof(tmpl));
5448 tmpl.start = key.objectid;
5449 tmpl.nr = num_bytes;
5450 tmpl.extent_item_refs = refs;
5451 tmpl.metadata = metadata;
5453 tmpl.max_size = num_bytes;
5454 add_extent_rec(extent_cache, &tmpl);
/* Inline refs follow the extent item (and tree_block_info, if any). */
5456 ptr = (unsigned long)(ei + 1);
5457 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5458 key.type == BTRFS_EXTENT_ITEM_KEY)
5459 ptr += sizeof(struct btrfs_tree_block_info);
5461 end = (unsigned long)ei + item_size;
5463 iref = (struct btrfs_extent_inline_ref *)ptr;
5464 type = btrfs_extent_inline_ref_type(eb, iref);
5465 offset = btrfs_extent_inline_ref_offset(eb, iref);
5467 case BTRFS_TREE_BLOCK_REF_KEY:
5468 add_tree_backref(extent_cache, key.objectid,
5471 case BTRFS_SHARED_BLOCK_REF_KEY:
5472 add_tree_backref(extent_cache, key.objectid,
5475 case BTRFS_EXTENT_DATA_REF_KEY:
5476 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5477 add_data_backref(extent_cache, key.objectid, 0,
5478 btrfs_extent_data_ref_root(eb, dref),
5479 btrfs_extent_data_ref_objectid(eb,
5481 btrfs_extent_data_ref_offset(eb, dref),
5482 btrfs_extent_data_ref_count(eb, dref),
5485 case BTRFS_SHARED_DATA_REF_KEY:
5486 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5487 add_data_backref(extent_cache, key.objectid, offset,
5489 btrfs_shared_data_ref_count(eb, sref),
5493 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5494 key.objectid, key.type, num_bytes);
5497 ptr += btrfs_extent_inline_ref_size(type);
/*
 * check_cache_range - verify that the free-space cache contains an entry
 * exactly covering [offset, offset+bytes) within @cache.
 *
 * Superblock mirror stripes mapped into this block group are carved out of
 * the range first (recursing for the part left of a mirror), since they are
 * never free space.  A matching free-space entry is then looked up, its
 * offset/size validated, and the entry (or the checked part of it) removed
 * so leftover entries can be detected by the caller.
 *
 * NOTE(review): excerpt is garbled (stray line numbers, missing lines);
 * code left byte-identical.
 */
5504 static int check_cache_range(struct btrfs_root *root,
5505 struct btrfs_block_group_cache *cache,
5506 u64 offset, u64 bytes)
5508 struct btrfs_free_space *entry;
/* Exclude every superblock mirror that lands inside this block group. */
5514 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5515 bytenr = btrfs_sb_offset(i);
5516 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5517 cache->key.objectid, bytenr, 0,
5518 &logical, &nr, &stripe_len);
5523 if (logical[nr] + stripe_len <= offset)
5525 if (offset + bytes <= logical[nr])
5527 if (logical[nr] == offset) {
5528 if (stripe_len >= bytes) {
5532 bytes -= stripe_len;
5533 offset += stripe_len;
5534 } else if (logical[nr] < offset) {
5535 if (logical[nr] + stripe_len >=
5540 bytes = (offset + bytes) -
5541 (logical[nr] + stripe_len);
5542 offset = logical[nr] + stripe_len;
5545 * Could be tricky, the super may land in the
5546 * middle of the area we're checking. First
5547 * check the easiest case, it's at the end.
5549 if (logical[nr] + stripe_len >=
5551 bytes = logical[nr] - offset;
5555 /* Check the left side */
5556 ret = check_cache_range(root, cache,
5558 logical[nr] - offset);
5564 /* Now we continue with the right side */
5565 bytes = (offset + bytes) -
5566 (logical[nr] + stripe_len);
5567 offset = logical[nr] + stripe_len;
/* The remaining range must be covered by exactly one free-space entry. */
5574 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5576 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5577 offset, offset+bytes);
5581 if (entry->offset != offset) {
5582 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5587 if (entry->bytes != bytes) {
5588 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5589 bytes, entry->bytes, offset);
/* Consume the verified entry so stale leftovers can be detected later. */
5593 unlink_free_space(cache->free_space_ctl, entry);
/*
 * verify_space_cache - cross-check a block group's free-space cache against
 * the extent tree.
 *
 * Walks all EXTENT_ITEM/METADATA_ITEM records inside the block group; every
 * gap between consecutive allocated extents must appear as free space
 * (checked via check_cache_range()).  After the walk, any entries still left
 * in the free-space ctl indicate a corrupt cache.
 *
 * NOTE(review): excerpt is garbled (stray line numbers, missing lines);
 * code left byte-identical.
 */
5598 static int verify_space_cache(struct btrfs_root *root,
5599 struct btrfs_block_group_cache *cache)
5601 struct btrfs_path *path;
5602 struct extent_buffer *leaf;
5603 struct btrfs_key key;
5607 path = btrfs_alloc_path();
/* All lookups below are against the extent tree, not the passed root. */
5611 root = root->fs_info->extent_root;
/* Never scan below the first superblock copy. */
5613 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5615 key.objectid = last;
5617 key.type = BTRFS_EXTENT_ITEM_KEY;
5619 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5624 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5625 ret = btrfs_next_leaf(root, path);
5633 leaf = path->nodes[0];
5634 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop once we walk past the end of this block group. */
5635 if (key.objectid >= cache->key.offset + cache->key.objectid)
5637 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5638 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Adjacent extent: just advance 'last'; metadata items span nodesize. */
5643 if (last == key.objectid) {
5644 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5645 last = key.objectid + key.offset;
5647 last = key.objectid + root->nodesize;
/* Gap between 'last' and this extent must be recorded as free space. */
5652 ret = check_cache_range(root, cache, last,
5653 key.objectid - last);
5656 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5657 last = key.objectid + key.offset;
5659 last = key.objectid + root->nodesize;
/* Trailing free space at the end of the block group. */
5663 if (last < cache->key.objectid + cache->key.offset)
5664 ret = check_cache_range(root, cache, last,
5665 cache->key.objectid +
5666 cache->key.offset - last);
5669 btrfs_free_path(path);
/* Entries not consumed by check_cache_range() mean a bad cache. */
5672 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5673 fprintf(stderr, "There are still entries left in the space "
/*
 * check_space_cache - validate the free-space cache (v1 file or v2 tree) of
 * every block group in the filesystem.
 *
 * Skips validation when the cache generation does not match the super
 * generation (cache will be invalidated anyway).  For each block group the
 * free-space data is loaded — from the free-space tree when the compat_ro
 * feature is set, otherwise from the v1 cache — and cross-checked with
 * verify_space_cache().  Returns -EINVAL if any group failed.
 *
 * NOTE(review): excerpt is garbled (stray line numbers, missing lines);
 * code left byte-identical.
 */
5681 static int check_space_cache(struct btrfs_root *root)
5683 struct btrfs_block_group_cache *cache;
5684 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/* Mismatched cache generation: cache is stale, nothing to verify. */
5688 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5689 btrfs_super_generation(root->fs_info->super_copy) !=
5690 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5691 printf("cache and super generation don't match, space cache "
5692 "will be invalidated\n");
5696 if (ctx.progress_enabled) {
5697 ctx.tp = TASK_FREE_SPACE;
5698 task_start(ctx.info);
/* Iterate block groups in address order. */
5702 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5706 start = cache->key.objectid + cache->key.offset;
5707 if (!cache->free_space_ctl) {
5708 if (btrfs_init_free_space_ctl(cache,
5709 root->sectorsize)) {
5714 btrfs_remove_free_space_cache(cache);
/* v2 free-space tree path: exclude super stripes, then load the tree. */
5717 if (btrfs_fs_compat_ro(root->fs_info,
5718 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5719 ret = exclude_super_stripes(root, cache);
5721 fprintf(stderr, "could not exclude super stripes: %s\n",
5726 ret = load_free_space_tree(root->fs_info, cache);
5727 free_excluded_extents(root, cache);
5729 fprintf(stderr, "could not load free space tree: %s\n",
/* v1 cache path. */
5736 ret = load_free_space_cache(root->fs_info, cache);
5741 ret = verify_space_cache(root, cache);
5743 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5744 cache->key.objectid);
5749 task_stop(ctx.info);
5751 return error ? -EINVAL : 0;
/*
 * check_extent_csums - read @num_bytes of data at @bytenr and verify every
 * sectorsize chunk against the on-disk csums stored at @leaf_offset in @eb.
 *
 * Reads as much as possible per iteration, then checksums each sector.  On a
 * mismatch it reports the bad mirror and retries with the next mirror while
 * one is available.
 *
 * NOTE(review): excerpt is garbled (stray line numbers, missing lines incl.
 * the data free and return path); code left byte-identical.
 */
5754 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5755 u64 num_bytes, unsigned long leaf_offset,
5756 struct extent_buffer *eb) {
5759 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5761 unsigned long csum_offset;
5765 u64 data_checked = 0;
/* Csums cover whole sectors only. */
5771 if (num_bytes % root->sectorsize)
5774 data = malloc(num_bytes);
5778 while (offset < num_bytes) {
5781 read_len = num_bytes - offset;
5782 /* read as much space once a time */
5783 ret = read_extent_data(root, data + offset,
5784 bytenr + offset, &read_len, mirror);
5788 /* verify every 4k data's checksum */
5789 while (data_checked < read_len) {
5791 tmp = offset + data_checked;
5793 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5794 csum, root->sectorsize);
5795 btrfs_csum_final(csum, (char *)&csum);
/* Locate the expected csum for this sector inside the csum item. */
5797 csum_offset = leaf_offset +
5798 tmp / root->sectorsize * csum_size;
5799 read_extent_buffer(eb, (char *)&csum_expected,
5800 csum_offset, csum_size);
5801 /* try another mirror */
5802 if (csum != csum_expected) {
5803 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5804 mirror, bytenr + tmp,
5805 csum, csum_expected);
5806 num_copies = btrfs_num_copies(
5807 &root->fs_info->mapping_tree,
5809 if (mirror < num_copies - 1) {
5814 data_checked += root->sectorsize;
/*
 * check_extent_exists - verify that the whole range [bytenr, bytenr+num_bytes)
 * is covered by EXTENT_ITEM records in the extent tree.
 *
 * Searches backwards past same-bytenr non-extent items, then walks forward
 * trimming the query range by each overlapping extent.  If an extent sits in
 * the middle of the range, the right half is checked recursively.  Any part
 * of the range left uncovered is reported as a csum range with no extent.
 *
 * NOTE(review): excerpt is garbled (stray line numbers, missing loop/brace
 * lines); code left byte-identical.
 */
5823 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5826 struct btrfs_path *path;
5827 struct extent_buffer *leaf;
5828 struct btrfs_key key;
5831 path = btrfs_alloc_path();
5833 fprintf(stderr, "Error allocating path\n");
5837 key.objectid = bytenr;
5838 key.type = BTRFS_EXTENT_ITEM_KEY;
5839 key.offset = (u64)-1;
5842 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5845 fprintf(stderr, "Error looking up extent record %d\n", ret);
5846 btrfs_free_path(path);
5849 if (path->slots[0] > 0) {
5852 ret = btrfs_prev_leaf(root, path);
5855 } else if (ret > 0) {
5862 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5865 * Block group items come before extent items if they have the same
5866 * bytenr, so walk back one more just in case. Dear future traveller,
5867 * first congrats on mastering time travel. Now if it's not too much
5868 * trouble could you go back to 2006 and tell Chris to make the
5869 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5870 * EXTENT_ITEM_KEY please?
5872 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5873 if (path->slots[0] > 0) {
5876 ret = btrfs_prev_leaf(root, path);
5879 } else if (ret > 0) {
5884 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5888 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5889 ret = btrfs_next_leaf(root, path);
5891 fprintf(stderr, "Error going to next leaf "
5893 btrfs_free_path(path);
5899 leaf = path->nodes[0];
5900 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5901 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent entirely before our range: keep walking forward. */
5905 if (key.objectid + key.offset < bytenr) {
/* Extent entirely after our range: nothing more can cover it. */
5909 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at bytenr: trim the covered prefix. */
5912 if (key.objectid == bytenr) {
5913 if (key.offset >= num_bytes) {
5917 num_bytes -= key.offset;
5918 bytenr += key.offset;
/* Extent starts before bytenr and overlaps: trim the overlap. */
5919 } else if (key.objectid < bytenr) {
5920 if (key.objectid + key.offset >= bytenr + num_bytes) {
5924 num_bytes = (bytenr + num_bytes) -
5925 (key.objectid + key.offset);
5926 bytenr = key.objectid + key.offset;
5928 if (key.objectid + key.offset < bytenr + num_bytes) {
5929 u64 new_start = key.objectid + key.offset;
5930 u64 new_bytes = bytenr + num_bytes - new_start;
5933 * Weird case, the extent is in the middle of
5934 * our range, we'll have to search one side
5935 * and then the other. Not sure if this happens
5936 * in real life, but no harm in coding it up
5937 * anyway just in case.
5939 btrfs_release_path(path);
5940 ret = check_extent_exists(root, new_start,
5943 fprintf(stderr, "Right section didn't "
5947 num_bytes = key.objectid - bytenr;
5950 num_bytes = key.objectid - bytenr;
/* Leftover bytes were never matched to an extent record. */
5957 if (num_bytes && !ret) {
5958 fprintf(stderr, "There are no extents for csum range "
5959 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5963 btrfs_free_path(path);
/*
 * check_csums - walk the entire csum tree and sanity-check it.
 *
 * For each EXTENT_CSUM item: optionally (with --check-data-csum) verify the
 * actual data checksums via check_extent_csums(); and, whenever a run of
 * contiguous csums ends, verify via check_extent_exists() that the covered
 * byte range actually has extent records backing it.
 *
 * NOTE(review): excerpt is garbled (stray line numbers, missing lines incl.
 * the loop construct and labels); code left byte-identical.
 */
5967 static int check_csums(struct btrfs_root *root)
5969 struct btrfs_path *path;
5970 struct extent_buffer *leaf;
5971 struct btrfs_key key;
5972 u64 offset = 0, num_bytes = 0;
5973 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5977 unsigned long leaf_offset;
5979 root = root->fs_info->csum_root;
5980 if (!extent_buffer_uptodate(root->node)) {
5981 fprintf(stderr, "No valid csum tree found\n");
5985 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5986 key.type = BTRFS_EXTENT_CSUM_KEY;
5989 path = btrfs_alloc_path();
5993 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5995 fprintf(stderr, "Error searching csum tree %d\n", ret);
5996 btrfs_free_path(path);
6000 if (ret > 0 && path->slots[0])
6005 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6006 ret = btrfs_next_leaf(root, path);
6008 fprintf(stderr, "Error going to next leaf "
6015 leaf = path->nodes[0];
6017 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6018 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Bytes of data this csum item covers: one csum_size per sector. */
6023 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
6024 csum_size) * root->sectorsize;
6025 if (!check_data_csum)
6026 goto skip_csum_check;
6027 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
6028 ret = check_extent_csums(root, key.offset, data_len,
/* First item of a contiguous run: start tracking at this offset. */
6034 offset = key.offset;
/* Discontinuity: the previous run must map to real extent records. */
6035 } else if (key.offset != offset + num_bytes) {
6036 ret = check_extent_exists(root, offset, num_bytes);
6038 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6039 "there is no extent record\n",
6040 offset, offset+num_bytes);
6043 offset = key.offset;
6046 num_bytes += data_len;
6050 btrfs_free_path(path);
/*
 * is_dropped_key - lexicographic (objectid, type, offset) comparison used to
 * decide whether @key falls below @drop_key, i.e. in the part of a tree that
 * is in the middle of being dropped.
 *
 * NOTE(review): excerpt is garbled — the "return 1"/"return 0" lines are
 * missing from this extraction; code left byte-identical.
 */
6054 static int is_dropped_key(struct btrfs_key *key,
6055 struct btrfs_key *drop_key) {
6056 if (key->objectid < drop_key->objectid)
6058 else if (key->objectid == drop_key->objectid) {
6059 if (key->type < drop_key->type)
6061 else if (key->type == drop_key->type) {
6062 if (key->offset < drop_key->offset)
6070 * Here are the rules for FULL_BACKREF.
6072 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6073 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6075 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6076 * if it happened after the relocation occurred since we'll have dropped the
6077 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6078 * have no real way to know for sure.
6080 * We process the blocks one root at a time, and we start from the lowest root
6081 * objectid and go to the highest. So we can just lookup the owner backref for
6082 * the record and if we don't find it then we know it doesn't exist and we have
6085 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6086 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6087 * be set or not and then we can check later once we've gathered all the refs.
/*
 * calc_extent_flag - decide whether @buf should carry FULL_BACKREF and flag
 * inconsistencies against the previously-recorded backref state.
 *
 * Non-FS roots (objectid < FIRST_FREE) never use FULL_BACKREF; a tree root
 * block, a block still owned by this root, or one with an owner backref is
 * treated as normal-backref; the RELOC header flag forces FULL_BACKREF.  Any
 * disagreement with rec->flag_block_full_backref marks bad_full_backref.
 * See the rules in the comment block above this function.
 *
 * NOTE(review): excerpt is garbled (stray line numbers, missing lines incl.
 * labels/returns); code left byte-identical.
 */
6089 static int calc_extent_flag(struct btrfs_root *root,
6090 struct cache_tree *extent_cache,
6091 struct extent_buffer *buf,
6092 struct root_item_record *ri,
6095 struct extent_record *rec;
6096 struct cache_extent *cache;
6097 struct tree_backref *tback;
6100 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6101 /* we have added this extent before */
6103 rec = container_of(cache, struct extent_record, cache);
6106 * Except file/reloc tree, we can not have
6109 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* Root blocks of a tree are never FULL_BACKREF. */
6114 if (buf->start == ri->bytenr)
6117 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6120 owner = btrfs_header_owner(buf);
6121 if (owner == ri->objectid)
6124 tback = find_tree_backref(rec, 0, owner);
/* Normal-backref conclusion conflicts with a prior FULL_BACKREF record. */
6129 if (rec->flag_block_full_backref != FLAG_UNSET &&
6130 rec->flag_block_full_backref != 0)
6131 rec->bad_full_backref = 1;
/* FULL_BACKREF conclusion conflicts with a prior normal-backref record. */
6134 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6135 if (rec->flag_block_full_backref != FLAG_UNSET &&
6136 rec->flag_block_full_backref != 1)
6137 rec->bad_full_backref = 1;
/*
 * report_mismatch_key_root - print "Invalid key type(<type>) found in
 * root(<rootid>)" to stderr; helper for check_type_with_root().
 */
6141 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6143 fprintf(stderr, "Invalid key type(");
6144 print_key_type(stderr, 0, key_type);
6145 fprintf(stderr, ") found in root(");
6146 print_objectid(stderr, rootid, 0);
6147 fprintf(stderr, ")\n");
6151 * Check if the key is valid with its extent buffer.
6153 * This is a early check in case invalid key exists in a extent buffer
6154 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * check_type_with_root - early sanity check that a key type is legal for the
 * tree (rootid) it was found in; mismatches are reported via
 * report_mismatch_key_root().
 *
 * NOTE(review): the case label "BTRFS_CSUM_TREE_OBJECTID" is an *objectid*
 * constant used in a switch over key types — given the "valid in csum and
 * log tree" comment this almost certainly should be BTRFS_EXTENT_CSUM_KEY
 * (upstream btrfs-progs later fixed exactly this).  Left unchanged here
 * because this extraction is missing lines (switch header, breaks, labels)
 * and a safe code fix cannot be reconstructed; code is byte-identical.
 */
6157 static int check_type_with_root(u64 rootid, u8 key_type)
6160 /* Only valid in chunk tree */
6161 case BTRFS_DEV_ITEM_KEY:
6162 case BTRFS_CHUNK_ITEM_KEY:
6163 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6166 /* valid in csum and log tree */
6167 case BTRFS_CSUM_TREE_OBJECTID:
6168 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6172 case BTRFS_EXTENT_ITEM_KEY:
6173 case BTRFS_METADATA_ITEM_KEY:
6174 case BTRFS_BLOCK_GROUP_ITEM_KEY:
6175 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6178 case BTRFS_ROOT_ITEM_KEY:
6179 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6182 case BTRFS_DEV_EXTENT_KEY:
6183 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6189 report_mismatch_key_root(key_type, rootid);
/*
 * run_next_block - process the next tree block from the pending/reada/nodes
 * work queues: read it, resolve its FULL_BACKREF state, validate it, and
 * record every item (extents, chunks, devices, block groups, dev extents,
 * backrefs, file extents) into the various caches.  Child node pointers are
 * queued back onto the pending/nodes trees for later processing.
 *
 * NOTE(review): this excerpt is heavily garbled — stray original line
 * numbers on each line and many missing lines (declarations, braces,
 * continue/break statements).  Code left byte-identical; comments only.
 */
6193 static int run_next_block(struct btrfs_root *root,
6194 struct block_info *bits,
6197 struct cache_tree *pending,
6198 struct cache_tree *seen,
6199 struct cache_tree *reada,
6200 struct cache_tree *nodes,
6201 struct cache_tree *extent_cache,
6202 struct cache_tree *chunk_cache,
6203 struct rb_root *dev_cache,
6204 struct block_group_tree *block_group_cache,
6205 struct device_extent_tree *dev_extent_cache,
6206 struct root_item_record *ri)
6208 struct extent_buffer *buf;
6209 struct extent_record *rec = NULL;
6220 struct btrfs_key key;
6221 struct cache_extent *cache;
/* Pick the next batch of blocks; may return readahead candidates. */
6224 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6225 bits_nr, &reada_bits);
6230 for(i = 0; i < nritems; i++) {
6231 ret = add_cache_extent(reada, bits[i].start,
6236 /* fixme, get the parent transid */
6237 readahead_tree_block(root, bits[i].start,
6241 *last = bits[0].start;
6242 bytenr = bits[0].start;
6243 size = bits[0].size;
/* Drop this block from all work queues before processing it. */
6245 cache = lookup_cache_extent(pending, bytenr, size);
6247 remove_cache_extent(pending, cache);
6250 cache = lookup_cache_extent(reada, bytenr, size);
6252 remove_cache_extent(reada, cache);
6255 cache = lookup_cache_extent(nodes, bytenr, size);
6257 remove_cache_extent(nodes, cache);
6260 cache = lookup_cache_extent(extent_cache, bytenr, size);
6262 rec = container_of(cache, struct extent_record, cache);
6263 gen = rec->parent_generation;
6266 /* fixme, get the real parent transid */
6267 buf = read_tree_block(root, bytenr, size, gen);
6268 if (!extent_buffer_uptodate(buf)) {
6269 record_bad_block_io(root->fs_info,
6270 extent_cache, bytenr, size);
6274 nritems = btrfs_header_nritems(buf);
/* Resolve extent flags: from the extent tree when it is trusted,
 * otherwise recomputed from the block itself. */
6277 if (!init_extent_tree) {
6278 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6279 btrfs_header_level(buf), 1, NULL,
6282 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6284 fprintf(stderr, "Couldn't calc extent flags\n");
6285 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6290 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6292 fprintf(stderr, "Couldn't calc extent flags\n");
6293 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Cross-check FULL_BACKREF against ownership/reloc/snapshot state. */
6297 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6299 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6300 ri->objectid == btrfs_header_owner(buf)) {
6302 * Ok we got to this block from it's original owner and
6303 * we have FULL_BACKREF set. Relocation can leave
6304 * converted blocks over so this is altogether possible,
6305 * however it's not possible if the generation > the
6306 * last snapshot, so check for this case.
6308 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6309 btrfs_header_generation(buf) > ri->last_snapshot) {
6310 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6311 rec->bad_full_backref = 1;
6316 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6317 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6318 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6319 rec->bad_full_backref = 1;
6323 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6324 rec->flag_block_full_backref = 1;
6328 rec->flag_block_full_backref = 0;
6330 owner = btrfs_header_owner(buf);
6333 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch on each item's key type and record it. */
6337 if (btrfs_is_leaf(buf)) {
6338 btree_space_waste += btrfs_leaf_free_space(root, buf);
6339 for (i = 0; i < nritems; i++) {
6340 struct btrfs_file_extent_item *fi;
6341 btrfs_item_key_to_cpu(buf, &key, i);
6343 * Check key type against the leaf owner.
6344 * Could filter quite a lot of early error if
6347 if (check_type_with_root(btrfs_header_owner(buf),
6349 fprintf(stderr, "ignoring invalid key\n");
6352 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6353 process_extent_item(root, extent_cache, buf,
6357 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6358 process_extent_item(root, extent_cache, buf,
6362 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6364 btrfs_item_size_nr(buf, i);
6367 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6368 process_chunk_item(chunk_cache, &key, buf, i);
6371 if (key.type == BTRFS_DEV_ITEM_KEY) {
6372 process_device_item(dev_cache, &key, buf, i);
6375 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6376 process_block_group_item(block_group_cache,
6380 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6381 process_device_extent_item(dev_extent_cache,
6386 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6387 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6388 process_extent_ref_v0(extent_cache, buf, i);
6395 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6396 add_tree_backref(extent_cache, key.objectid, 0,
6400 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6401 add_tree_backref(extent_cache, key.objectid,
6405 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6406 struct btrfs_extent_data_ref *ref;
6407 ref = btrfs_item_ptr(buf, i,
6408 struct btrfs_extent_data_ref);
6409 add_data_backref(extent_cache,
6411 btrfs_extent_data_ref_root(buf, ref),
6412 btrfs_extent_data_ref_objectid(buf,
6414 btrfs_extent_data_ref_offset(buf, ref),
6415 btrfs_extent_data_ref_count(buf, ref),
6416 0, root->sectorsize);
6419 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6420 struct btrfs_shared_data_ref *ref;
6421 ref = btrfs_item_ptr(buf, i,
6422 struct btrfs_shared_data_ref);
6423 add_data_backref(extent_cache,
6424 key.objectid, key.offset, 0, 0, 0,
6425 btrfs_shared_data_ref_count(buf, ref),
6426 0, root->sectorsize);
/* Orphan items (other than the shared orphan objectid) are queued
 * for deletion on the global delete_items list. */
6429 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6430 struct bad_item *bad;
6432 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6436 bad = malloc(sizeof(struct bad_item));
6439 INIT_LIST_HEAD(&bad->list);
6440 memcpy(&bad->key, &key,
6441 sizeof(struct btrfs_key));
6442 bad->root_id = owner;
6443 list_add_tail(&bad->list, &delete_items);
6446 if (key.type != BTRFS_EXTENT_DATA_KEY)
6448 fi = btrfs_item_ptr(buf, i,
6449 struct btrfs_file_extent_item);
6450 if (btrfs_file_extent_type(buf, fi) ==
6451 BTRFS_FILE_EXTENT_INLINE)
6453 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6456 data_bytes_allocated +=
6457 btrfs_file_extent_disk_num_bytes(buf, fi);
6458 if (data_bytes_allocated < root->sectorsize) {
6461 data_bytes_referenced +=
6462 btrfs_file_extent_num_bytes(buf, fi);
6463 add_data_backref(extent_cache,
6464 btrfs_file_extent_disk_bytenr(buf, fi),
6465 parent, owner, key.objectid, key.offset -
6466 btrfs_file_extent_offset(buf, fi), 1, 1,
6467 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record each child pointer and queue it for processing,
 * skipping keys below the root's drop cursor. */
6471 struct btrfs_key first_key;
6473 first_key.objectid = 0;
6476 btrfs_item_key_to_cpu(buf, &first_key, 0);
6477 level = btrfs_header_level(buf);
6478 for (i = 0; i < nritems; i++) {
6479 struct extent_record tmpl;
6481 ptr = btrfs_node_blockptr(buf, i);
6482 size = root->nodesize;
6483 btrfs_node_key_to_cpu(buf, &key, i);
6485 if ((level == ri->drop_level)
6486 && is_dropped_key(&key, &ri->drop_key)) {
6491 memset(&tmpl, 0, sizeof(tmpl));
6492 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6493 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6498 tmpl.max_size = size;
6499 ret = add_extent_rec(extent_cache, &tmpl);
6502 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6505 add_pending(nodes, seen, ptr, size);
6507 add_pending(pending, seen, ptr, size);
6510 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6511 nritems) * sizeof(struct btrfs_key_ptr);
/* Accounting for the final report. */
6513 total_btree_bytes += buf->len;
6514 if (fs_root_objectid(btrfs_header_owner(buf)))
6515 total_fs_tree_bytes += buf->len;
6516 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6517 total_extent_tree_bytes += buf->len;
6518 if (!found_old_backref &&
6519 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6520 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6521 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6522 found_old_backref = 1;
6524 free_extent_buffer(buf);
/*
 * add_root_to_pending - queue a tree root block for the extent scan.
 *
 * Adds @buf to the nodes (non-leaf) or pending (leaf) queue, records it as
 * an extent, and adds its tree backref: self-parented for reloc roots and
 * pre-mixed-backref-rev blocks, otherwise keyed by @objectid.
 *
 * NOTE(review): excerpt is garbled (stray line numbers, missing lines);
 * code left byte-identical.
 */
6528 static int add_root_to_pending(struct extent_buffer *buf,
6529 struct cache_tree *extent_cache,
6530 struct cache_tree *pending,
6531 struct cache_tree *seen,
6532 struct cache_tree *nodes,
6535 struct extent_record tmpl;
6537 if (btrfs_header_level(buf) > 0)
6538 add_pending(nodes, seen, buf->start, buf->len);
6540 add_pending(pending, seen, buf->start, buf->len);
6542 memset(&tmpl, 0, sizeof(tmpl));
6543 tmpl.start = buf->start;
6548 tmpl.max_size = buf->len;
6549 add_extent_rec(extent_cache, &tmpl);
/* Reloc roots and old backref revisions use a self-referencing parent. */
6551 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6552 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6553 add_tree_backref(extent_cache, buf->start, buf->start,
6556 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6560 /* as we fix the tree, we might be deleting blocks that
6561 * we're tracking for repair. This hook makes sure we
6562 * remove any backrefs for blocks as we are fixing them.
/*
 * free_extent_hook - called while repairing the tree when an extent ref is
 * freed; keeps the fsck's in-memory extent/backref caches in sync so we do
 * not later try to "repair" backrefs we just deleted ourselves.
 *
 * Data vs. tree backref is decided by the owner (>= FIRST_FREE == data).
 * Counts are decremented and fully-released backrefs are erased from the
 * record's backref tree; maybe_free_extent_rec() drops the record itself
 * once nothing references it.
 *
 * NOTE(review): excerpt is garbled (stray line numbers, missing lines incl.
 * free() of the erased backref nodes); code left byte-identical.
 */
6564 static int free_extent_hook(struct btrfs_trans_handle *trans,
6565 struct btrfs_root *root,
6566 u64 bytenr, u64 num_bytes, u64 parent,
6567 u64 root_objectid, u64 owner, u64 offset,
6570 struct extent_record *rec;
6571 struct cache_extent *cache;
6573 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6575 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6576 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6580 rec = container_of(cache, struct extent_record, cache);
6582 struct data_backref *back;
6583 back = find_data_backref(rec, parent, root_objectid, owner,
6584 offset, 1, bytenr, num_bytes);
6587 if (back->node.found_ref) {
6588 back->found_ref -= refs_to_drop;
6590 rec->refs -= refs_to_drop;
6592 if (back->node.found_extent_tree) {
6593 back->num_refs -= refs_to_drop;
6594 if (rec->extent_item_refs)
6595 rec->extent_item_refs -= refs_to_drop;
6597 if (back->found_ref == 0)
6598 back->node.found_ref = 0;
6599 if (back->num_refs == 0)
6600 back->node.found_extent_tree = 0;
/* Backref gone from both tree and refs: drop it from the record. */
6602 if (!back->node.found_extent_tree && back->node.found_ref) {
6603 rb_erase(&back->node.node, &rec->backref_tree);
6607 struct tree_backref *back;
6608 back = find_tree_backref(rec, parent, root_objectid);
6611 if (back->node.found_ref) {
6614 back->node.found_ref = 0;
6616 if (back->node.found_extent_tree) {
6617 if (rec->extent_item_refs)
6618 rec->extent_item_refs--;
6619 back->node.found_extent_tree = 0;
6621 if (!back->node.found_extent_tree && back->node.found_ref) {
6622 rb_erase(&back->node.node, &rec->backref_tree);
6626 maybe_free_extent_rec(extent_cache, rec);
/*
 * delete_extent_records - repair helper: delete every extent-tree item
 * (EXTENT_ITEM/METADATA_ITEM and all *_REF items) keyed at @bytenr.
 *
 * Searches from (bytenr, max-type, -1) downward; non-matching item types
 * advance the search key backwards instead of being deleted.  When the
 * extent item itself is removed, the block group usage is updated by the
 * item's byte count (key.offset for data, nodesize for metadata).
 *
 * NOTE(review): excerpt is garbled (stray line numbers, missing lines incl.
 * loop construct and gotos); code left byte-identical.
 */
6631 static int delete_extent_records(struct btrfs_trans_handle *trans,
6632 struct btrfs_root *root,
6633 struct btrfs_path *path,
6634 u64 bytenr, u64 new_len)
6636 struct btrfs_key key;
6637 struct btrfs_key found_key;
6638 struct extent_buffer *leaf;
6643 key.objectid = bytenr;
6645 key.offset = (u64)-1;
6648 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6655 if (path->slots[0] == 0)
6661 leaf = path->nodes[0];
6662 slot = path->slots[0];
6664 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6665 if (found_key.objectid != bytenr)
/* Only extent items and their backref item types are deleted. */
6668 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6669 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6670 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6671 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6672 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6673 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6674 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
/* Skip this item: step the search key back one position and retry. */
6675 btrfs_release_path(path);
6676 if (found_key.type == 0) {
6677 if (found_key.offset == 0)
6679 key.offset = found_key.offset - 1;
6680 key.type = found_key.type;
6682 key.type = found_key.type - 1;
6683 key.offset = (u64)-1;
6687 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6688 found_key.objectid, found_key.type, found_key.offset);
6690 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6693 btrfs_release_path(path);
/* Removing the extent item itself: give the bytes back to the bg. */
6695 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6696 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6697 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6698 found_key.offset : root->nodesize;
6700 ret = btrfs_update_block_group(trans, root, bytenr,
6707 btrfs_release_path(path);
6712 * for a single backref, this will allocate a new extent
6713 * and add the backref to it.
/*
 * record_extent - repair helper: (re)create an extent item for @rec in the
 * extent tree and add the backref described by @back to it.
 *
 * When !allocated the EXTENT_ITEM is inserted first (with tree_block_info
 * for metadata), the block group accounting updated, then the data or tree
 * backref is added via btrfs_inc_extent_ref().
 *
 * NOTE(review): excerpt is garbled — stray line numbers, missing lines, a
 * harmless double semicolon at "copy_key;;", and "©_key" which is an
 * HTML-entity mangling of "&copy_key" in the original source.  Code left
 * byte-identical.
 */
6715 static int record_extent(struct btrfs_trans_handle *trans,
6716 struct btrfs_fs_info *info,
6717 struct btrfs_path *path,
6718 struct extent_record *rec,
6719 struct extent_backref *back,
6720 int allocated, u64 flags)
6723 struct btrfs_root *extent_root = info->extent_root;
6724 struct extent_buffer *leaf;
6725 struct btrfs_key ins_key;
6726 struct btrfs_extent_item *ei;
6727 struct tree_backref *tback;
6728 struct data_backref *dback;
6729 struct btrfs_tree_block_info *bi;
/* Metadata extents are at least one nodesize long. */
6732 rec->max_size = max_t(u64, rec->max_size,
6733 info->extent_root->nodesize);
6736 u32 item_size = sizeof(*ei);
6739 item_size += sizeof(*bi);
6741 ins_key.objectid = rec->start;
6742 ins_key.offset = rec->max_size;
6743 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6745 ret = btrfs_insert_empty_item(trans, extent_root, path,
6746 &ins_key, item_size);
6750 leaf = path->nodes[0];
6751 ei = btrfs_item_ptr(leaf, path->slots[0],
6752 struct btrfs_extent_item);
6754 btrfs_set_extent_refs(leaf, ei, 0);
6755 btrfs_set_extent_generation(leaf, ei, rec->generation);
6757 if (back->is_data) {
6758 btrfs_set_extent_flags(leaf, ei,
6759 BTRFS_EXTENT_FLAG_DATA);
6761 struct btrfs_disk_key copy_key;;
/* Metadata: fill in the embedded tree_block_info after the item. */
6763 tback = to_tree_backref(back);
6764 bi = (struct btrfs_tree_block_info *)(ei + 1);
6765 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6768 btrfs_set_disk_key_objectid(©_key,
6769 rec->info_objectid);
6770 btrfs_set_disk_key_type(©_key, 0);
6771 btrfs_set_disk_key_offset(©_key, 0);
6773 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6774 btrfs_set_tree_block_key(leaf, bi, ©_key);
6776 btrfs_set_extent_flags(leaf, ei,
6777 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6780 btrfs_mark_buffer_dirty(leaf);
6781 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6782 rec->max_size, 1, 0);
6785 btrfs_release_path(path);
6788 if (back->is_data) {
6792 dback = to_data_backref(back);
6793 if (back->full_backref)
6794 parent = dback->parent;
/* Add one ref per missing found_ref occurrence. */
6798 for (i = 0; i < dback->found_ref; i++) {
6799 /* if parent != 0, we're doing a full backref
6800 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6801 * just makes the backref allocator create a data
6804 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6805 rec->start, rec->max_size,
6809 BTRFS_FIRST_FREE_OBJECTID :
6815 fprintf(stderr, "adding new data backref"
6816 " on %llu %s %llu owner %llu"
6817 " offset %llu found %d\n",
6818 (unsigned long long)rec->start,
6819 back->full_backref ?
6821 back->full_backref ?
6822 (unsigned long long)parent :
6823 (unsigned long long)dback->root,
6824 (unsigned long long)dback->owner,
6825 (unsigned long long)dback->offset,
6830 tback = to_tree_backref(back);
6831 if (back->full_backref)
6832 parent = tback->parent;
6836 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6837 rec->start, rec->max_size,
6838 parent, tback->root, 0, 0);
6839 fprintf(stderr, "adding new tree backref on "
6840 "start %llu len %llu parent %llu root %llu\n",
6841 rec->start, rec->max_size, parent, tback->root);
6844 btrfs_release_path(path);
/*
 * find_entry - linear scan of an extent_entry list for an exact
 * (bytenr, bytes) match.
 *
 * NOTE(review): the return statements are missing from this extraction;
 * code left byte-identical.
 */
6848 static struct extent_entry *find_entry(struct list_head *entries,
6849 u64 bytenr, u64 bytes)
6851 struct extent_entry *entry = NULL;
6853 list_for_each_entry(entry, entries, list) {
6854 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry - pick the most trustworthy extent_entry from a
 * list of conflicting candidates, by highest ref count.
 *
 * Entries whose broken count equals their total count are distrusted and
 * skipped.  A tie on the best count means no single entry can be trusted
 * (the search must keep going / fail), per the comments below.
 *
 * NOTE(review): excerpt is garbled (stray line numbers, missing lines);
 * code left byte-identical.
 */
6861 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6863 struct extent_entry *entry, *best = NULL, *prev = NULL;
6865 list_for_each_entry(entry, entries, list) {
6872 * If there are as many broken entries as entries then we know
6873 * not to trust this particular entry.
6875 if (entry->broken == entry->count)
6879 * If our current entry == best then we can't be sure our best
6880 * is really the best, so we need to keep searching.
6882 if (best && best->count == entry->count) {
6888 /* Prev == entry, not good enough, have to keep searching */
6889 if (!prev->broken && prev->count == entry->count)
6893 best = (prev->count > entry->count) ? prev : entry;
6894 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so that it agrees with the agreed-upon
 * extent 'entry' (the bytenr/bytes the majority of backrefs voted for).
 *
 * Flow: read the owning fs root from dback->root, search the fs tree for
 * the EXTENT_DATA item matching dback (walking forward since the backref
 * records the pre-split offset), then re-search with a transaction held
 * and fix disk_bytenr / offset / disk_num_bytes on the item.
 *
 * Returns 0 on success, negative errno otherwise (transaction commit
 * errors folded in via 'err' at the end).
 *
 * NOTE(review): gapped listing -- several error-path lines (goto/return
 * statements, brace closings) are missing between the visible lines.
 */
6902 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6903 struct data_backref *dback, struct extent_entry *entry)
6905 struct btrfs_trans_handle *trans;
6906 struct btrfs_root *root;
6907 struct btrfs_file_extent_item *fi;
6908 struct extent_buffer *leaf;
6909 struct btrfs_key key;
6913 key.objectid = dback->root;
6914 key.type = BTRFS_ROOT_ITEM_KEY;
6915 key.offset = (u64)-1;
6916 root = btrfs_read_fs_root(info, &key);
6918 fprintf(stderr, "Couldn't find root for our ref\n");
6923 * The backref points to the original offset of the extent if it was
6924 * split, so we need to search down to the offset we have and then walk
6925 * forward until we find the backref we're looking for.
6927 key.objectid = dback->owner;
6928 key.type = BTRFS_EXTENT_DATA_KEY;
6929 key.offset = dback->offset;
6930 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6932 fprintf(stderr, "Error looking up ref %d\n", ret);
/* Walk forward leaf-by-leaf until we hit the matching disk extent. */
6937 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6938 ret = btrfs_next_leaf(root, path);
6940 fprintf(stderr, "Couldn't find our ref, next\n");
6944 leaf = path->nodes[0];
6945 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6946 if (key.objectid != dback->owner ||
6947 key.type != BTRFS_EXTENT_DATA_KEY) {
6948 fprintf(stderr, "Couldn't find our ref, search\n");
6951 fi = btrfs_item_ptr(leaf, path->slots[0],
6952 struct btrfs_file_extent_item);
6953 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6954 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6956 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6961 btrfs_release_path(path);
6963 trans = btrfs_start_transaction(root, 1);
6965 return PTR_ERR(trans);
6968 * Ok we have the key of the file extent we want to fix, now we can cow
6969 * down to the thing and fix it.
6971 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6973 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6974 key.objectid, key.type, key.offset, ret);
6978 fprintf(stderr, "Well that's odd, we just found this key "
6979 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6984 leaf = path->nodes[0];
6985 fi = btrfs_item_ptr(leaf, path->slots[0],
6986 struct btrfs_file_extent_item);
/*
 * Compressed extents can't have their offsets shifted the way
 * uncompressed ones can, so only the exact-match rewrite is supported.
 */
6988 if (btrfs_file_extent_compression(leaf, fi) &&
6989 dback->disk_bytenr != entry->bytenr) {
6990 fprintf(stderr, "Ref doesn't match the record start and is "
6991 "compressed, please take a btrfs-image of this file "
6992 "system and send it to a btrfs developer so they can "
6993 "complete this functionality for bytenr %Lu\n",
6994 dback->disk_bytenr);
6999 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7000 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7001 } else if (dback->disk_bytenr > entry->bytenr) {
7002 u64 off_diff, offset;
7004 off_diff = dback->disk_bytenr - entry->bytenr;
7005 offset = btrfs_file_extent_offset(leaf, fi);
7006 if (dback->disk_bytenr + offset +
7007 btrfs_file_extent_num_bytes(leaf, fi) >
7008 entry->bytenr + entry->bytes) {
7009 fprintf(stderr, "Ref is past the entry end, please "
7010 "take a btrfs-image of this file system and "
7011 "send it to a btrfs developer, ref %Lu\n",
7012 dback->disk_bytenr);
7017 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7018 btrfs_set_file_extent_offset(leaf, fi, offset);
7019 } else if (dback->disk_bytenr < entry->bytenr) {
7022 offset = btrfs_file_extent_offset(leaf, fi);
7023 if (dback->disk_bytenr + offset < entry->bytenr) {
7024 fprintf(stderr, "Ref is before the entry start, please"
7025 " take a btrfs-image of this file system and "
7026 "send it to a btrfs developer, ref %Lu\n",
7027 dback->disk_bytenr);
/* Shift the file-extent offset so data still points at the same bytes. */
7032 offset += dback->disk_bytenr;
7033 offset -= entry->bytenr;
7034 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7035 btrfs_set_file_extent_offset(leaf, fi, offset);
7038 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7041 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7042 * only do this if we aren't using compression, otherwise it's a
7045 if (!btrfs_file_extent_compression(leaf, fi))
7046 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7048 printf("ram bytes may be wrong?\n");
7049 btrfs_mark_buffer_dirty(leaf);
7051 err = btrfs_commit_transaction(trans, root);
7052 btrfs_release_path(path);
7053 return ret ? ret : err;
/*
 * Check that all data backrefs of 'rec' agree on the extent's bytenr and
 * size, and if not, determine the consensus (via find_most_right_entry,
 * falling back to the extent record itself as a tie-breaker) and repair
 * each disagreeing file extent with repair_ref().
 *
 * If any ref was actually repaired, the extent cache must be rebuilt by
 * the caller (the comment near the end notes the rescan requirement).
 * All temporary extent_entry allocations are freed before returning.
 *
 * NOTE(review): gapped listing -- early returns (e.g. the metadata
 * short-circuit) and some cleanup lines are not visible here.
 */
7056 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7057 struct extent_record *rec)
7059 struct extent_backref *back, *tmp;
7060 struct data_backref *dback;
7061 struct extent_entry *entry, *best = NULL;
7064 int broken_entries = 0;
7069 * Metadata is easy and the backrefs should always agree on bytenr and
7070 * size, if not we've got bigger issues.
/* Pass 1: tally each distinct (bytenr, bytes) pair seen in the backrefs. */
7075 rbtree_postorder_for_each_entry_safe(back, tmp,
7076 &rec->backref_tree, node) {
7077 if (back->full_backref || !back->is_data)
7080 dback = to_data_backref(back);
7083 * We only pay attention to backrefs that we found a real
7086 if (dback->found_ref == 0)
7090 * For now we only catch when the bytes don't match, not the
7091 * bytenr. We can easily do this at the same time, but I want
7092 * to have a fs image to test on before we just add repair
7093 * functionality willy-nilly so we know we won't screw up the
7097 entry = find_entry(&entries, dback->disk_bytenr,
7100 entry = malloc(sizeof(struct extent_entry));
7105 memset(entry, 0, sizeof(*entry));
7106 entry->bytenr = dback->disk_bytenr;
7107 entry->bytes = dback->bytes;
7108 list_add_tail(&entry->list, &entries);
7113 * If we only have on entry we may think the entries agree when
7114 * in reality they don't so we have to do some extra checking.
7116 if (dback->disk_bytenr != rec->start ||
7117 dback->bytes != rec->nr || back->broken)
7128 /* Yay all the backrefs agree, carry on good sir */
7129 if (nr_entries <= 1 && !mismatch)
7132 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7133 "%Lu\n", rec->start);
7136 * First we want to see if the backrefs can agree amongst themselves who
7137 * is right, so figure out which one of the entries has the highest
7140 best = find_most_right_entry(&entries);
7143 * Ok so we may have an even split between what the backrefs think, so
7144 * this is where we use the extent ref to see what it thinks.
7147 entry = find_entry(&entries, rec->start, rec->nr);
7148 if (!entry && (!broken_entries || !rec->found_rec)) {
7149 fprintf(stderr, "Backrefs don't agree with each other "
7150 "and extent record doesn't agree with anybody,"
7151 " so we can't fix bytenr %Lu bytes %Lu\n",
7152 rec->start, rec->nr);
7155 } else if (!entry) {
7157 * Ok our backrefs were broken, we'll assume this is the
7158 * correct value and add an entry for this range.
7160 entry = malloc(sizeof(struct extent_entry));
7165 memset(entry, 0, sizeof(*entry));
7166 entry->bytenr = rec->start;
7167 entry->bytes = rec->nr;
7168 list_add_tail(&entry->list, &entries);
7172 best = find_most_right_entry(&entries);
7174 fprintf(stderr, "Backrefs and extent record evenly "
7175 "split on who is right, this is going to "
7176 "require user input to fix bytenr %Lu bytes "
7177 "%Lu\n", rec->start, rec->nr);
7184 * I don't think this can happen currently as we'll abort() if we catch
7185 * this case higher up, but in case somebody removes that we still can't
7186 * deal with it properly here yet, so just bail out of that's the case.
7188 if (best->bytenr != rec->start) {
7189 fprintf(stderr, "Extent start and backref starts don't match, "
7190 "please use btrfs-image on this file system and send "
7191 "it to a btrfs developer so they can make fsck fix "
7192 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7193 rec->start, rec->nr);
7199 * Ok great we all agreed on an extent record, let's go find the real
7200 * references and fix up the ones that don't match.
7202 rbtree_postorder_for_each_entry_safe(back, tmp,
7203 &rec->backref_tree, node) {
7204 if (back->full_backref || !back->is_data)
7207 dback = to_data_backref(back);
7210 * Still ignoring backrefs that don't have a real ref attached
7213 if (dback->found_ref == 0)
7216 if (dback->bytes == best->bytes &&
7217 dback->disk_bytenr == best->bytenr)
7220 ret = repair_ref(info, path, dback, best);
7226 * Ok we messed with the actual refs, which means we need to drop our
7227 * entire cache and go back and rescan. I know this is a huge pain and
7228 * adds a lot of extra work, but it's the only way to be safe. Once all
7229 * the backrefs agree we may not need to do anything to the extent
/* Free the vote entries accumulated above. */
7234 while (!list_empty(&entries)) {
7235 entry = list_entry(entries.next, struct extent_entry, list);
7236 list_del_init(&entry->list);
/*
 * Collapse a duplicate extent record set: when 'rec' itself has no real
 * extent item (found_rec unset) but exactly one duplicate does, promote
 * that duplicate ('good') to be the canonical record, absorb any other
 * overlapping records' refs/backrefs/dups into it, and re-insert it into
 * the extent cache.
 *
 * Returns 1 when the promoted record has no remaining duplicates (nothing
 * left to delete), 0 when duplicates remain and the caller still needs to
 * run delete_duplicate_records().
 *
 * NOTE(review): gapped listing -- loop heads, frees of 'rec', and some
 * brace closings are not visible between the lines below.
 */
7242 static int process_duplicates(struct btrfs_root *root,
7243 struct cache_tree *extent_cache,
7244 struct extent_record *rec)
7246 struct extent_record *good, *tmp;
7247 struct cache_extent *cache;
7251 * If we found a extent record for this extent then return, or if we
7252 * have more than one duplicate we are likely going to need to delete
7255 if (rec->found_rec || rec->num_duplicates > 1)
7258 /* Shouldn't happen but just in case */
7259 BUG_ON(!rec->num_duplicates);
7262 * So this happens if we end up with a backref that doesn't match the
7263 * actual extent entry. So either the backref is bad or the extent
7264 * entry is bad. Either way we want to have the extent_record actually
7265 * reflect what we found in the extent_tree, so we need to take the
7266 * duplicate out and use that as the extent_record since the only way we
7267 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7269 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the single duplicate to be the canonical record. */
7271 good = to_extent_record(rec->dups.next);
7272 list_del_init(&good->list);
7273 INIT_LIST_HEAD(&good->backrefs);
7274 INIT_LIST_HEAD(&good->dups);
7275 good->cache.start = good->start;
7276 good->cache.size = good->nr;
7277 good->content_checked = 0;
7278 good->owner_ref_checked = 0;
7279 good->num_duplicates = 0;
7280 good->refs = rec->refs;
7281 list_splice_init(&rec->backrefs, &good->backrefs);
/* Sweep any other overlapping records in the cache into 'good'. */
7283 cache = lookup_cache_extent(extent_cache, good->start,
7287 tmp = container_of(cache, struct extent_record, cache);
7290 * If we find another overlapping extent and it's found_rec is
7291 * set then it's a duplicate and we need to try and delete
7294 if (tmp->found_rec || tmp->num_duplicates > 0) {
7295 if (list_empty(&good->list))
7296 list_add_tail(&good->list,
7297 &duplicate_extents);
7298 good->num_duplicates += tmp->num_duplicates + 1;
7299 list_splice_init(&tmp->dups, &good->dups);
7300 list_del_init(&tmp->list);
7301 list_add_tail(&tmp->list, &good->dups);
7302 remove_cache_extent(extent_cache, &tmp->cache);
7307 * Ok we have another non extent item backed extent rec, so lets
7308 * just add it to this extent and carry on like we did above.
7310 good->refs += tmp->refs;
7311 list_splice_init(&tmp->backrefs, &good->backrefs);
7312 remove_cache_extent(extent_cache, &tmp->cache);
7315 ret = insert_cache_extent(extent_cache, &good->cache);
7318 return good->num_duplicates ? 0 : 1;
/*
 * Delete duplicate EXTENT_ITEMs for 'rec' from the extent tree.
 *
 * First finds the record that fully covers all duplicates ('good'); if
 * the duplicates only partially overlap each other this bails out with
 * an error message.  Every record other than 'good' is moved to a local
 * delete_list, its extent item is removed from the extent root inside a
 * transaction, and the records are freed.
 *
 * Returns the number of deleted entries on success (so callers can tell
 * whether anything changed), or a negative error.
 *
 * NOTE(review): gapped listing -- 'good' initialization, nr_del updates,
 * frees, and several error gotos are not visible between these lines.
 */
7321 static int delete_duplicate_records(struct btrfs_root *root,
7322 struct extent_record *rec)
7324 struct btrfs_trans_handle *trans;
7325 LIST_HEAD(delete_list);
7326 struct btrfs_path *path;
7327 struct extent_record *tmp, *good, *n;
7330 struct btrfs_key key;
7332 path = btrfs_alloc_path();
7339 /* Find the record that covers all of the duplicates. */
7340 list_for_each_entry(tmp, &rec->dups, list) {
7341 if (good->start < tmp->start)
7343 if (good->nr > tmp->nr)
7346 if (tmp->start + tmp->nr < good->start + good->nr) {
7347 fprintf(stderr, "Ok we have overlapping extents that "
7348 "aren't completely covered by each other, this "
7349 "is going to require more careful thought. "
7350 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7351 tmp->start, tmp->nr, good->start, good->nr);
/* Queue every non-covering record (including 'rec') for deletion. */
7358 list_add_tail(&rec->list, &delete_list);
7360 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7363 list_move_tail(&tmp->list, &delete_list);
7366 root = root->fs_info->extent_root;
7367 trans = btrfs_start_transaction(root, 1);
7368 if (IS_ERR(trans)) {
7369 ret = PTR_ERR(trans);
7373 list_for_each_entry(tmp, &delete_list, list) {
7374 if (tmp->found_rec == 0)
7376 key.objectid = tmp->start;
7377 key.type = BTRFS_EXTENT_ITEM_KEY;
7378 key.offset = tmp->nr;
7380 /* Shouldn't happen but just in case */
7381 if (tmp->metadata) {
7382 fprintf(stderr, "Well this shouldn't happen, extent "
7383 "record overlaps but is metadata? "
7384 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7388 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7394 ret = btrfs_del_item(trans, root, path);
7397 btrfs_release_path(path);
7400 err = btrfs_commit_transaction(trans, root);
/* Free the queued records now that their items are gone. */
7404 while (!list_empty(&delete_list)) {
7405 tmp = to_extent_record(delete_list.next);
7406 list_del_init(&tmp->list);
7412 while (!list_empty(&rec->dups)) {
7413 tmp = to_extent_record(rec->dups.next);
7414 list_del_init(&tmp->list);
7418 btrfs_free_path(path);
7420 if (!ret && !nr_del)
7421 rec->num_duplicates = 0;
7423 return ret ? ret : nr_del;
/*
 * For each data backref of 'rec' that never matched a real file extent
 * (found_ref == 0), look up the referenced EXTENT_DATA item in its fs
 * tree and, if the on-disk bytenr has no extent record of its own in
 * 'extent_cache', adopt the on-disk values (disk_bytenr/bytes) into the
 * backref so verify_backrefs() can vote with it.
 *
 * Returns 0 on success, negative errno on lookup failure.
 *
 * NOTE(review): gapped listing -- the 'continue' statements after the
 * skip conditions and the final return are not visible here.
 */
7426 static int find_possible_backrefs(struct btrfs_fs_info *info,
7427 struct btrfs_path *path,
7428 struct cache_tree *extent_cache,
7429 struct extent_record *rec)
7431 struct btrfs_root *root;
7432 struct extent_backref *back, *tmp;
7433 struct data_backref *dback;
7434 struct cache_extent *cache;
7435 struct btrfs_file_extent_item *fi;
7436 struct btrfs_key key;
7440 rbtree_postorder_for_each_entry_safe(back, tmp,
7441 &rec->backref_tree, node) {
7442 /* Don't care about full backrefs (poor unloved backrefs) */
7443 if (back->full_backref || !back->is_data)
7446 dback = to_data_backref(back);
7448 /* We found this one, we don't need to do a lookup */
7449 if (dback->found_ref)
7452 key.objectid = dback->root;
7453 key.type = BTRFS_ROOT_ITEM_KEY;
7454 key.offset = (u64)-1;
7456 root = btrfs_read_fs_root(info, &key);
7458 /* No root, definitely a bad ref, skip */
7459 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7461 /* Other err, exit */
7463 return PTR_ERR(root);
7465 key.objectid = dback->owner;
7466 key.type = BTRFS_EXTENT_DATA_KEY;
7467 key.offset = dback->offset;
7468 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7470 btrfs_release_path(path);
7473 /* Didn't find it, we can carry on */
7478 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7479 struct btrfs_file_extent_item);
7480 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7481 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7482 btrfs_release_path(path);
7483 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7485 struct extent_record *tmp;
7486 tmp = container_of(cache, struct extent_record, cache);
7489 * If we found an extent record for the bytenr for this
7490 * particular backref then we can't add it to our
7491 * current extent record. We only want to add backrefs
7492 * that don't have a corresponding extent item in the
7493 * extent tree since they likely belong to this record
7494 * and we need to fix it if it doesn't match bytenrs.
7500 dback->found_ref += 1;
7501 dback->disk_bytenr = bytenr;
7502 dback->bytes = bytes;
7505 * Set this so the verify backref code knows not to trust the
7506 * values in this backref.
7515 * Record orphan data ref into corresponding root.
7517 * Return 0 if the extent item contains data ref and recorded.
7518 * Return 1 if the extent item contains no useful data ref
7519 * On that case, it may contains only shared_dataref or metadata backref
7520 * or the file extent exists(this should be handled by the extent bytenr
7522 * Return <0 if something goes wrong.
/*
 * NOTE(review): gapped listing -- allocation-failure handling, 'continue'
 * statements, and the ret==0 skip branch after btrfs_search_slot are not
 * visible between the lines below.
 */
7524 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7525 struct extent_record *rec)
7527 struct btrfs_key key;
7528 struct btrfs_root *dest_root;
7529 struct extent_backref *back, *tmp;
7530 struct data_backref *dback;
7531 struct orphan_data_extent *orphan;
7532 struct btrfs_path *path;
7533 int recorded_data_ref = 0;
7538 path = btrfs_alloc_path();
/* Only consider data backrefs that exist in the extent tree but never
 * matched an actual file extent (found_ref unset). */
7541 rbtree_postorder_for_each_entry_safe(back, tmp,
7542 &rec->backref_tree, node) {
7543 if (back->full_backref || !back->is_data ||
7544 !back->found_extent_tree)
7546 dback = to_data_backref(back);
7547 if (dback->found_ref)
7549 key.objectid = dback->root;
7550 key.type = BTRFS_ROOT_ITEM_KEY;
7551 key.offset = (u64)-1;
7553 dest_root = btrfs_read_fs_root(fs_info, &key);
7555 /* For non-exist root we just skip it */
7556 if (IS_ERR(dest_root) || !dest_root)
7559 key.objectid = dback->owner;
7560 key.type = BTRFS_EXTENT_DATA_KEY;
7561 key.offset = dback->offset;
7563 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7565 * For ret < 0, it's OK since the fs-tree may be corrupted,
7566 * we need to record it for inode/file extent rebuild.
7567 * For ret > 0, we record it only for file extent rebuild.
7568 * For ret == 0, the file extent exists but only bytenr
7569 * mismatch, let the original bytenr fix routine to handle,
7575 orphan = malloc(sizeof(*orphan));
7580 INIT_LIST_HEAD(&orphan->list);
7581 orphan->root = dback->root;
7582 orphan->objectid = dback->owner;
7583 orphan->offset = dback->offset;
7584 orphan->disk_bytenr = rec->cache.start;
7585 orphan->disk_len = rec->cache.size;
7586 list_add(&dest_root->orphan_data_extents, &orphan->list);
7587 recorded_data_ref = 1;
7590 btrfs_free_path(path);
7592 return !recorded_data_ref;
7598 * when an incorrect extent item is found, this will delete
7599 * all of the existing entries for it and recreate them
7600 * based on what the tree scan found.
/*
 * Repair pipeline: (1) find_possible_backrefs + verify_backrefs to make
 * the backrefs agree, (2) delete the stale extent records inside a
 * transaction, (3) re-create refs via record_extent for every backref
 * with found_ref set -- unless the block is in the corrupt_blocks cache.
 * Returns 0 on success or negative errno (commit error folded in).
 *
 * NOTE(review): gapped listing -- 'allocated' init, goto labels, and
 * several error branches are not visible between these lines.
 */
7602 static int fixup_extent_refs(struct btrfs_fs_info *info,
7603 struct cache_tree *extent_cache,
7604 struct extent_record *rec)
7606 struct btrfs_trans_handle *trans = NULL;
7608 struct btrfs_path *path;
7609 struct cache_extent *cache;
7610 struct extent_backref *back, *tmp;
7614 if (rec->flag_block_full_backref)
7615 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7617 path = btrfs_alloc_path();
7621 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7623 * Sometimes the backrefs themselves are so broken they don't
7624 * get attached to any meaningful rec, so first go back and
7625 * check any of our backrefs that we couldn't find and throw
7626 * them into the list if we find the backref so that
7627 * verify_backrefs can figure out what to do.
7629 ret = find_possible_backrefs(info, path, extent_cache, rec);
7634 /* step one, make sure all of the backrefs agree */
7635 ret = verify_backrefs(info, path, rec);
7639 trans = btrfs_start_transaction(info->extent_root, 1);
7640 if (IS_ERR(trans)) {
7641 ret = PTR_ERR(trans);
7645 /* step two, delete all the existing records */
7646 ret = delete_extent_records(trans, info->extent_root, path,
7647 rec->start, rec->max_size);
7652 /* was this block corrupt? If so, don't add references to it */
7653 cache = lookup_cache_extent(info->corrupt_blocks,
7654 rec->start, rec->max_size);
7660 /* step three, recreate all the refs we did find */
7661 rbtree_postorder_for_each_entry_safe(back, tmp,
7662 &rec->backref_tree, node) {
7664 * if we didn't find any references, don't create a
7667 if (!back->found_ref)
7670 rec->bad_full_backref = 0;
7671 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7679 int err = btrfs_commit_transaction(trans, info->extent_root);
7684 btrfs_free_path(path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item for
 * 'rec' (METADATA_ITEM vs EXTENT_ITEM keyed by rec->metadata), according
 * to rec->flag_block_full_backref, and commit the change.
 * Returns the commit result, or a negative errno on earlier failure.
 *
 * NOTE(review): gapped listing -- the else branches and return statements
 * on the search-error paths are only partially visible.
 */
7688 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7689 struct extent_record *rec)
7691 struct btrfs_trans_handle *trans;
7692 struct btrfs_root *root = fs_info->extent_root;
7693 struct btrfs_path *path;
7694 struct btrfs_extent_item *ei;
7695 struct btrfs_key key;
7699 key.objectid = rec->start;
7700 if (rec->metadata) {
7701 key.type = BTRFS_METADATA_ITEM_KEY;
7702 key.offset = rec->info_level;
7704 key.type = BTRFS_EXTENT_ITEM_KEY;
7705 key.offset = rec->max_size;
7708 path = btrfs_alloc_path();
7712 trans = btrfs_start_transaction(root, 0);
7713 if (IS_ERR(trans)) {
7714 btrfs_free_path(path);
7715 return PTR_ERR(trans);
7718 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7720 btrfs_free_path(path);
7721 btrfs_commit_transaction(trans, root);
7724 fprintf(stderr, "Didn't find extent for %llu\n",
7725 (unsigned long long)rec->start);
7726 btrfs_free_path(path);
7727 btrfs_commit_transaction(trans, root);
7731 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7732 struct btrfs_extent_item);
7733 flags = btrfs_extent_flags(path->nodes[0], ei);
7734 if (rec->flag_block_full_backref) {
7735 fprintf(stderr, "setting full backref on %llu\n",
7736 (unsigned long long)key.objectid);
7737 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7739 fprintf(stderr, "clearing full backref on %llu\n",
7740 (unsigned long long)key.objectid);
7741 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7743 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7744 btrfs_mark_buffer_dirty(path->nodes[0]);
7745 btrfs_free_path(path);
7746 return btrfs_commit_transaction(trans, root);
7749 /* right now we only prune from the extent allocation tree */
/*
 * Remove the parent-node pointer to one corrupt block: search down to the
 * corrupt block's parent level, locate the slot whose blockptr equals the
 * corrupt block's start (first trying the search result, then scanning
 * the whole node), and delete that pointer with btrfs_del_ptr().
 * Bails out if the parent is the extent root's own node.
 *
 * NOTE(review): gapped listing -- goto labels and the out-path return are
 * not fully visible here.
 */
7750 static int prune_one_block(struct btrfs_trans_handle *trans,
7751 struct btrfs_fs_info *info,
7752 struct btrfs_corrupt_block *corrupt)
7755 struct btrfs_path path;
7756 struct extent_buffer *eb;
7760 int level = corrupt->level + 1;
7762 btrfs_init_path(&path);
7764 /* we want to stop at the parent to our busted block */
7765 path.lowest_level = level;
7767 ret = btrfs_search_slot(trans, info->extent_root,
7768 &corrupt->key, &path, -1, 1);
7773 eb = path.nodes[level];
7780 * hopefully the search gave us the block we want to prune,
7781 * lets try that first
7783 slot = path.slots[level];
7784 found = btrfs_node_blockptr(eb, slot);
7785 if (found == corrupt->cache.start)
7788 nritems = btrfs_header_nritems(eb);
7790 /* the search failed, lets scan this node and hope we find it */
7791 for (slot = 0; slot < nritems; slot++) {
7792 found = btrfs_node_blockptr(eb, slot);
7793 if (found == corrupt->cache.start)
7797 * we couldn't find the bad block. TODO, search all the nodes for pointers
7800 if (eb == info->extent_root->node) {
7805 btrfs_release_path(&path);
7810 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7811 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7814 btrfs_release_path(&path);
/*
 * Walk the fs_info->corrupt_blocks cache and prune each corrupt block's
 * parent pointer via prune_one_block(), starting a transaction lazily on
 * the first entry.  Commits the transaction at the end.
 * NOTE(review): gapped listing -- the loop structure and the !trans guard
 * before the final commit are not visible; confirm against full source.
 */
7818 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7820 struct btrfs_trans_handle *trans = NULL;
7821 struct cache_extent *cache;
7822 struct btrfs_corrupt_block *corrupt;
7825 cache = search_cache_extent(info->corrupt_blocks, 0);
7829 trans = btrfs_start_transaction(info->extent_root, 1);
7831 return PTR_ERR(trans);
7833 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7834 prune_one_block(trans, info, corrupt);
7835 remove_cache_extent(info->corrupt_blocks, cache);
7838 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Drop all cached free-space state: clear every EXTENT_DIRTY range in
 * fs_info->free_space_cache, then walk the block groups and reset their
 * cached state so they get re-scanned.
 * NOTE(review): gapped listing -- the loop bodies and the cache->cached
 * reset are not visible between these lines.
 */
7842 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7844 struct btrfs_block_group_cache *cache;
7849 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7850 &start, &end, EXTENT_DIRTY);
7853 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7859 cache = btrfs_lookup_first_block_group(fs_info, start);
7864 start = cache->key.objectid + cache->key.offset;
/*
 * Main extent-tree verification pass.  For every record in 'extent_cache'
 * it reports (and, when 'repair' is set, attempts to fix) duplicates,
 * ref-count mismatches, backpointer mismatches, owner-ref failures, bad
 * full-backref flags, stripe-crossing metadata, and wrong chunk types.
 * In repair mode it first pins all problem extents and corrupted blocks
 * as excluded so repairs never allocate from them, then fixes block
 * accounting in a final transaction.
 * Returns 0 on success; -EAGAIN signals the caller to rescan.
 *
 * NOTE(review): gapped listing -- 'fixed'/'recorded'/'cur_err' handling,
 * several goto targets, and loop advances are not visible between lines.
 */
7868 static int check_extent_refs(struct btrfs_root *root,
7869 struct cache_tree *extent_cache)
7871 struct extent_record *rec;
7872 struct cache_extent *cache;
7881 * if we're doing a repair, we have to make sure
7882 * we don't allocate from the problem extents.
7883 * In the worst case, this will be all the
7886 cache = search_cache_extent(extent_cache, 0);
7888 rec = container_of(cache, struct extent_record, cache);
7889 set_extent_dirty(root->fs_info->excluded_extents,
7891 rec->start + rec->max_size - 1,
7893 cache = next_cache_extent(cache);
7896 /* pin down all the corrupted blocks too */
7897 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7899 set_extent_dirty(root->fs_info->excluded_extents,
7901 cache->start + cache->size - 1,
7903 cache = next_cache_extent(cache);
7905 prune_corrupt_blocks(root->fs_info);
7906 reset_cached_block_groups(root->fs_info);
7909 reset_cached_block_groups(root->fs_info);
7912 * We need to delete any duplicate entries we find first otherwise we
7913 * could mess up the extent tree when we have backrefs that actually
7914 * belong to a different extent item and not the weird duplicate one.
7916 while (repair && !list_empty(&duplicate_extents)) {
7917 rec = to_extent_record(duplicate_extents.next);
7918 list_del_init(&rec->list);
7920 /* Sometimes we can find a backref before we find an actual
7921 * extent, so we need to process it a little bit to see if there
7922 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7923 * if this is a backref screwup. If we need to delete stuff
7924 * process_duplicates() will return 0, otherwise it will return
7927 if (process_duplicates(root, extent_cache, rec))
7929 ret = delete_duplicate_records(root, rec);
7933 * delete_duplicate_records will return the number of entries
7934 * deleted, so if it's greater than 0 then we know we actually
7935 * did something and we need to remove.
/* Main per-record verification loop. */
7949 cache = search_cache_extent(extent_cache, 0);
7952 rec = container_of(cache, struct extent_record, cache);
7953 if (rec->num_duplicates) {
7954 fprintf(stderr, "extent item %llu has multiple extent "
7955 "items\n", (unsigned long long)rec->start);
7960 if (rec->refs != rec->extent_item_refs) {
7961 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7962 (unsigned long long)rec->start,
7963 (unsigned long long)rec->nr);
7964 fprintf(stderr, "extent item %llu, found %llu\n",
7965 (unsigned long long)rec->extent_item_refs,
7966 (unsigned long long)rec->refs);
7967 ret = record_orphan_data_extents(root->fs_info, rec);
7974 * we can't use the extent to repair file
7975 * extent, let the fallback method handle it.
7977 if (!fixed && repair) {
7978 ret = fixup_extent_refs(
7989 if (all_backpointers_checked(rec, 1)) {
7990 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7991 (unsigned long long)rec->start,
7992 (unsigned long long)rec->nr);
7994 if (!fixed && !recorded && repair) {
7995 ret = fixup_extent_refs(root->fs_info,
8004 if (!rec->owner_ref_checked) {
8005 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8006 (unsigned long long)rec->start,
8007 (unsigned long long)rec->nr);
8008 if (!fixed && !recorded && repair) {
8009 ret = fixup_extent_refs(root->fs_info,
8018 if (rec->bad_full_backref) {
8019 fprintf(stderr, "bad full backref, on [%llu]\n",
8020 (unsigned long long)rec->start);
8022 ret = fixup_extent_flags(root->fs_info, rec);
8031 * Although it's not a extent ref's problem, we reuse this
8032 * routine for error reporting.
8033 * No repair function yet.
8035 if (rec->crossing_stripes) {
8037 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8038 rec->start, rec->start + rec->max_size);
8043 if (rec->wrong_chunk_type) {
8045 "bad extent [%llu, %llu), type mismatch with chunk\n",
8046 rec->start, rec->start + rec->max_size);
8051 remove_cache_extent(extent_cache, cache);
8052 free_all_extent_backrefs(rec);
8053 if (!init_extent_tree && repair && (!cur_err || fixed))
8054 clear_extent_dirty(root->fs_info->excluded_extents,
8056 rec->start + rec->max_size - 1,
8062 if (ret && ret != -EAGAIN) {
8063 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8066 struct btrfs_trans_handle *trans;
8068 root = root->fs_info->extent_root;
8069 trans = btrfs_start_transaction(root, 1);
8070 if (IS_ERR(trans)) {
8071 ret = PTR_ERR(trans);
8075 btrfs_fix_block_accounting(trans, root);
8076 ret = btrfs_commit_transaction(trans, root);
8081 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Compute the per-device stripe length for a chunk of 'length' bytes with
 * 'num_stripes' stripes, based on the RAID profile bits in 'type':
 *   RAID0:  length / num_stripes
 *   RAID10: length * 2 / num_stripes   (two copies spread over stripes)
 *   RAID5:  length / (num_stripes - 1) (one parity stripe)
 *   RAID6:  length / (num_stripes - 2) (two parity stripes)
 *   other (single/DUP/RAID1): length as-is
 * NOTE(review): the return statement falls outside this gapped listing.
 */
8087 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8091 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8092 stripe_size = length;
8093 stripe_size /= num_stripes;
8094 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8095 stripe_size = length * 2;
8096 stripe_size /= num_stripes;
8097 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8098 stripe_size = length;
8099 stripe_size /= (num_stripes - 1);
8100 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8101 stripe_size = length;
8102 stripe_size /= (num_stripes - 2);
8104 stripe_size = length;
8110 * Check the chunk with its block group/dev list ref:
8111 * Return 0 if all refs seems valid.
8112 * Return 1 if part of refs seems valid, need later check for rebuild ref
8113 * like missing block group and needs to search extent tree to rebuild them.
8114 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * Cross-checks one chunk record against (a) its block group item and
 * (b) one dev extent per stripe, complaining (unless 'silent') about any
 * length/offset/type mismatch.  NOTE(review): gapped listing -- 'ret'
 * updates and the metadump_v2 short-circuits are not visible here.
 */
8116 static int check_chunk_refs(struct chunk_record *chunk_rec,
8117 struct block_group_tree *block_group_cache,
8118 struct device_extent_tree *dev_extent_cache,
8121 struct cache_extent *block_group_item;
8122 struct block_group_record *block_group_rec;
8123 struct cache_extent *dev_extent_item;
8124 struct device_extent_record *dev_extent_rec;
8128 int metadump_v2 = 0;
8132 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8135 if (block_group_item) {
8136 block_group_rec = container_of(block_group_item,
8137 struct block_group_record,
8139 if (chunk_rec->length != block_group_rec->offset ||
8140 chunk_rec->offset != block_group_rec->objectid ||
8142 chunk_rec->type_flags != block_group_rec->flags)) {
8145 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8146 chunk_rec->objectid,
8151 chunk_rec->type_flags,
8152 block_group_rec->objectid,
8153 block_group_rec->type,
8154 block_group_rec->offset,
8155 block_group_rec->offset,
8156 block_group_rec->objectid,
8157 block_group_rec->flags);
8160 list_del_init(&block_group_rec->list);
8161 chunk_rec->bg_rec = block_group_rec;
8166 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8167 chunk_rec->objectid,
8172 chunk_rec->type_flags);
/* Verify each stripe has a matching dev extent of the computed length. */
8179 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8180 chunk_rec->num_stripes);
8181 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8182 devid = chunk_rec->stripes[i].devid;
8183 offset = chunk_rec->stripes[i].offset;
8184 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8185 devid, offset, length);
8186 if (dev_extent_item) {
8187 dev_extent_rec = container_of(dev_extent_item,
8188 struct device_extent_record,
8190 if (dev_extent_rec->objectid != devid ||
8191 dev_extent_rec->offset != offset ||
8192 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8193 dev_extent_rec->length != length) {
8196 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8197 chunk_rec->objectid,
8200 chunk_rec->stripes[i].devid,
8201 chunk_rec->stripes[i].offset,
8202 dev_extent_rec->objectid,
8203 dev_extent_rec->offset,
8204 dev_extent_rec->length);
8207 list_move(&dev_extent_rec->chunk_list,
8208 &chunk_rec->dextents);
8213 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8214 chunk_rec->objectid,
8217 chunk_rec->stripes[i].devid,
8218 chunk_rec->stripes[i].offset);
8225 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Validate every chunk via check_chunk_refs(), sorting each chunk record
 * onto the caller's 'good', 'rebuild', or 'bad' list according to the
 * result, then report block groups and dev extents left without a
 * matching chunk.  NOTE(review): gapped listing -- 'err'/'ret'
 * bookkeeping and the final return are not visible here.
 */
8226 int check_chunks(struct cache_tree *chunk_cache,
8227 struct block_group_tree *block_group_cache,
8228 struct device_extent_tree *dev_extent_cache,
8229 struct list_head *good, struct list_head *bad,
8230 struct list_head *rebuild, int silent)
8232 struct cache_extent *chunk_item;
8233 struct chunk_record *chunk_rec;
8234 struct block_group_record *bg_rec;
8235 struct device_extent_record *dext_rec;
8239 chunk_item = first_cache_extent(chunk_cache);
8240 while (chunk_item) {
8241 chunk_rec = container_of(chunk_item, struct chunk_record,
8243 err = check_chunk_refs(chunk_rec, block_group_cache,
8244 dev_extent_cache, silent);
8247 if (err == 0 && good)
8248 list_add_tail(&chunk_rec->list, good);
8249 if (err > 0 && rebuild)
8250 list_add_tail(&chunk_rec->list, rebuild);
8252 list_add_tail(&chunk_rec->list, bad);
8253 chunk_item = next_cache_extent(chunk_item);
8256 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8259 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8267 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8271 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of all dev extents belonging to dev_rec->devid and
 * compare against the device item's byte_used, reporting a mismatch.
 * NOTE(review): gapped listing -- the return values (0 on match,
 * presumably negative/error on mismatch) are not visible here.
 */
8282 static int check_device_used(struct device_record *dev_rec,
8283 struct device_extent_tree *dext_cache)
8285 struct cache_extent *cache;
8286 struct device_extent_record *dev_extent_rec;
8289 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8291 dev_extent_rec = container_of(cache,
8292 struct device_extent_record,
8294 if (dev_extent_rec->objectid != dev_rec->devid)
8297 list_del_init(&dev_extent_rec->device_list);
8298 total_byte += dev_extent_rec->length;
8299 cache = next_cache_extent(cache);
8302 if (total_byte != dev_rec->byte_used) {
8304 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8305 total_byte, dev_rec->byte_used, dev_rec->objectid,
8306 dev_rec->type, dev_rec->offset);
8313 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Walk every device record in @dev_cache (an rbtree keyed by devid) and
 * verify its used-bytes accounting via check_device_used(), then report
 * any device extents whose owning device item was never found.
 */
8314 static int check_devices(struct rb_root *dev_cache,
8315 struct device_extent_tree *dev_extent_cache)
8317 struct rb_node *dev_node;
8318 struct device_record *dev_rec;
8319 struct device_extent_record *dext_rec;
8323 dev_node = rb_first(dev_cache);
8325 dev_rec = container_of(dev_node, struct device_record, node);
8326 err = check_device_used(dev_rec, dev_extent_cache);
8330 dev_node = rb_next(dev_node);
/* Extents still on no_device_orphans matched no device item */
8332 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8335 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8336 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root to scan (its
 * bytenr, level, node size and snapshot/drop state) and append it to
 * @head for later processing by deal_root_from_list().
 *
 * @drop_key, when non-NULL, records the resume point of a partially
 * dropped snapshot (copied into the record).
 *
 * NOTE(review): no NULL check after malloc() is visible in this excerpt
 * -- presumably the elided lines return -ENOMEM on failure; confirm
 * against the full source.
 */
8343 static int add_root_item_to_list(struct list_head *head,
8344 u64 objectid, u64 bytenr, u64 last_snapshot,
8345 u8 level, u8 drop_level,
8346 int level_size, struct btrfs_key *drop_key)
8349 struct root_item_record *ri_rec;
8350 ri_rec = malloc(sizeof(*ri_rec));
8353 ri_rec->bytenr = bytenr;
8354 ri_rec->objectid = objectid;
8355 ri_rec->level = level;
8356 ri_rec->level_size = level_size;
8357 ri_rec->drop_level = drop_level;
8358 ri_rec->last_snapshot = last_snapshot;
8360 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8361 list_add_tail(&ri_rec->list, head);
/*
 * Pop every root_item_record off @list and release it.  Used to clean up
 * the normal/dropping tree queues built by add_root_item_to_list().
 */
8366 static void free_root_item_list(struct list_head *list)
8368 struct root_item_record *ri_rec;
8370 while (!list_empty(list)) {
8371 ri_rec = list_first_entry(list, struct root_item_record,
8373 list_del_init(&ri_rec->list);
/*
 * Process every queued root on @list: read the root's tree block, seed
 * the pending/seen/nodes caches with it via add_root_to_pending(), then
 * repeatedly call run_next_block() to scan blocks, populating the
 * extent/chunk/device/block-group caches.  A final run_next_block() loop
 * with a NULL record drains whatever is still pending once the list is
 * empty.
 */
8378 static int deal_root_from_list(struct list_head *list,
8379 struct btrfs_root *root,
8380 struct block_info *bits,
8382 struct cache_tree *pending,
8383 struct cache_tree *seen,
8384 struct cache_tree *reada,
8385 struct cache_tree *nodes,
8386 struct cache_tree *extent_cache,
8387 struct cache_tree *chunk_cache,
8388 struct rb_root *dev_cache,
8389 struct block_group_tree *block_group_cache,
8390 struct device_extent_tree *dev_extent_cache)
8395 while (!list_empty(list)) {
8396 struct root_item_record *rec;
8397 struct extent_buffer *buf;
8398 rec = list_entry(list->next,
8399 struct root_item_record, list);
8401 buf = read_tree_block(root->fs_info->tree_root,
8402 rec->bytenr, rec->level_size, 0);
/* Unreadable/corrupt root node: drop the buffer and move on */
8403 if (!extent_buffer_uptodate(buf)) {
8404 free_extent_buffer(buf);
8408 add_root_to_pending(buf, extent_cache, pending,
8409 seen, nodes, rec->objectid);
8411 * To rebuild extent tree, we need deal with snapshot
8412 * one by one, otherwise we deal with node firstly which
8413 * can maximize readahead.
8416 ret = run_next_block(root, bits, bits_nr, &last,
8417 pending, seen, reada, nodes,
8418 extent_cache, chunk_cache,
8419 dev_cache, block_group_cache,
8420 dev_extent_cache, rec);
8424 free_extent_buffer(buf);
8425 list_del(&rec->list);
/* Drain any blocks still pending after all queued roots are done */
8431 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8432 reada, nodes, extent_cache, chunk_cache,
8433 dev_cache, block_group_cache,
8434 dev_extent_cache, NULL);
/*
 * Top-level verification pass of the original check mode.
 *
 * Builds in-memory caches (devices, chunks, block groups, device
 * extents, extents) by scanning the tree root, the chunk root and every
 * ROOT_ITEM found in the root tree -- splitting roots into "normal"
 * trees and partially-dropped ("dropping") snapshots -- and then
 * cross-checks: chunks vs block groups/device extents (check_chunks),
 * extent backrefs (check_extent_refs) and per-device byte accounting
 * (check_devices).  All caches and fs_info hooks are torn down on both
 * the success and error paths before returning.
 */
8444 static int check_chunks_and_extents(struct btrfs_root *root)
8446 struct rb_root dev_cache;
8447 struct cache_tree chunk_cache;
8448 struct block_group_tree block_group_cache;
8449 struct device_extent_tree dev_extent_cache;
8450 struct cache_tree extent_cache;
8451 struct cache_tree seen;
8452 struct cache_tree pending;
8453 struct cache_tree reada;
8454 struct cache_tree nodes;
8455 struct extent_io_tree excluded_extents;
8456 struct cache_tree corrupt_blocks;
8457 struct btrfs_path path;
8458 struct btrfs_key key;
8459 struct btrfs_key found_key;
8461 struct block_info *bits;
8463 struct extent_buffer *leaf;
8465 struct btrfs_root_item ri;
8466 struct list_head dropping_trees;
8467 struct list_head normal_trees;
8468 struct btrfs_root *root1;
/* Stage 1: initialize every cache used by the scan */
8473 dev_cache = RB_ROOT;
8474 cache_tree_init(&chunk_cache);
8475 block_group_tree_init(&block_group_cache);
8476 device_extent_tree_init(&dev_extent_cache);
8478 cache_tree_init(&extent_cache);
8479 cache_tree_init(&seen);
8480 cache_tree_init(&pending);
8481 cache_tree_init(&nodes);
8482 cache_tree_init(&reada);
8483 cache_tree_init(&corrupt_blocks);
8484 extent_io_tree_init(&excluded_extents);
8485 INIT_LIST_HEAD(&dropping_trees);
8486 INIT_LIST_HEAD(&normal_trees);
/* Hook our caches into fs_info so lower layers can record findings */
8489 root->fs_info->excluded_extents = &excluded_extents;
8490 root->fs_info->fsck_extent_cache = &extent_cache;
8491 root->fs_info->free_extent_hook = free_extent_hook;
8492 root->fs_info->corrupt_blocks = &corrupt_blocks;
8496 bits = malloc(bits_nr * sizeof(struct block_info));
8502 if (ctx.progress_enabled) {
8503 ctx.tp = TASK_EXTENTS;
8504 task_start(ctx.info);
/* Stage 2: queue the tree root and chunk root as "normal" trees */
8508 root1 = root->fs_info->tree_root;
8509 level = btrfs_header_level(root1->node);
8510 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8511 root1->node->start, 0, level, 0,
8512 root1->nodesize, NULL);
8515 root1 = root->fs_info->chunk_root;
8516 level = btrfs_header_level(root1->node);
8517 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8518 root1->node->start, 0, level, 0,
8519 root1->nodesize, NULL);
/* Stage 3: walk every ROOT_ITEM in the root tree */
8522 btrfs_init_path(&path);
8525 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8526 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8531 leaf = path.nodes[0];
8532 slot = path.slots[0];
8533 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8534 ret = btrfs_next_leaf(root, &path);
8537 leaf = path.nodes[0];
8538 slot = path.slots[0];
8540 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8541 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8542 unsigned long offset;
8545 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8546 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8547 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * drop_progress objectid == 0 means no snapshot deletion is
 * in flight: a normal tree.  Otherwise it is a partially
 * dropped snapshot and its drop_progress key is carried
 * along so the scan can resume from the right place.
 */
8548 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8549 level = btrfs_root_level(&ri);
8550 level_size = root->nodesize;
8551 ret = add_root_item_to_list(&normal_trees,
8553 btrfs_root_bytenr(&ri),
8554 last_snapshot, level,
8555 0, level_size, NULL);
8559 level = btrfs_root_level(&ri);
8560 level_size = root->nodesize;
8561 objectid = found_key.objectid;
8562 btrfs_disk_key_to_cpu(&found_key,
8564 ret = add_root_item_to_list(&dropping_trees,
8566 btrfs_root_bytenr(&ri),
8567 last_snapshot, level,
8569 level_size, &found_key);
8576 btrfs_release_path(&path);
/* Stage 4: scan all queued trees, populating the caches */
8579 * check_block can return -EAGAIN if it fixes something, please keep
8580 * this in mind when dealing with return values from these functions, if
8581 * we get -EAGAIN we want to fall through and restart the loop.
8583 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8584 &seen, &reada, &nodes, &extent_cache,
8585 &chunk_cache, &dev_cache, &block_group_cache,
8592 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8593 &pending, &seen, &reada, &nodes,
8594 &extent_cache, &chunk_cache, &dev_cache,
8595 &block_group_cache, &dev_extent_cache);
/* Stage 5: cross-check what the scan collected */
8602 ret = check_chunks(&chunk_cache, &block_group_cache,
8603 &dev_extent_cache, NULL, NULL, NULL, 0);
8610 ret = check_extent_refs(root, &extent_cache);
8617 ret = check_devices(&dev_cache, &dev_extent_cache);
8622 task_stop(ctx.info);
/* Normal-path teardown: unhook fs_info and free the caches */
8624 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8625 extent_io_tree_cleanup(&excluded_extents);
8626 root->fs_info->fsck_extent_cache = NULL;
8627 root->fs_info->free_extent_hook = NULL;
8628 root->fs_info->corrupt_blocks = NULL;
8629 root->fs_info->excluded_extents = NULL;
8632 free_chunk_cache_tree(&chunk_cache);
8633 free_device_cache_tree(&dev_cache);
8634 free_block_group_tree(&block_group_cache);
8635 free_device_extent_tree(&dev_extent_cache);
8636 free_extent_cache_tree(&seen);
8637 free_extent_cache_tree(&pending);
8638 free_extent_cache_tree(&reada);
8639 free_extent_cache_tree(&nodes);
/*
 * Error-path teardown (reached via elided labels/gotos): frees
 * everything, including the queued root lists and extent records.
 */
8642 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8643 free_extent_cache_tree(&seen);
8644 free_extent_cache_tree(&pending);
8645 free_extent_cache_tree(&reada);
8646 free_extent_cache_tree(&nodes);
8647 free_chunk_cache_tree(&chunk_cache);
8648 free_block_group_tree(&block_group_cache);
8649 free_device_cache_tree(&dev_cache);
8650 free_device_extent_tree(&dev_extent_cache);
8651 free_extent_record_cache(root->fs_info, &extent_cache);
8652 free_root_item_list(&normal_trees);
8653 free_root_item_list(&dropping_trees);
8654 extent_io_tree_cleanup(&excluded_extents);
8659 * Check backrefs of a tree block given by @bytenr or @eb.
8661 * @root: the root containing the @bytenr or @eb
8662 * @eb: tree block extent buffer, can be NULL
8663 * @bytenr: bytenr of the tree block to search
8664 * @level: tree level of the tree block
8665 * @owner: owner of the tree block
8667 * Return >0 for any error found and output error message
8668 * Return 0 for no error found
8670 static int check_tree_block_ref(struct btrfs_root *root,
8671 struct extent_buffer *eb, u64 bytenr,
8672 int level, u64 owner)
8674 struct btrfs_key key;
8675 struct btrfs_root *extent_root = root->fs_info->extent_root;
8676 struct btrfs_path path;
8677 struct btrfs_extent_item *ei;
8678 struct btrfs_extent_inline_ref *iref;
8679 struct extent_buffer *leaf;
8685 u32 nodesize = root->nodesize;
8692 btrfs_init_path(&path);
8693 key.objectid = bytenr;
/* Skinny metadata stores tree blocks as METADATA_ITEMs, not EXTENT_ITEMs */
8694 if (btrfs_fs_incompat(root->fs_info,
8695 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8696 key.type = BTRFS_METADATA_ITEM_KEY;
8698 key.type = BTRFS_EXTENT_ITEM_KEY;
8699 key.offset = (u64)-1;
8701 /* Search for the backref in extent tree */
8702 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8704 err |= BACKREF_MISSING;
/* offset=-1 lands past the item; step back to the matching extent item */
8707 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8709 err |= BACKREF_MISSING;
8713 leaf = path.nodes[0];
8714 slot = path.slots[0];
8715 btrfs_item_key_to_cpu(leaf, &key, slot);
8717 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * For METADATA_ITEM the level is encoded in key.offset and inline refs
 * follow the extent item directly; for old-style EXTENT_ITEM a
 * btrfs_tree_block_info sits in between.
 */
8719 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8720 skinny_level = (int)key.offset;
8721 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8723 struct btrfs_tree_block_info *info;
8725 info = (struct btrfs_tree_block_info *)(ei + 1);
8726 skinny_level = btrfs_tree_block_level(leaf, info);
8727 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Sanity checks on the extent item itself: flags, generation, level, refs */
8734 if (!(btrfs_extent_flags(leaf, ei) &
8735 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8737 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8738 key.objectid, nodesize,
8739 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8740 err = BACKREF_MISMATCH;
8742 header_gen = btrfs_header_generation(eb);
8743 extent_gen = btrfs_extent_generation(leaf, ei);
8744 if (header_gen != extent_gen) {
8746 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8747 key.objectid, nodesize, header_gen,
8749 err = BACKREF_MISMATCH;
8751 if (level != skinny_level) {
8753 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8754 key.objectid, nodesize, level, skinny_level);
8755 err = BACKREF_MISMATCH;
8757 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8759 "extent[%llu %u] is referred by other roots than %llu",
8760 key.objectid, nodesize, root->objectid);
8761 err = BACKREF_MISMATCH;
8766 * Iterate the extent/metadata item to find the exact backref
8768 item_size = btrfs_item_size_nr(leaf, slot);
8769 ptr = (unsigned long)iref;
8770 end = (unsigned long)ei + item_size;
8772 iref = (struct btrfs_extent_inline_ref *)ptr;
8773 type = btrfs_extent_inline_ref_type(leaf, iref);
8774 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8776 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8777 (offset == root->objectid || offset == owner)) {
8779 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8780 /* Check if the backref points to valid referencer */
8781 found_ref = !check_tree_block_ref(root, NULL, offset,
8787 ptr += btrfs_extent_inline_ref_size(type);
8791 * Inlined extent item doesn't have what we need, check
8792 * TREE_BLOCK_REF_KEY
8795 btrfs_release_path(&path);
8796 key.objectid = bytenr;
8797 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8798 key.offset = root->objectid;
8800 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8805 err |= BACKREF_MISSING;
8807 btrfs_release_path(&path);
8808 if (eb && (err & BACKREF_MISSING))
8809 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8810 bytenr, nodesize, owner, level);
8815 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8817 * Return >0 any error found and output error message
8818 * Return 0 for no error found
8820 static int check_extent_data_item(struct btrfs_root *root,
8821 struct extent_buffer *eb, int slot)
8823 struct btrfs_file_extent_item *fi;
8824 struct btrfs_path path;
8825 struct btrfs_root *extent_root = root->fs_info->extent_root;
8826 struct btrfs_key fi_key;
8827 struct btrfs_key dbref_key;
8828 struct extent_buffer *leaf;
8829 struct btrfs_extent_item *ei;
8830 struct btrfs_extent_inline_ref *iref;
8831 struct btrfs_extent_data_ref *dref;
8833 u64 file_extent_gen;
8836 u64 extent_num_bytes;
8844 int found_dbackref = 0;
8848 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8849 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8850 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8852 /* Nothing to check for hole and inline data extents */
8853 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8854 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8857 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8858 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8859 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8861 /* Check unaligned disk_num_bytes and num_bytes */
8862 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8864 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8865 fi_key.objectid, fi_key.offset, disk_num_bytes,
8867 err |= BYTES_UNALIGNED;
/* Global accounting, reported in the fsck summary */
8869 data_bytes_allocated += disk_num_bytes;
8871 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8873 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8874 fi_key.objectid, fi_key.offset, extent_num_bytes,
8876 err |= BYTES_UNALIGNED;
8878 data_bytes_referenced += extent_num_bytes;
8880 owner = btrfs_header_owner(eb);
8882 /* Check the extent item of the file extent in extent tree */
8883 btrfs_init_path(&path);
8884 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8885 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8886 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8888 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8890 err |= BACKREF_MISSING;
8894 leaf = path.nodes[0];
8895 slot = path.slots[0];
8896 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8898 extent_flags = btrfs_extent_flags(leaf, ei);
8899 extent_gen = btrfs_extent_generation(leaf, ei);
/* The extent item must be flagged as DATA */
8901 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8903 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8904 disk_bytenr, disk_num_bytes,
8905 BTRFS_EXTENT_FLAG_DATA);
8906 err |= BACKREF_MISMATCH;
/* File extent generation can never predate the extent item's */
8909 if (file_extent_gen < extent_gen) {
8911 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8912 disk_bytenr, disk_num_bytes, file_extent_gen,
8914 err |= BACKREF_MISMATCH;
8917 /* Check data backref inside that extent item */
8918 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8919 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8920 ptr = (unsigned long)iref;
8921 end = (unsigned long)ei + item_size;
8923 iref = (struct btrfs_extent_inline_ref *)ptr;
8924 type = btrfs_extent_inline_ref_type(leaf, iref);
8925 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8927 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8928 ref_root = btrfs_extent_data_ref_root(leaf, dref);
8929 if (ref_root == owner || ref_root == root->objectid)
8931 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared ref: offset is the parent tree block to validate */
8932 found_dbackref = !check_tree_block_ref(root, NULL,
8933 btrfs_extent_inline_ref_offset(leaf, iref),
8939 ptr += btrfs_extent_inline_ref_size(type);
8942 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8943 if (!found_dbackref) {
8944 btrfs_release_path(&path);
8946 btrfs_init_path(&path);
8947 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8948 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/* Keyed data backrefs are addressed by this hash of (root, ino, offset) */
8949 dbref_key.offset = hash_extent_data_ref(root->objectid,
8950 fi_key.objectid, fi_key.offset);
8952 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8953 &dbref_key, &path, 0, 0);
8958 if (!found_dbackref)
8959 err |= BACKREF_MISSING;
8961 btrfs_release_path(&path);
8962 if (err & BACKREF_MISSING) {
8963 error("data extent[%llu %llu] backref lost",
8964 disk_bytenr, disk_num_bytes);
8970 * Get real tree block level for the case like shared block
8971 * Return >= 0 as tree level
8972 * Return <0 for error
8974 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8976 struct extent_buffer *eb;
8977 struct btrfs_path path;
8978 struct btrfs_key key;
8979 struct btrfs_extent_item *ei;
8982 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8987 /* Search extent tree for extent generation and level */
8988 key.objectid = bytenr;
8989 key.type = BTRFS_METADATA_ITEM_KEY;
8990 key.offset = (u64)-1;
8992 btrfs_init_path(&path);
8993 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8996 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9004 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9005 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9006 struct btrfs_extent_item);
/* Only tree blocks are meaningful here; data extents have no level */
9007 flags = btrfs_extent_flags(path.nodes[0], ei);
9008 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9013 /* Get transid for later read_tree_block() check */
9014 transid = btrfs_extent_generation(path.nodes[0], ei);
9016 /* Get backref level as one source */
9017 if (key.type == BTRFS_METADATA_ITEM_KEY) {
/* Skinny metadata: level is stored directly in key.offset */
9018 backref_level = key.offset;
9020 struct btrfs_tree_block_info *info;
9022 info = (struct btrfs_tree_block_info *)(ei + 1);
9023 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9025 btrfs_release_path(&path);
9027 /* Get level from tree block as an alternative source */
9028 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9029 if (!extent_buffer_uptodate(eb)) {
9030 free_extent_buffer(eb);
9033 header_level = btrfs_header_level(eb);
9034 free_extent_buffer(eb);
/* The two sources must agree; elided branch presumably errors out */
9036 if (header_level != backref_level)
9038 return header_level;
9041 btrfs_release_path(&path);
9046 * Check if a tree block backref is valid (points to a valid tree block)
9047 * if level == -1, level will be resolved
9048 * Return >0 for any error found and print error message
9050 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9051 u64 bytenr, int level)
9053 struct btrfs_root *root;
9054 struct btrfs_key key;
9055 struct btrfs_path path;
9056 struct extent_buffer *eb;
9057 struct extent_buffer *node;
9058 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9062 /* Query level for level == -1 special case */
9064 level = query_tree_block_level(fs_info, bytenr);
9066 err |= REFERENCER_MISSING;
/* Look up the subvolume root that is supposed to reference this block */
9070 key.objectid = root_id;
9071 key.type = BTRFS_ROOT_ITEM_KEY;
9072 key.offset = (u64)-1;
9074 root = btrfs_read_fs_root(fs_info, &key);
9076 err |= REFERENCER_MISSING;
9080 /* Read out the tree block to get item/node key */
9081 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9082 if (!extent_buffer_uptodate(eb)) {
9083 err |= REFERENCER_MISSING;
9084 free_extent_buffer(eb);
9088 /* Empty tree, no need to check key */
9089 if (!btrfs_header_nritems(eb) && !level) {
9090 free_extent_buffer(eb);
/* Grab the block's first key (node vs leaf accessor differs) */
9095 btrfs_node_key_to_cpu(eb, &key, 0);
9097 btrfs_item_key_to_cpu(eb, &key, 0);
9099 free_extent_buffer(eb);
9101 btrfs_init_path(&path);
9102 /* Search with the first key, to ensure we can reach it */
9103 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9105 err |= REFERENCER_MISSING;
/* The block found at @level on the path must be the one we started from */
9109 node = path.nodes[level];
9110 if (btrfs_header_bytenr(node) != bytenr) {
9112 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9113 bytenr, nodesize, bytenr,
9114 btrfs_header_bytenr(node));
9115 err |= REFERENCER_MISMATCH;
9117 if (btrfs_header_level(node) != level) {
9119 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9120 bytenr, nodesize, level,
9121 btrfs_header_level(node));
9122 err |= REFERENCER_MISMATCH;
9126 btrfs_release_path(&path);
9128 if (err & REFERENCER_MISSING) {
9130 error("extent [%llu %d] lost referencer (owner: %llu)",
9131 bytenr, nodesize, root_id);
9134 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9135 bytenr, nodesize, root_id, level);
9142 * Check referencer for shared block backref
9143 * If level == -1, this function will resolve the level.
9145 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9146 u64 parent, u64 bytenr, int level)
9148 struct extent_buffer *eb;
9149 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9151 int found_parent = 0;
/* Read the claimed parent node */
9154 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9155 if (!extent_buffer_uptodate(eb))
9159 level = query_tree_block_level(fs_info, bytenr);
/* Parent must sit exactly one level above the child */
9163 if (level + 1 != btrfs_header_level(eb))
/* Scan the parent's block pointers for the child bytenr */
9166 nr = btrfs_header_nritems(eb);
9167 for (i = 0; i < nr; i++) {
9168 if (bytenr == btrfs_node_blockptr(eb, i)) {
9174 free_extent_buffer(eb);
9175 if (!found_parent) {
9177 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9178 bytenr, nodesize, parent, level);
9179 return REFERENCER_MISSING;
9185 * Check referencer for normal (inlined) data ref
9186 * If len == 0, it will be resolved by searching in extent tree
9188 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9189 u64 root_id, u64 objectid, u64 offset,
9190 u64 bytenr, u64 len, u32 count)
9192 struct btrfs_root *root;
9193 struct btrfs_root *extent_root = fs_info->extent_root;
9194 struct btrfs_key key;
9195 struct btrfs_path path;
9196 struct extent_buffer *leaf;
9197 struct btrfs_file_extent_item *fi;
9198 u32 found_count = 0;
/* Resolve @len from the extent tree when the caller passed 0 */
9203 key.objectid = bytenr;
9204 key.type = BTRFS_EXTENT_ITEM_KEY;
9205 key.offset = (u64)-1;
9207 btrfs_init_path(&path);
9208 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9211 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9214 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9215 if (key.objectid != bytenr ||
9216 key.type != BTRFS_EXTENT_ITEM_KEY)
9219 btrfs_release_path(&path);
/* Open the fs root the backref claims owns this data */
9221 key.objectid = root_id;
9222 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9223 key.offset = (u64)-1;
9224 btrfs_init_path(&path);
9226 root = btrfs_read_fs_root(fs_info, &key);
9230 key.objectid = objectid;
9231 key.type = BTRFS_EXTENT_DATA_KEY;
9233 * It can be nasty as data backref offset is
9234 * file offset - file extent offset, which is smaller or
9235 * equal to original backref offset. The only special case is
9236 * overflow. So we need to special check and do further search.
9238 key.offset = offset & (1ULL << 63) ? 0 : offset;
9240 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9245 * Search afterwards to get correct one
9246 * NOTE: As we must do a comprehensive check on the data backref to
9247 * make sure the dref count also matches, we must iterate all file
9248 * extents for that inode.
9251 leaf = path.nodes[0];
9252 slot = path.slots[0];
9254 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Stop once we leave this inode's EXTENT_DATA items */
9255 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9257 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9259 * Except normal disk bytenr and disk num bytes, we still
9260 * need to do extra check on dbackref offset as
9261 * dbackref offset = file_offset - file_extent_offset
9263 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9264 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9265 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9269 ret = btrfs_next_item(root, &path);
9274 btrfs_release_path(&path);
/* The number of matching file extents must equal the backref count */
9275 if (found_count != count) {
9277 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9278 bytenr, len, root_id, objectid, offset, count, found_count);
9279 return REFERENCER_MISSING;
9285 * Check if the referencer of a shared data backref exists
9287 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9288 u64 parent, u64 bytenr)
9290 struct extent_buffer *eb;
9291 struct btrfs_key key;
9292 struct btrfs_file_extent_item *fi;
9293 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9295 int found_parent = 0;
/* Read the parent leaf that is supposed to contain the file extent */
9298 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9299 if (!extent_buffer_uptodate(eb))
/* Scan its EXTENT_DATA items for one pointing at @bytenr */
9302 nr = btrfs_header_nritems(eb);
9303 for (i = 0; i < nr; i++) {
9304 btrfs_item_key_to_cpu(eb, &key, i);
9305 if (key.type != BTRFS_EXTENT_DATA_KEY)
9308 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline extents have no disk bytenr to match against */
9309 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9312 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9319 free_extent_buffer(eb);
9320 if (!found_parent) {
9321 error("shared extent %llu referencer lost (parent: %llu)",
9323 return REFERENCER_MISSING;
9329 * This function will check a given extent item, including its backref and
9330 * itself (like crossing stripe boundary and type)
9332 * Since we don't use extent_record anymore, introduce new error bit
9334 static int check_extent_item(struct btrfs_fs_info *fs_info,
9335 struct extent_buffer *eb, int slot)
9337 struct btrfs_extent_item *ei;
9338 struct btrfs_extent_inline_ref *iref;
9339 struct btrfs_extent_data_ref *dref;
9343 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9344 u32 item_size = btrfs_item_size_nr(eb, slot);
9349 struct btrfs_key key;
/*
 * Global accounting: EXTENT_ITEM encodes length in key.offset,
 * METADATA_ITEM (skinny) is always one nodesize.
 */
9353 btrfs_item_key_to_cpu(eb, &key, slot);
9354 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9355 bytes_used += key.offset;
9357 bytes_used += nodesize;
9359 if (item_size < sizeof(*ei)) {
9361 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9362 * old thing when on disk format is still un-determined.
9363 * No need to care about it anymore
9365 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9369 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9370 flags = btrfs_extent_flags(eb, ei);
9372 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* Metadata blocks must not straddle a 64K stripe boundary */
9374 if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9375 error("bad metadata [%llu, %llu) crossing stripe boundary",
9376 key.objectid, key.objectid + nodesize);
9377 err |= CROSSING_STRIPE_BOUNDARY;
9380 ptr = (unsigned long)(ei + 1);
9382 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9383 /* Old EXTENT_ITEM metadata */
9384 struct btrfs_tree_block_info *info;
9386 info = (struct btrfs_tree_block_info *)ptr;
9387 level = btrfs_tree_block_level(eb, info);
9388 ptr += sizeof(struct btrfs_tree_block_info);
9390 /* New METADATA_ITEM */
9393 end = (unsigned long)ei + item_size;
9396 err |= ITEM_SIZE_MISMATCH;
9400 /* Now check every backref in this extent item */
9402 iref = (struct btrfs_extent_inline_ref *)ptr;
9403 type = btrfs_extent_inline_ref_type(eb, iref);
9404 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* Dispatch on inline ref type to the matching referencer check */
9406 case BTRFS_TREE_BLOCK_REF_KEY:
9407 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9411 case BTRFS_SHARED_BLOCK_REF_KEY:
9412 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9416 case BTRFS_EXTENT_DATA_REF_KEY:
9417 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9418 ret = check_extent_data_backref(fs_info,
9419 btrfs_extent_data_ref_root(eb, dref),
9420 btrfs_extent_data_ref_objectid(eb, dref),
9421 btrfs_extent_data_ref_offset(eb, dref),
9422 key.objectid, key.offset,
9423 btrfs_extent_data_ref_count(eb, dref));
9426 case BTRFS_SHARED_DATA_REF_KEY:
9427 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9431 error("extent[%llu %d %llu] has unknown ref type: %d",
9432 key.objectid, key.type, key.offset, type);
9433 err |= UNKNOWN_TYPE;
9437 ptr += btrfs_extent_inline_ref_size(type);
9446 * Check if a dev extent item is referred correctly by its chunk
9448 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9449 struct extent_buffer *eb, int slot)
9451 struct btrfs_root *chunk_root = fs_info->chunk_root;
9452 struct btrfs_dev_extent *ptr;
9453 struct btrfs_path path;
9454 struct btrfs_key chunk_key;
9455 struct btrfs_key devext_key;
9456 struct btrfs_chunk *chunk;
9457 struct extent_buffer *l;
9461 int found_chunk = 0;
9464 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9465 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9466 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent records which chunk it belongs to; look that chunk up */
9468 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9469 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9470 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9472 btrfs_init_path(&path);
9473 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9478 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
/* Lengths must agree between chunk and dev extent */
9479 if (btrfs_chunk_length(l, chunk) != length)
/* One of the chunk's stripes must point back at this (devid, offset) */
9482 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9483 for (i = 0; i < num_stripes; i++) {
9484 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9485 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9487 if (devid == devext_key.objectid &&
9488 offset == devext_key.offset) {
9494 btrfs_release_path(&path);
9497 "device extent[%llu, %llu, %llu] did not find the related chunk",
9498 devext_key.objectid, devext_key.offset, length);
9499 return REFERENCER_MISSING;
9505 * Check if the used space is correct with the dev item
9507 static int check_dev_item(struct btrfs_fs_info *fs_info,
9508 struct extent_buffer *eb, int slot)
9510 struct btrfs_root *dev_root = fs_info->dev_root;
9511 struct btrfs_dev_item *dev_item;
9512 struct btrfs_path path;
9513 struct btrfs_key key;
9514 struct btrfs_dev_extent *ptr;
9520 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9521 dev_id = btrfs_device_id(eb, dev_item);
9522 used = btrfs_device_bytes_used(eb, dev_item);
/* Position at the first DEV_EXTENT of this device in the dev tree */
9524 key.objectid = dev_id;
9525 key.type = BTRFS_DEV_EXTENT_KEY;
9528 btrfs_init_path(&path);
9529 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9531 btrfs_item_key_to_cpu(eb, &key, slot);
9532 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9533 key.objectid, key.type, key.offset);
9534 btrfs_release_path(&path);
9535 return REFERENCER_MISSING;
9538 /* Iterate dev_extents to calculate the used space of a device */
9540 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9542 if (key.objectid > dev_id)
9544 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9547 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9548 struct btrfs_dev_extent);
9549 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9551 ret = btrfs_next_item(dev_root, &path);
9555 btrfs_release_path(&path);
/* Sum of dev extent lengths must match the dev item's bytes_used */
9557 if (used != total) {
9558 btrfs_item_key_to_cpu(eb, &key, slot);
9560 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9561 total, used, BTRFS_ROOT_TREE_OBJECTID,
9562 BTRFS_DEV_EXTENT_KEY, dev_id);
9563 return ACCOUNTING_MISMATCH;
9569 * Check a block group item with its referencer (chunk) and its used space
9570 * with extent/metadata item
9572 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9573 struct extent_buffer *eb, int slot)
9575 struct btrfs_root *extent_root = fs_info->extent_root;
9576 struct btrfs_root *chunk_root = fs_info->chunk_root;
9577 struct btrfs_block_group_item *bi;
9578 struct btrfs_block_group_item bg_item;
9579 struct btrfs_path path;
9580 struct btrfs_key bg_key;
9581 struct btrfs_key chunk_key;
9582 struct btrfs_key extent_key;
9583 struct btrfs_chunk *chunk;
9584 struct extent_buffer *leaf;
9585 struct btrfs_extent_item *ei;
9586 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the on-disk block group item and cache its used bytes and type flags */
9594 btrfs_item_key_to_cpu(eb, &bg_key, slot);
9595 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9596 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9597 used = btrfs_block_group_used(&bg_item);
9598 bg_flags = btrfs_block_group_flags(&bg_item);
/* A block group's chunk lives at (FIRST_CHUNK_TREE, CHUNK_ITEM, bg start) */
9600 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9601 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9602 chunk_key.offset = bg_key.objectid;
9604 btrfs_init_path(&path);
9605 /* Search for the referencer chunk */
9606 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9609 "block group[%llu %llu] did not find the related chunk item",
9610 bg_key.objectid, bg_key.offset);
9611 err |= REFERENCER_MISSING;
/* Chunk length must match the block group length (bg_key.offset) */
9613 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9614 struct btrfs_chunk);
9615 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9618 "block group[%llu %llu] related chunk item length does not match",
9619 bg_key.objectid, bg_key.offset);
9620 err |= REFERENCER_MISMATCH;
9623 btrfs_release_path(&path);
9625 /* Search from the block group bytenr */
9626 extent_key.objectid = bg_key.objectid;
9627 extent_key.type = 0;
9628 extent_key.offset = 0;
9630 btrfs_init_path(&path);
9631 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9635 /* Iterate extent tree to account used space */
9637 leaf = path.nodes[0];
9638 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Stop once past the end of this block group */
9639 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9642 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9643 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9645 if (extent_key.objectid < bg_key.objectid)
/*
 * METADATA_ITEM offset is the tree level, not a byte length, so a
 * metadata extent contributes nodesize bytes; EXTENT_ITEM offset is
 * the real byte length.
 */
9648 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9651 total += extent_key.offset;
/* Cross-check the extent's DATA/TREE_BLOCK flag against the bg type */
9653 ei = btrfs_item_ptr(leaf, path.slots[0],
9654 struct btrfs_extent_item);
9655 flags = btrfs_extent_flags(leaf, ei);
9656 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9657 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9659 "bad extent[%llu, %llu) type mismatch with chunk",
9660 extent_key.objectid,
9661 extent_key.objectid + extent_key.offset);
9662 err |= CHUNK_TYPE_MISMATCH;
9664 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9665 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9666 BTRFS_BLOCK_GROUP_METADATA))) {
9668 "bad extent[%llu, %llu) type mismatch with chunk",
9669 extent_key.objectid,
9670 extent_key.objectid + nodesize);
9671 err |= CHUNK_TYPE_MISMATCH;
9675 ret = btrfs_next_item(extent_root, &path);
9681 btrfs_release_path(&path);
/* Accumulated extent bytes must equal the block group's used field */
9683 if (total != used) {
9685 "block group[%llu %llu] used %llu but extent items used %llu",
9686 bg_key.objectid, bg_key.offset, used, total);
9687 err |= ACCOUNTING_MISMATCH;
9693 * Check a chunk item.
9694 * Including checking all referred dev_extents and block group
9696 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9697 struct extent_buffer *eb, int slot)
9699 struct btrfs_root *extent_root = fs_info->extent_root;
9700 struct btrfs_root *dev_root = fs_info->dev_root;
9701 struct btrfs_path path;
9702 struct btrfs_key chunk_key;
9703 struct btrfs_key bg_key;
9704 struct btrfs_key devext_key;
9705 struct btrfs_chunk *chunk;
9706 struct extent_buffer *leaf;
9707 struct btrfs_block_group_item *bi;
9708 struct btrfs_block_group_item bg_item;
9709 struct btrfs_dev_extent *ptr;
9710 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
/* Chunk start is chunk_key.offset; its length must be sector aligned */
9722 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9723 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9724 length = btrfs_chunk_length(eb, chunk);
9725 chunk_end = chunk_key.offset + length;
9726 if (!IS_ALIGNED(length, sectorsize)) {
9727 error("chunk[%llu %llu) not aligned to %u",
9728 chunk_key.offset, chunk_end, sectorsize);
9729 err |= BYTES_UNALIGNED;
/* Chunk must have a type bit set and at most one RAID profile bit */
9733 type = btrfs_chunk_type(eb, chunk);
9734 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9735 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9736 error("chunk[%llu %llu) has no chunk type",
9737 chunk_key.offset, chunk_end);
9738 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is non-zero iff more than one bit is set */
9740 if (profile && (profile & (profile - 1))) {
9741 error("chunk[%llu %llu) multiple profiles detected: %llx",
9742 chunk_key.offset, chunk_end, profile);
9743 err |= UNKNOWN_TYPE;
/* Look up the matching block group item in the extent tree */
9746 bg_key.objectid = chunk_key.offset;
9747 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9748 bg_key.offset = length;
9750 btrfs_init_path(&path);
9751 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9754 "chunk[%llu %llu) did not find the related block group item",
9755 chunk_key.offset, chunk_end);
9756 err |= REFERENCER_MISSING;
9758 leaf = path.nodes[0];
9759 bi = btrfs_item_ptr(leaf, path.slots[0],
9760 struct btrfs_block_group_item);
9761 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9763 if (btrfs_block_group_flags(&bg_item) != type) {
9765 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9766 chunk_key.offset, chunk_end, type,
9767 btrfs_block_group_flags(&bg_item));
/*
 * NOTE(review): the block group exists but its flags differ, so
 * REFERENCER_MISMATCH looks like the intended error bit here, not
 * REFERENCER_MISSING — confirm against the error-bit definitions.
 */
9768 err |= REFERENCER_MISSING;
/* Every stripe must have a dev extent pointing back at this chunk */
9772 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9773 for (i = 0; i < num_stripes; i++) {
9774 btrfs_release_path(&path);
9775 btrfs_init_path(&path);
9776 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9777 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9778 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9780 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9785 leaf = path.nodes[0];
9786 ptr = btrfs_item_ptr(leaf, path.slots[0],
9787 struct btrfs_dev_extent);
/* Dev extent must reference this exact chunk (objectid/offset/length) */
9788 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9789 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9790 if (objectid != chunk_key.objectid ||
9791 offset != chunk_key.offset ||
9792 btrfs_dev_extent_length(leaf, ptr) != length)
9796 err |= BACKREF_MISSING;
/*
 * NOTE(review): this message prints chunk_key.objectid, which is
 * always BTRFS_FIRST_CHUNK_TREE_OBJECTID; the chunk's start address
 * is chunk_key.offset — that is presumably what was meant. Confirm.
 */
9798 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9799 chunk_key.objectid, chunk_end, i);
9802 btrfs_release_path(&path);
9808 * Main entry function to check known items and update related accounting info
9810 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9812 struct btrfs_fs_info *fs_info = root->fs_info;
9813 struct btrfs_key key;
9816 struct btrfs_extent_data_ref *dref;
/* Dispatch each item in the leaf to its type-specific checker */
9821 btrfs_item_key_to_cpu(eb, &key, slot);
9822 type = btrfs_key_type(&key);
9825 case BTRFS_EXTENT_DATA_KEY:
9826 ret = check_extent_data_item(root, eb, slot);
9829 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9830 ret = check_block_group_item(fs_info, eb, slot);
9833 case BTRFS_DEV_ITEM_KEY:
9834 ret = check_dev_item(fs_info, eb, slot);
9837 case BTRFS_CHUNK_ITEM_KEY:
9838 ret = check_chunk_item(fs_info, eb, slot);
9841 case BTRFS_DEV_EXTENT_KEY:
9842 ret = check_dev_extent_item(fs_info, eb, slot);
9845 case BTRFS_EXTENT_ITEM_KEY:
9846 case BTRFS_METADATA_ITEM_KEY:
9847 ret = check_extent_item(fs_info, eb, slot);
/* CSUM items are not verified here, only counted for accounting */
9850 case BTRFS_EXTENT_CSUM_KEY:
9851 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9853 case BTRFS_TREE_BLOCK_REF_KEY:
9854 ret = check_tree_block_backref(fs_info, key.offset,
/* Keyed data backref: resolve root/objectid/offset from the item body */
9858 case BTRFS_EXTENT_DATA_REF_KEY:
9859 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9860 ret = check_extent_data_backref(fs_info,
9861 btrfs_extent_data_ref_root(eb, dref),
9862 btrfs_extent_data_ref_objectid(eb, dref),
9863 btrfs_extent_data_ref_offset(eb, dref),
9865 btrfs_extent_data_ref_count(eb, dref));
9868 case BTRFS_SHARED_BLOCK_REF_KEY:
9869 ret = check_shared_block_backref(fs_info, key.offset,
9873 case BTRFS_SHARED_DATA_REF_KEY:
9874 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot; loop ends when the leaf is exhausted */
9882 if (++slot < btrfs_header_nritems(eb))
9889 * Helper function for later fs/subvol tree check. To determine if a tree
9890 * block should be checked.
9891 * This function will ensure only the direct referencer with lowest rootid to
9892 * check a fs/subvolume tree block.
9894 * Backref check at extent tree would detect errors like missing subvolume
9895 * tree, so we can do aggressive check to reduce duplicated checks.
9897 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9899 struct btrfs_root *extent_root = root->fs_info->extent_root;
9900 struct btrfs_key key;
9901 struct btrfs_path path;
9902 struct extent_buffer *leaf;
9904 struct btrfs_extent_item *ei;
9910 struct btrfs_extent_inline_ref *iref;
/* Find the extent item covering this tree block's bytenr */
9913 btrfs_init_path(&path);
9914 key.objectid = btrfs_header_bytenr(eb);
9915 key.type = BTRFS_METADATA_ITEM_KEY;
9916 key.offset = (u64)-1;
9919 * Any failure in backref resolving means we can't determine
9920 * whom the tree block belongs to.
9921 * So in that case, we need to check that tree block
9923 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9927 ret = btrfs_previous_extent_item(extent_root, &path,
9928 btrfs_header_bytenr(eb));
9932 leaf = path.nodes[0];
9933 slot = path.slots[0];
9934 btrfs_item_key_to_cpu(leaf, &key, slot);
9935 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * Skinny METADATA_ITEMs put inline refs right after the extent item;
 * classic EXTENT_ITEMs have a tree_block_info in between.
 */
9937 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9938 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9940 struct btrfs_tree_block_info *info;
9942 info = (struct btrfs_tree_block_info *)(ei + 1);
9943 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Walk all inline refs of the extent item */
9946 item_size = btrfs_item_size_nr(leaf, slot);
9947 ptr = (unsigned long)iref;
9948 end = (unsigned long)ei + item_size;
9950 iref = (struct btrfs_extent_inline_ref *)ptr;
9951 type = btrfs_extent_inline_ref_type(leaf, iref);
9952 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9955 * We only check the tree block if current root is
9956 * the lowest referencer of it.
9958 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9959 offset < root->objectid) {
9960 btrfs_release_path(&path);
9964 ptr += btrfs_extent_inline_ref_size(type);
9967 * Normally we should also check keyed tree block ref, but that may be
9968 * very time consuming. Inlined ref should already make us skip a lot
9969 * of refs now. So skip search keyed tree block ref.
9973 btrfs_release_path(&path);
9978 * Traversal function for tree block. We will do:
9979 * 1) Skip shared fs/subvolume tree blocks
9980 * 2) Update related bytes accounting
9981 * 3) Pre-order traversal
9983 static int traverse_tree_block(struct btrfs_root *root,
9984 struct extent_buffer *node)
9986 struct extent_buffer *eb;
9994 * Skip shared fs/subvolume tree block, in that case they will
9995 * be checked by referencer with lowest rootid
9997 if (is_fstree(root->objectid) && !should_check(root, node))
10000 /* Update bytes accounting */
10001 total_btree_bytes += node->len;
10002 if (fs_root_objectid(btrfs_header_owner(node)))
10003 total_fs_tree_bytes += node->len;
10004 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10005 total_extent_tree_bytes += node->len;
/* Detect pre-mixed-backref relocation leftovers (old backref format) */
10006 if (!found_old_backref &&
10007 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10008 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10009 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10010 found_old_backref = 1;
10012 /* pre-order traversal, check itself first */
10013 level = btrfs_header_level(node);
10014 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10015 btrfs_header_level(node),
10016 btrfs_header_owner(node));
/* A failed ref check is reported but does not abort the traversal */
10020 "check %s failed root %llu bytenr %llu level %d, force continue check",
10021 level ? "node":"leaf", root->objectid,
10022 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf: check its items and account slack space */
10025 btree_space_waste += btrfs_leaf_free_space(root, node);
10026 ret = check_leaf_items(root, node);
/* Node: account unused key-pointer slots as wasted space */
10031 nr = btrfs_header_nritems(node);
10032 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10033 sizeof(struct btrfs_key_ptr);
10035 /* Then check all its children */
10036 for (i = 0; i < nr; i++) {
10037 u64 blocknr = btrfs_node_blockptr(node, i);
10040 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10041 * to call the function itself.
10043 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10044 if (extent_buffer_uptodate(eb)) {
10045 ret = traverse_tree_block(root, eb);
10048 free_extent_buffer(eb);
10055 * Low memory usage version check_chunks_and_extents.
10057 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10059 struct btrfs_path path;
10060 struct btrfs_key key;
10061 struct btrfs_root *root1;
10062 struct btrfs_root *cur_root;
10066 root1 = root->fs_info->chunk_root;
10067 ret = traverse_tree_block(root1, root1->node);
10070 root1 = root->fs_info->tree_root;
10071 ret = traverse_tree_block(root1, root1->node);
10074 btrfs_init_path(&path);
10075 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10077 key.type = BTRFS_ROOT_ITEM_KEY;
10079 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10081 error("cannot find extent treet in tree_root");
10086 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10087 if (key.type != BTRFS_ROOT_ITEM_KEY)
10089 key.offset = (u64)-1;
10091 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10092 if (IS_ERR(cur_root) || !cur_root) {
10093 error("failed to read tree: %lld", key.objectid);
10097 ret = traverse_tree_block(cur_root, cur_root->node);
10101 ret = btrfs_next_item(root1, &path);
10107 btrfs_release_path(&path);
/*
 * Replace a root's node with a freshly allocated (or overwritten) empty one
 * and update the root item accordingly.
 */
10111 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10112 struct btrfs_root *root, int overwrite)
10114 struct extent_buffer *c;
10115 struct extent_buffer *old = root->node;
10118 struct btrfs_disk_key disk_key = {0,0,0};
10124 extent_buffer_get(c);
10127 c = btrfs_alloc_free_block(trans, root,
10129 root->root_key.objectid,
10130 &disk_key, level, 0, 0);
10133 extent_buffer_get(c);
/* Initialize the new node's header from scratch */
10137 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10138 btrfs_set_header_level(c, level);
10139 btrfs_set_header_bytenr(c, c->start);
10140 btrfs_set_header_generation(c, trans->transid);
10141 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10142 btrfs_set_header_owner(c, root->root_key.objectid);
10144 write_extent_buffer(c, root->fs_info->fsid,
10145 btrfs_header_fsid(), BTRFS_FSID_SIZE);
10147 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10148 btrfs_header_chunk_tree_uuid(c),
10151 btrfs_mark_buffer_dirty(c);
10153 * this case can happen in the following case:
10155 * 1.overwrite previous root.
10157 * 2.reinit reloc data root, this is because we skip pin
10158 * down reloc data tree before which means we can allocate
10159 * same block bytenr here.
10161 if (old->start == c->start) {
10162 btrfs_set_root_generation(&root->root_item,
10164 root->root_item.level = btrfs_header_level(root->node);
10165 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10166 &root->root_key, &root->root_item);
10168 free_extent_buffer(c);
10172 free_extent_buffer(old);
10174 add_root_to_dirty_list(root);
/*
 * Recursively pin every reachable tree block so the allocator will not
 * hand out their byte ranges while the extent tree is being rebuilt.
 * When called on the tree root (tree_root != 0), also descends into the
 * roots referenced by ROOT_ITEMs (except extent/reloc roots).
 */
10178 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10179 struct extent_buffer *eb, int tree_root)
10181 struct extent_buffer *tmp;
10182 struct btrfs_root_item *ri;
10183 struct btrfs_key key;
10186 int level = btrfs_header_level(eb);
10192 * If we have pinned this block before, don't pin it again.
10193 * This can not only avoid forever loop with broken filesystem
10194 * but also give us some speedups.
10196 if (test_range_bit(&fs_info->pinned_extents, eb->start,
10197 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10200 btrfs_pin_extent(fs_info, eb->start, eb->len);
10202 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10203 nritems = btrfs_header_nritems(eb);
10204 for (i = 0; i < nritems; i++) {
/* Leaf of the tree root: follow ROOT_ITEMs into their subtrees */
10206 btrfs_item_key_to_cpu(eb, &key, i);
10207 if (key.type != BTRFS_ROOT_ITEM_KEY)
10209 /* Skip the extent root and reloc roots */
10210 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10211 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10212 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10214 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10215 bytenr = btrfs_disk_root_bytenr(eb, ri);
10218 * If at any point we start needing the real root we
10219 * will have to build a stump root for the root we are
10220 * in, but for now this doesn't actually use the root so
10221 * just pass in extent_root.
10223 tmp = read_tree_block(fs_info->extent_root, bytenr,
10225 if (!extent_buffer_uptodate(tmp)) {
10226 fprintf(stderr, "Error reading root block\n");
10229 ret = pin_down_tree_blocks(fs_info, tmp, 0);
10230 free_extent_buffer(tmp);
/* Internal node: recurse into children */
10234 bytenr = btrfs_node_blockptr(eb, i);
10236 /* If we aren't the tree root don't read the block */
10237 if (level == 1 && !tree_root) {
10238 btrfs_pin_extent(fs_info, bytenr, nodesize);
10242 tmp = read_tree_block(fs_info->extent_root, bytenr,
10244 if (!extent_buffer_uptodate(tmp)) {
10245 fprintf(stderr, "Error reading tree block\n");
10248 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10249 free_extent_buffer(tmp);
10258 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10262 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10266 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Drop and recreate the in-memory block group records from the chunk tree,
 * marking all chunk ranges as free space. Used before reinitializing the
 * extent tree.
 */
10269 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10271 struct btrfs_block_group_cache *cache;
10272 struct btrfs_path *path;
10273 struct extent_buffer *leaf;
10274 struct btrfs_chunk *chunk;
10275 struct btrfs_key key;
10279 path = btrfs_alloc_path();
10284 key.type = BTRFS_CHUNK_ITEM_KEY;
10287 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10289 btrfs_free_path(path);
10294 * We do this in case the block groups were screwed up and had alloc
10295 * bits that aren't actually set on the chunks. This happens with
10296 * restored images every time and could happen in real life I guess.
10298 fs_info->avail_data_alloc_bits = 0;
10299 fs_info->avail_metadata_alloc_bits = 0;
10300 fs_info->avail_system_alloc_bits = 0;
10302 /* First we need to create the in-memory block groups */
10304 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10305 ret = btrfs_next_leaf(fs_info->chunk_root, path);
10307 btrfs_free_path(path);
10315 leaf = path->nodes[0];
10316 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10317 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
/* One in-memory block group per chunk; range starts as free space */
10322 chunk = btrfs_item_ptr(leaf, path->slots[0],
10323 struct btrfs_chunk);
10324 btrfs_add_block_group(fs_info, 0,
10325 btrfs_chunk_type(leaf, chunk),
10326 key.objectid, key.offset,
10327 btrfs_chunk_length(leaf, chunk));
10328 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10329 key.offset + btrfs_chunk_length(leaf, chunk),
/* Walk the created block groups to re-derive the avail alloc bits */
10335 cache = btrfs_lookup_first_block_group(fs_info, start);
10339 start = cache->key.objectid + cache->key.offset;
10342 btrfs_free_path(path);
/*
 * Remove any pending balance state: delete the balance item, delete all
 * tree-reloc root items, and reinitialize the data reloc tree.
 */
10346 static int reset_balance(struct btrfs_trans_handle *trans,
10347 struct btrfs_fs_info *fs_info)
10349 struct btrfs_root *root = fs_info->tree_root;
10350 struct btrfs_path *path;
10351 struct extent_buffer *leaf;
10352 struct btrfs_key key;
10353 int del_slot, del_nr = 0;
10357 path = btrfs_alloc_path();
/* Delete the balance item, if present */
10361 key.objectid = BTRFS_BALANCE_OBJECTID;
10362 key.type = BTRFS_BALANCE_ITEM_KEY;
10365 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10370 goto reinit_data_reloc;
10375 ret = btrfs_del_item(trans, root, path);
10378 btrfs_release_path(path);
/* Batch-delete all TREE_RELOC root items from the tree root */
10380 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10381 key.type = BTRFS_ROOT_ITEM_KEY;
10384 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10388 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10393 ret = btrfs_del_items(trans, root, path,
10400 btrfs_release_path(path);
10403 ret = btrfs_search_slot(trans, root, &key, path,
10410 leaf = path->nodes[0];
10411 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10412 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10414 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
/* Remember a contiguous run of reloc items to delete in one call */
10419 del_slot = path->slots[0];
10428 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10432 btrfs_release_path(path);
/* Reinitialize the data reloc tree to an empty root */
10435 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10436 key.type = BTRFS_ROOT_ITEM_KEY;
10437 key.offset = (u64)-1;
10438 root = btrfs_read_fs_root(fs_info, &key);
10439 if (IS_ERR(root)) {
10440 fprintf(stderr, "Error reading data reloc tree\n");
10441 ret = PTR_ERR(root);
10444 record_root_in_trans(trans, root);
10445 ret = btrfs_fsck_reinit_root(trans, root, 0);
10448 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10450 btrfs_free_path(path);
/*
 * Rebuild the extent tree from scratch: pin all live metadata, reset the
 * in-memory block groups, reinit the extent root, re-insert block group
 * items and clear pending balance state.
 */
10454 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10455 struct btrfs_fs_info *fs_info)
10461 * The only reason we don't do this is because right now we're just
10462 * walking the trees we find and pinning down their bytes, we don't look
10463 * at any of the leaves. In order to do mixed groups we'd have to check
10464 * the leaves of any fs roots and pin down the bytes for any file
10465 * extents we find. Not hard but why do it if we don't have to?
10467 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10468 fprintf(stderr, "We don't support re-initing the extent tree "
10469 "for mixed block groups yet, please notify a btrfs "
10470 "developer you want to do this so they can add this "
10471 "functionality.\n");
10476 * first we need to walk all of the trees except the extent tree and pin
10477 * down the bytes that are in use so we don't overwrite any existing
10480 ret = pin_metadata_blocks(fs_info);
10482 fprintf(stderr, "error pinning down used bytes\n");
10487 * Need to drop all the block groups since we're going to recreate all
10490 btrfs_free_block_groups(fs_info);
10491 ret = reset_block_groups(fs_info);
10493 fprintf(stderr, "error resetting the block groups\n");
10497 /* Ok we can allocate now, reinit the extent root */
10498 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10500 fprintf(stderr, "extent root initialization failed\n");
10502 * When the transaction code is updated we should end the
10503 * transaction, but for now progs only knows about commit so
10504 * just return an error.
10510 * Now we have all the in-memory block groups setup so we can make
10511 * allocations properly, and the metadata we care about is safe since we
10512 * pinned all of it above.
10515 struct btrfs_block_group_cache *cache;
10517 cache = btrfs_lookup_first_block_group(fs_info, start);
10520 start = cache->key.objectid + cache->key.offset;
/* Re-insert one BLOCK_GROUP_ITEM per in-memory block group */
10521 ret = btrfs_insert_item(trans, fs_info->extent_root,
10522 &cache->key, &cache->item,
10523 sizeof(cache->item));
10525 fprintf(stderr, "Error adding block group\n");
10528 btrfs_extent_post_op(trans, fs_info->extent_root);
10531 ret = reset_balance(trans, fs_info);
10533 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a COW of the given metadata block by searching down to it with
 * cow=1 in a new transaction on its owner root.
 */
10538 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10540 struct btrfs_path *path;
10541 struct btrfs_trans_handle *trans;
10542 struct btrfs_key key;
10545 printf("Recowing metadata block %llu\n", eb->start);
/* Resolve the owner root recorded in the block header */
10546 key.objectid = btrfs_header_owner(eb);
10547 key.type = BTRFS_ROOT_ITEM_KEY;
10548 key.offset = (u64)-1;
10550 root = btrfs_read_fs_root(root->fs_info, &key);
10551 if (IS_ERR(root)) {
10552 fprintf(stderr, "Couldn't find owner root %llu\n",
10554 return PTR_ERR(root);
10557 path = btrfs_alloc_path();
10561 trans = btrfs_start_transaction(root, 1);
10562 if (IS_ERR(trans)) {
10563 btrfs_free_path(path);
10564 return PTR_ERR(trans);
/*
 * Search down to the block's own level with its first key; cow=1 makes
 * btrfs_search_slot rewrite the path, including this block.
 */
10567 path->lowest_level = btrfs_header_level(eb);
10568 if (path->lowest_level)
10569 btrfs_node_key_to_cpu(eb, &key, 0);
10571 btrfs_item_key_to_cpu(eb, &key, 0);
10573 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10574 btrfs_commit_transaction(trans, root);
10575 btrfs_free_path(path);
/*
 * Delete a known-bad item (recorded with its root id and key) from its
 * owner root inside a fresh transaction.
 */
10579 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10581 struct btrfs_path *path;
10582 struct btrfs_trans_handle *trans;
10583 struct btrfs_key key;
10586 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10587 bad->key.type, bad->key.offset);
/* Look up the root that owns the bad item */
10588 key.objectid = bad->root_id;
10589 key.type = BTRFS_ROOT_ITEM_KEY;
10590 key.offset = (u64)-1;
10592 root = btrfs_read_fs_root(root->fs_info, &key);
10593 if (IS_ERR(root)) {
10594 fprintf(stderr, "Couldn't find owner root %llu\n",
10596 return PTR_ERR(root);
10599 path = btrfs_alloc_path();
10603 trans = btrfs_start_transaction(root, 1);
10604 if (IS_ERR(trans)) {
10605 btrfs_free_path(path);
10606 return PTR_ERR(trans);
/* ins_len=-1, cow=1: position on the exact item for deletion */
10609 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10615 ret = btrfs_del_item(trans, root, path);
10617 btrfs_commit_transaction(trans, root);
10618 btrfs_free_path(path);
10622 static int zero_log_tree(struct btrfs_root *root)
10624 struct btrfs_trans_handle *trans;
10627 trans = btrfs_start_transaction(root, 1);
10628 if (IS_ERR(trans)) {
10629 ret = PTR_ERR(trans);
10632 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10633 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10634 ret = btrfs_commit_transaction(trans, root);
/*
 * Read the data extent at [start, start + len) sector by sector and insert
 * a checksum item for each sector into the csum tree.
 */
10638 static int populate_csum(struct btrfs_trans_handle *trans,
10639 struct btrfs_root *csum_root, char *buf, u64 start,
10646 while (offset < len) {
10647 sectorsize = csum_root->sectorsize;
/* Read one sector of file data into buf, then checksum it */
10648 ret = read_extent_data(csum_root, buf, start + offset,
10652 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10653 start + offset, buf, sectorsize);
10656 offset += sectorsize;
/*
 * Walk one fs/subvolume tree and (re)generate csum items for every regular
 * (non-inline, non-prealloc) file extent it references.
 */
10661 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10662 struct btrfs_root *csum_root,
10663 struct btrfs_root *cur_root)
10665 struct btrfs_path *path;
10666 struct btrfs_key key;
10667 struct extent_buffer *node;
10668 struct btrfs_file_extent_item *fi;
10675 path = btrfs_alloc_path();
/* One sector-sized scratch buffer reused for every extent */
10678 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10688 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10691 /* Iterate all regular file extents and fill its csum */
10693 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10695 if (key.type != BTRFS_EXTENT_DATA_KEY)
10697 node = path->nodes[0];
10698 slot = path->slots[0];
10699 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10700 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10702 start = btrfs_file_extent_disk_bytenr(node, fi);
10703 len = btrfs_file_extent_disk_num_bytes(node, fi);
/* EEXIST means the csum is already there — not an error */
10705 ret = populate_csum(trans, csum_root, buf, start, len);
10706 if (ret == -EEXIST)
10712 * TODO: if next leaf is corrupted, jump to nearest next valid
10715 ret = btrfs_next_item(cur_root, path);
10725 btrfs_free_path(path);
/*
 * Rebuild the csum tree by iterating every fs/subvolume tree found in the
 * tree root and processing their file extents.
 */
10730 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10731 struct btrfs_root *csum_root)
10733 struct btrfs_fs_info *fs_info = csum_root->fs_info;
10734 struct btrfs_path *path;
10735 struct btrfs_root *tree_root = fs_info->tree_root;
10736 struct btrfs_root *cur_root;
10737 struct extent_buffer *node;
10738 struct btrfs_key key;
10742 path = btrfs_alloc_path();
/* Start scanning ROOT_ITEMs from the first fs tree objectid */
10746 key.objectid = BTRFS_FS_TREE_OBJECTID;
10748 key.type = BTRFS_ROOT_ITEM_KEY;
10750 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10759 node = path->nodes[0];
10760 slot = path->slots[0];
10761 btrfs_item_key_to_cpu(node, &key, slot);
10762 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10764 if (key.type != BTRFS_ROOT_ITEM_KEY)
10766 if (!is_fstree(key.objectid))
/* Read the latest generation of this subvolume root */
10768 key.offset = (u64)-1;
10770 cur_root = btrfs_read_fs_root(fs_info, &key);
10771 if (IS_ERR(cur_root) || !cur_root) {
10772 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10776 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10781 ret = btrfs_next_item(tree_root, path);
10791 btrfs_free_path(path);
/*
 * Rebuild the csum tree by scanning the extent tree for DATA extents and
 * checksumming their contents.
 */
10795 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10796 struct btrfs_root *csum_root)
10798 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10799 struct btrfs_path *path;
10800 struct btrfs_extent_item *ei;
10801 struct extent_buffer *leaf;
10803 struct btrfs_key key;
10806 path = btrfs_alloc_path();
10811 key.type = BTRFS_EXTENT_ITEM_KEY;
10814 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10816 btrfs_free_path(path);
/* Sector-sized scratch buffer reused for every data extent */
10820 buf = malloc(csum_root->sectorsize);
10822 btrfs_free_path(path);
10827 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10828 ret = btrfs_next_leaf(extent_root, path);
10836 leaf = path->nodes[0];
10838 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10839 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Only DATA extents carry checksums; skip tree blocks */
10844 ei = btrfs_item_ptr(leaf, path->slots[0],
10845 struct btrfs_extent_item);
10846 if (!(btrfs_extent_flags(leaf, ei) &
10847 BTRFS_EXTENT_FLAG_DATA)) {
/* key.objectid = extent start, key.offset = extent byte length */
10852 ret = populate_csum(trans, csum_root, buf, key.objectid,
10859 btrfs_free_path(path);
/*
 * Recalculate the csums and put them into the csum tree.
 *
 * Extent tree init wipes out all extent info, so in that case we can't
 * depend on the extent tree and must use the fs trees instead. If
 * search_fs_tree is set, the csum tree is rebuilt from fs/subvol trees;
 * otherwise it is rebuilt from the extent tree.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
			      : fill_csum_tree_from_extent(trans, csum_root);
}
/*
 * Free every root_item_info entry in the global roots_info_cache, then the
 * cache tree itself, and reset the global pointer. No-op if never built.
 */
10881 static void free_roots_info_cache(void)
10883 if (!roots_info_cache)
10886 while (!cache_tree_empty(roots_info_cache)) {
10887 struct cache_extent *entry;
10888 struct root_item_info *rii;
10890 entry = first_cache_extent(roots_info_cache);
10893 remove_cache_extent(roots_info_cache, entry);
/* Entries are root_item_info embedding the cache_extent */
10894 rii = container_of(entry, struct root_item_info, cache_extent);
10898 free(roots_info_cache);
10899 roots_info_cache = NULL;
/*
 * Scan the extent tree for tree-block extents whose first inline ref is a
 * TREE_BLOCK_REF, and for each referencing root record the highest-level
 * block seen (bytenr, generation, level) in the global roots_info_cache.
 * Later used to detect/repair stale root items.
 */
10902 static int build_roots_info_cache(struct btrfs_fs_info *info)
10905 struct btrfs_key key;
10906 struct extent_buffer *leaf;
10907 struct btrfs_path *path;
10909 if (!roots_info_cache) {
10910 roots_info_cache = malloc(sizeof(*roots_info_cache));
10911 if (!roots_info_cache)
10913 cache_tree_init(roots_info_cache);
10916 path = btrfs_alloc_path();
10921 key.type = BTRFS_EXTENT_ITEM_KEY;
10924 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10927 leaf = path->nodes[0];
10930 struct btrfs_key found_key;
10931 struct btrfs_extent_item *ei;
10932 struct btrfs_extent_inline_ref *iref;
10933 int slot = path->slots[0];
10938 struct cache_extent *entry;
10939 struct root_item_info *rii;
10941 if (slot >= btrfs_header_nritems(leaf)) {
10942 ret = btrfs_next_leaf(info->extent_root, path);
10949 leaf = path->nodes[0];
10950 slot = path->slots[0];
10953 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10955 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10956 found_key.type != BTRFS_METADATA_ITEM_KEY)
/* Only tree-block extents matter for root items */
10959 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10960 flags = btrfs_extent_flags(leaf, ei);
10962 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10963 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * Skinny metadata items encode the level in key.offset; classic
 * extent items carry a tree_block_info before the inline refs.
 */
10966 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10967 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10968 level = found_key.offset;
10970 struct btrfs_tree_block_info *binfo;
10972 binfo = (struct btrfs_tree_block_info *)(ei + 1);
10973 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10974 level = btrfs_tree_block_level(leaf, binfo);
10978 * For a root extent, it must be of the following type and the
10979 * first (and only one) iref in the item.
10981 type = btrfs_extent_inline_ref_type(leaf, iref);
10982 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10985 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10986 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* First time we see this root: create its cache entry */
10988 rii = malloc(sizeof(struct root_item_info));
10993 rii->cache_extent.start = root_id;
10994 rii->cache_extent.size = 1;
10995 rii->level = (u8)-1;
10996 entry = &rii->cache_extent;
10997 ret = insert_cache_extent(roots_info_cache, entry);
11000 rii = container_of(entry, struct root_item_info,
11004 ASSERT(rii->cache_extent.start == root_id);
11005 ASSERT(rii->cache_extent.size == 1);
/* Track the highest-level block for this root as its candidate root node */
11007 if (level > rii->level || rii->level == (u8)-1) {
11008 rii->level = level;
11009 rii->bytenr = found_key.objectid;
11010 rii->gen = btrfs_extent_generation(leaf, ei);
11011 rii->node_count = 1;
11012 } else if (level == rii->level) {
11020 btrfs_free_path(path);
/*
 * Compare a root item on disk against the cached info derived from the
 * extent tree (build_roots_info_cache) and, unless in read-only mode,
 * rewrite its bytenr/level/generation when they are stale.
 * NOTE(review): the function's final return statements lie just past the
 * end of this chunk — confirm return-value semantics in the full file.
 */
11025 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11026 struct btrfs_path *path,
11027 const struct btrfs_key *root_key,
11028 const int read_only_mode)
11030 const u64 root_id = root_key->objectid;
11031 struct cache_extent *entry;
11032 struct root_item_info *rii;
11033 struct btrfs_root_item ri;
11034 unsigned long offset;
11036 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11039 "Error: could not find extent items for root %llu\n",
11040 root_key->objectid);
11044 rii = container_of(entry, struct root_item_info, cache_extent);
11045 ASSERT(rii->cache_extent.start == root_id);
11046 ASSERT(rii->cache_extent.size == 1);
/* More than one candidate root node means the cache info is ambiguous */
11048 if (rii->node_count != 1) {
11050 "Error: could not find btree root extent for root %llu\n",
/* Read the root item currently on disk at path's position */
11055 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11056 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11058 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11059 btrfs_root_level(&ri) != rii->level ||
11060 btrfs_root_generation(&ri) != rii->gen) {
11063 * If we're in repair mode but our caller told us to not update
11064 * the root item, i.e. just check if it needs to be updated, don't
11065 * print this message, since the caller will call us again shortly
11066 * for the same root item without read only mode (the caller will
11067 * open a transaction first).
11069 if (!(read_only_mode && repair))
11071 "%sroot item for root %llu,"
11072 " current bytenr %llu, current gen %llu, current level %u,"
11073 " new bytenr %llu, new gen %llu, new level %u\n",
11074 (read_only_mode ? "" : "fixing "),
11076 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11077 btrfs_root_level(&ri),
11078 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the found root node is unfixable here */
11080 if (btrfs_root_generation(&ri) > rii->gen) {
11082 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11083 root_id, btrfs_root_generation(&ri), rii->gen);
11087 if (!read_only_mode) {
11088 btrfs_set_root_bytenr(&ri, rii->bytenr);
11089 btrfs_set_root_level(&ri, rii->level);
11090 btrfs_set_root_generation(&ri, rii->gen);
11091 write_extent_buffer(path->nodes[0], &ri,
11092 offset, sizeof(ri));
11102 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11103 * caused read-only snapshots to be corrupted if they were created at a moment
11104 * when the source subvolume/snapshot had orphan items. The issue was that the
11105 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11106 * node instead of the post orphan cleanup root node.
11107 * So this function, and its callees, just detects and fixes those cases. Even
11108 * though the regression was for read-only snapshots, this function applies to
11109 * any snapshot/subvolume root.
11110 * This must be run before any other repair code - not doing so makes other
11111 * repair code delete or modify backrefs in the extent tree for example, which
11112 * will result in an inconsistent fs after repairing the root items.
/*
 * Walk all root items in the tree of tree roots and repair (or, without
 * --repair, just report) the ones whose bytenr/level/generation disagree
 * with the root node found via the extent tree. See the comment above for
 * why this must run before any other repair code.
 *
 * NOTE(review): presumably returns the number of roots fixed/needing a fix,
 * or a negative errno — the return lines are elided from this view; confirm
 * against the full source.
 */
11114 static int repair_root_items(struct btrfs_fs_info *info)
11116 struct btrfs_path *path = NULL;
11117 struct btrfs_key key;
11118 struct extent_buffer *leaf;
11119 struct btrfs_trans_handle *trans = NULL;
11122 int need_trans = 0;
/* First pass over the extent tree: collect per-root node info. */
11124 ret = build_roots_info_cache(info);
11128 path = btrfs_alloc_path();
/* Start at the first possible subvolume root item. */
11134 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11135 key.type = BTRFS_ROOT_ITEM_KEY;
11140 * Avoid opening and committing transactions if a leaf doesn't have
11141 * any root items that need to be fixed, so that we avoid rotating
11142 * backup roots unnecessarily.
11145 trans = btrfs_start_transaction(info->tree_root, 1);
11146 if (IS_ERR(trans)) {
11147 ret = PTR_ERR(trans);
11152 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11156 leaf = path->nodes[0];
11159 struct btrfs_key found_key;
/* Ran off the end of this leaf: advance to the next one. */
11161 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11162 int no_more_keys = find_next_key(path, &key);
11164 btrfs_release_path(path);
/* Commit per leaf so the write-side pass stays small. */
11166 ret = btrfs_commit_transaction(trans,
11178 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* Only root items matter here; relocation trees are skipped. */
11180 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11182 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* Read-only probe when no transaction is open yet (see comment above). */
11185 ret = maybe_repair_root_item(info, path, &found_key,
/* A fix is needed and repair is enabled: redo this leaf with a trans. */
11190 if (!trans && repair) {
11193 btrfs_release_path(path);
/* Cleanup path (reached on both success and error). */
11203 free_roots_info_cache();
11204 btrfs_free_path(path);
11206 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Usage/help text for "btrfs check", printed by usage(); first entry is the
 * synopsis, the rest are description and option lines. The strings below are
 * user-visible output and must not be reworded casually.
 */
11213 const char * const cmd_check_usage[] = {
11214 "btrfs check [options] <device>",
11215 "Check structural integrity of a filesystem (unmounted).",
11216 "Check structural integrity of an unmounted filesystem. Verify internal",
11217 "trees' consistency and item connectivity. In the repair mode try to",
11218 "fix the problems found. ",
11219 "WARNING: the repair mode is considered dangerous",
11221 "-s|--super <superblock> use this superblock copy",
11222 "-b|--backup use the first valid backup root copy",
11223 "--repair try to repair the filesystem",
11224 "--readonly run in read-only mode (default)",
11225 "--init-csum-tree create a new CRC tree",
11226 "--init-extent-tree create a new extent tree",
11227 "--mode <MODE> select mode, allows to make some memory/IO",
11228 " trade-offs, where MODE is one of:",
11229 " original - read inodes and extents to memory (requires",
11230 " more memory, does less IO)",
11231 " lowmem - try to use less memory but read blocks again",
11233 "--check-data-csum verify checksums of data blocks",
11234 "-Q|--qgroup-report print a report on qgroup consistency",
11235 "-E|--subvol-extents <subvolid>",
11236 " print subvolume extents and sharing state",
11237 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11238 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11239 "-p|--progress indicate progress",
11243 int cmd_check(int argc, char **argv)
11245 struct cache_tree root_cache;
11246 struct btrfs_root *root;
11247 struct btrfs_fs_info *info;
11250 u64 tree_root_bytenr = 0;
11251 u64 chunk_root_bytenr = 0;
11252 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11255 int init_csum_tree = 0;
11257 int qgroup_report = 0;
11258 int qgroups_repaired = 0;
11259 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11263 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11264 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11265 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11267 static const struct option long_options[] = {
11268 { "super", required_argument, NULL, 's' },
11269 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11270 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11271 { "init-csum-tree", no_argument, NULL,
11272 GETOPT_VAL_INIT_CSUM },
11273 { "init-extent-tree", no_argument, NULL,
11274 GETOPT_VAL_INIT_EXTENT },
11275 { "check-data-csum", no_argument, NULL,
11276 GETOPT_VAL_CHECK_CSUM },
11277 { "backup", no_argument, NULL, 'b' },
11278 { "subvol-extents", required_argument, NULL, 'E' },
11279 { "qgroup-report", no_argument, NULL, 'Q' },
11280 { "tree-root", required_argument, NULL, 'r' },
11281 { "chunk-root", required_argument, NULL,
11282 GETOPT_VAL_CHUNK_TREE },
11283 { "progress", no_argument, NULL, 'p' },
11284 { "mode", required_argument, NULL,
11286 { NULL, 0, NULL, 0}
11289 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11293 case 'a': /* ignored */ break;
11295 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11298 num = arg_strtou64(optarg);
11299 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11301 "ERROR: super mirror should be less than: %d\n",
11302 BTRFS_SUPER_MIRROR_MAX);
11305 bytenr = btrfs_sb_offset(((int)num));
11306 printf("using SB copy %llu, bytenr %llu\n", num,
11307 (unsigned long long)bytenr);
11313 subvolid = arg_strtou64(optarg);
11316 tree_root_bytenr = arg_strtou64(optarg);
11318 case GETOPT_VAL_CHUNK_TREE:
11319 chunk_root_bytenr = arg_strtou64(optarg);
11322 ctx.progress_enabled = true;
11326 usage(cmd_check_usage);
11327 case GETOPT_VAL_REPAIR:
11328 printf("enabling repair mode\n");
11330 ctree_flags |= OPEN_CTREE_WRITES;
11332 case GETOPT_VAL_READONLY:
11335 case GETOPT_VAL_INIT_CSUM:
11336 printf("Creating a new CRC tree\n");
11337 init_csum_tree = 1;
11339 ctree_flags |= OPEN_CTREE_WRITES;
11341 case GETOPT_VAL_INIT_EXTENT:
11342 init_extent_tree = 1;
11343 ctree_flags |= (OPEN_CTREE_WRITES |
11344 OPEN_CTREE_NO_BLOCK_GROUPS);
11347 case GETOPT_VAL_CHECK_CSUM:
11348 check_data_csum = 1;
11350 case GETOPT_VAL_MODE:
11351 check_mode = parse_check_mode(optarg);
11352 if (check_mode == CHECK_MODE_UNKNOWN) {
11353 error("unknown mode: %s", optarg);
11360 if (check_argc_exact(argc - optind, 1))
11361 usage(cmd_check_usage);
11363 if (ctx.progress_enabled) {
11364 ctx.tp = TASK_NOTHING;
11365 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11368 /* This check is the only reason for --readonly to exist */
11369 if (readonly && repair) {
11370 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11375 * Not supported yet
11377 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11378 error("Low memory mode doesn't support repair yet");
11383 cache_tree_init(&root_cache);
11385 if((ret = check_mounted(argv[optind])) < 0) {
11386 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11389 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11394 /* only allow partial opening under repair mode */
11396 ctree_flags |= OPEN_CTREE_PARTIAL;
11398 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11399 chunk_root_bytenr, ctree_flags);
11401 fprintf(stderr, "Couldn't open file system\n");
11406 global_info = info;
11407 root = info->fs_root;
11410 * repair mode will force us to commit transaction which
11411 * will make us fail to load log tree when mounting.
11413 if (repair && btrfs_super_log_root(info->super_copy)) {
11414 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11419 ret = zero_log_tree(root);
11421 fprintf(stderr, "fail to zero log tree\n");
11426 uuid_unparse(info->super_copy->fsid, uuidbuf);
11427 if (qgroup_report) {
11428 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11430 ret = qgroup_verify_all(info);
11436 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11437 subvolid, argv[optind], uuidbuf);
11438 ret = print_extent_state(info, subvolid);
11441 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11443 if (!extent_buffer_uptodate(info->tree_root->node) ||
11444 !extent_buffer_uptodate(info->dev_root->node) ||
11445 !extent_buffer_uptodate(info->chunk_root->node)) {
11446 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11451 if (init_extent_tree || init_csum_tree) {
11452 struct btrfs_trans_handle *trans;
11454 trans = btrfs_start_transaction(info->extent_root, 0);
11455 if (IS_ERR(trans)) {
11456 fprintf(stderr, "Error starting transaction\n");
11457 ret = PTR_ERR(trans);
11461 if (init_extent_tree) {
11462 printf("Creating a new extent tree\n");
11463 ret = reinit_extent_tree(trans, info);
11468 if (init_csum_tree) {
11469 fprintf(stderr, "Reinit crc root\n");
11470 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11472 fprintf(stderr, "crc root initialization failed\n");
11477 ret = fill_csum_tree(trans, info->csum_root,
11480 fprintf(stderr, "crc refilling failed\n");
11485 * Ok now we commit and run the normal fsck, which will add
11486 * extent entries for all of the items it finds.
11488 ret = btrfs_commit_transaction(trans, info->extent_root);
11492 if (!extent_buffer_uptodate(info->extent_root->node)) {
11493 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11497 if (!extent_buffer_uptodate(info->csum_root->node)) {
11498 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
11503 if (!ctx.progress_enabled)
11504 fprintf(stderr, "checking extents\n");
11505 if (check_mode == CHECK_MODE_LOWMEM)
11506 ret = check_chunks_and_extents_v2(root);
11508 ret = check_chunks_and_extents(root);
11510 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
11512 ret = repair_root_items(info);
11516 fprintf(stderr, "Fixed %d roots.\n", ret);
11518 } else if (ret > 0) {
11520 "Found %d roots with an outdated root item.\n",
11523 "Please run a filesystem check with the option --repair to fix them.\n");
11528 if (!ctx.progress_enabled) {
11529 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11530 fprintf(stderr, "checking free space tree\n");
11532 fprintf(stderr, "checking free space cache\n");
11534 ret = check_space_cache(root);
11539 * We used to have to have these hole extents in between our real
11540 * extents so if we don't have this flag set we need to make sure there
11541 * are no gaps in the file extents for inodes, otherwise we can just
11542 * ignore it when this happens.
11544 no_holes = btrfs_fs_incompat(root->fs_info,
11545 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11546 if (!ctx.progress_enabled)
11547 fprintf(stderr, "checking fs roots\n");
11548 ret = check_fs_roots(root, &root_cache);
11552 fprintf(stderr, "checking csums\n");
11553 ret = check_csums(root);
11557 fprintf(stderr, "checking root refs\n");
11558 ret = check_root_refs(root, &root_cache);
11562 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11563 struct extent_buffer *eb;
11565 eb = list_first_entry(&root->fs_info->recow_ebs,
11566 struct extent_buffer, recow);
11567 list_del_init(&eb->recow);
11568 ret = recow_extent_buffer(root, eb);
11573 while (!list_empty(&delete_items)) {
11574 struct bad_item *bad;
11576 bad = list_first_entry(&delete_items, struct bad_item, list);
11577 list_del_init(&bad->list);
11579 ret = delete_bad_item(root, bad);
11583 if (info->quota_enabled) {
11585 fprintf(stderr, "checking quota groups\n");
11586 err = qgroup_verify_all(info);
11590 err = repair_qgroups(info, &qgroups_repaired);
11595 if (!list_empty(&root->fs_info->recow_ebs)) {
11596 fprintf(stderr, "Transid errors in file system\n");
11600 /* Don't override original ret */
11601 if (!ret && qgroups_repaired)
11602 ret = qgroups_repaired;
11604 if (found_old_backref) { /*
11605 * there was a disk format change when mixed
11606 * backref was in testing tree. The old format
11607 * existed about one week.
11609 printf("\n * Found old mixed backref format. "
11610 "The old format is not supported! *"
11611 "\n * Please mount the FS in readonly mode, "
11612 "backup data and re-format the FS. *\n\n");
11615 printf("found %llu bytes used err is %d\n",
11616 (unsigned long long)bytes_used, ret);
11617 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11618 printf("total tree bytes: %llu\n",
11619 (unsigned long long)total_btree_bytes);
11620 printf("total fs tree bytes: %llu\n",
11621 (unsigned long long)total_fs_tree_bytes);
11622 printf("total extent tree bytes: %llu\n",
11623 (unsigned long long)total_extent_tree_bytes);
11624 printf("btree space waste bytes: %llu\n",
11625 (unsigned long long)btree_space_waste);
11626 printf("file data blocks allocated: %llu\n referenced %llu\n",
11627 (unsigned long long)data_bytes_allocated,
11628 (unsigned long long)data_bytes_referenced);
11630 free_qgroup_counts();
11631 free_root_recs_tree(&root_cache);
11635 if (ctx.progress_enabled)
11636 task_deinit(ctx.info);