2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
77 enum btrfs_check_mode {
81 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
84 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86 struct extent_backref {
88 unsigned int is_data:1;
89 unsigned int found_extent_tree:1;
90 unsigned int full_backref:1;
91 unsigned int found_ref:1;
92 unsigned int broken:1;
95 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 return rb_entry(node, struct extent_backref, node);
100 struct data_backref {
101 struct extent_backref node;
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 return container_of(back, struct data_backref, node);
120 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
122 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
123 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
124 struct data_backref *back1 = to_data_backref(ext1);
125 struct data_backref *back2 = to_data_backref(ext2);
127 WARN_ON(!ext1->is_data);
128 WARN_ON(!ext2->is_data);
130 /* parent and root are a union, so this covers both */
131 if (back1->parent > back2->parent)
133 if (back1->parent < back2->parent)
136 /* This is a full backref and the parents match. */
137 if (back1->node.full_backref)
140 if (back1->owner > back2->owner)
142 if (back1->owner < back2->owner)
145 if (back1->offset > back2->offset)
147 if (back1->offset < back2->offset)
150 if (back1->bytes > back2->bytes)
152 if (back1->bytes < back2->bytes)
155 if (back1->found_ref && back2->found_ref) {
156 if (back1->disk_bytenr > back2->disk_bytenr)
158 if (back1->disk_bytenr < back2->disk_bytenr)
161 if (back1->found_ref > back2->found_ref)
163 if (back1->found_ref < back2->found_ref)
171 * Much like data_backref, just removed the undetermined members
172 * and change it to use list_head.
173 * During extent scan, it is stored in root->orphan_data_extent.
174 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
176 struct orphan_data_extent {
177 struct list_head list;
185 struct tree_backref {
186 struct extent_backref node;
193 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
195 return container_of(back, struct tree_backref, node);
198 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
200 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
201 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
202 struct tree_backref *back1 = to_tree_backref(ext1);
203 struct tree_backref *back2 = to_tree_backref(ext2);
205 WARN_ON(ext1->is_data);
206 WARN_ON(ext2->is_data);
208 /* parent and root are a union, so this covers both */
209 if (back1->parent > back2->parent)
211 if (back1->parent < back2->parent)
217 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
219 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
220 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
222 if (ext1->is_data > ext2->is_data)
225 if (ext1->is_data < ext2->is_data)
228 if (ext1->full_backref > ext2->full_backref)
230 if (ext1->full_backref < ext2->full_backref)
234 return compare_data_backref(node1, node2);
236 return compare_tree_backref(node1, node2);
239 /* Explicit initialization for extent_record::flag_block_full_backref */
240 enum { FLAG_UNSET = 2 };
242 struct extent_record {
243 struct list_head backrefs;
244 struct list_head dups;
245 struct rb_root backref_tree;
246 struct list_head list;
247 struct cache_extent cache;
248 struct btrfs_disk_key parent_key;
253 u64 extent_item_refs;
255 u64 parent_generation;
259 unsigned int flag_block_full_backref:2;
260 unsigned int found_rec:1;
261 unsigned int content_checked:1;
262 unsigned int owner_ref_checked:1;
263 unsigned int is_root:1;
264 unsigned int metadata:1;
265 unsigned int bad_full_backref:1;
266 unsigned int crossing_stripes:1;
267 unsigned int wrong_chunk_type:1;
270 static inline struct extent_record* to_extent_record(struct list_head *entry)
272 return container_of(entry, struct extent_record, list);
275 struct inode_backref {
276 struct list_head list;
277 unsigned int found_dir_item:1;
278 unsigned int found_dir_index:1;
279 unsigned int found_inode_ref:1;
280 unsigned int filetype:8;
282 unsigned int ref_type;
289 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
291 return list_entry(entry, struct inode_backref, list);
294 struct root_item_record {
295 struct list_head list;
302 struct btrfs_key drop_key;
305 #define REF_ERR_NO_DIR_ITEM (1 << 0)
306 #define REF_ERR_NO_DIR_INDEX (1 << 1)
307 #define REF_ERR_NO_INODE_REF (1 << 2)
308 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
309 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
310 #define REF_ERR_DUP_INODE_REF (1 << 5)
311 #define REF_ERR_INDEX_UNMATCH (1 << 6)
312 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
313 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
314 #define REF_ERR_NO_ROOT_REF (1 << 9)
315 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
316 #define REF_ERR_DUP_ROOT_REF (1 << 11)
317 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
319 struct file_extent_hole {
325 struct inode_record {
326 struct list_head backrefs;
327 unsigned int checked:1;
328 unsigned int merging:1;
329 unsigned int found_inode_item:1;
330 unsigned int found_dir_item:1;
331 unsigned int found_file_extent:1;
332 unsigned int found_csum_item:1;
333 unsigned int some_csum_missing:1;
334 unsigned int nodatasum:1;
347 struct rb_root holes;
348 struct list_head orphan_extents;
353 #define I_ERR_NO_INODE_ITEM (1 << 0)
354 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
355 #define I_ERR_DUP_INODE_ITEM (1 << 2)
356 #define I_ERR_DUP_DIR_INDEX (1 << 3)
357 #define I_ERR_ODD_DIR_ITEM (1 << 4)
358 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
359 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
360 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
361 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
362 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
363 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
364 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
365 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
366 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
367 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
369 struct root_backref {
370 struct list_head list;
371 unsigned int found_dir_item:1;
372 unsigned int found_dir_index:1;
373 unsigned int found_back_ref:1;
374 unsigned int found_forward_ref:1;
375 unsigned int reachable:1;
384 static inline struct root_backref* to_root_backref(struct list_head *entry)
386 return list_entry(entry, struct root_backref, list);
390 struct list_head backrefs;
391 struct cache_extent cache;
392 unsigned int found_root_item:1;
398 struct cache_extent cache;
403 struct cache_extent cache;
404 struct cache_tree root_cache;
405 struct cache_tree inode_cache;
406 struct inode_record *current;
415 struct walk_control {
416 struct cache_tree shared;
417 struct shared_node *nodes[BTRFS_MAX_LEVEL];
423 struct btrfs_key key;
425 struct list_head list;
428 struct extent_entry {
433 struct list_head list;
436 struct root_item_info {
437 /* level of the root */
439 /* number of nodes at this level, must be 1 for a root */
443 struct cache_extent cache_extent;
447 * Error bit for low memory mode check.
449 * Currently no caller cares about it yet. Just internal use for error
452 #define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
453 #define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
454 #define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
455 #define REFERENCER_MISSING (1 << 3) /* Referencer not found */
456 #define REFERENCER_MISMATCH (1 << 4) /* Referenceer found but does not match */
457 #define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
458 #define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */
459 #define UNKNOWN_TYPE (1 << 6) /* Unknown type */
460 #define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */
461 #define CHUNK_TYPE_MISMATCH (1 << 8)
463 static void *print_status_check(void *p)
465 struct task_ctx *priv = p;
466 const char work_indicator[] = { '.', 'o', 'O', 'o' };
468 static char *task_position_string[] = {
470 "checking free space cache",
474 task_period_start(priv->info, 1000 /* 1s */);
476 if (priv->tp == TASK_NOTHING)
480 printf("%s [%c]\r", task_position_string[priv->tp],
481 work_indicator[count % 4]);
484 task_period_wait(priv->info);
489 static int print_status_return(void *p)
497 static enum btrfs_check_mode parse_check_mode(const char *str)
499 if (strcmp(str, "lowmem") == 0)
500 return CHECK_MODE_LOWMEM;
501 if (strcmp(str, "orig") == 0)
502 return CHECK_MODE_ORIGINAL;
503 if (strcmp(str, "original") == 0)
504 return CHECK_MODE_ORIGINAL;
506 return CHECK_MODE_UNKNOWN;
509 /* Compatible function to allow reuse of old codes */
510 static u64 first_extent_gap(struct rb_root *holes)
512 struct file_extent_hole *hole;
514 if (RB_EMPTY_ROOT(holes))
517 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
521 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
523 struct file_extent_hole *hole1;
524 struct file_extent_hole *hole2;
526 hole1 = rb_entry(node1, struct file_extent_hole, node);
527 hole2 = rb_entry(node2, struct file_extent_hole, node);
529 if (hole1->start > hole2->start)
531 if (hole1->start < hole2->start)
533 /* Now hole1->start == hole2->start */
534 if (hole1->len >= hole2->len)
536 * Hole 1 will be merge center
537 * Same hole will be merged later
540 /* Hole 2 will be merge center */
545 * Add a hole to the record
547 * This will do hole merge for copy_file_extent_holes(),
548 * which will ensure there won't be continuous holes.
550 static int add_file_extent_hole(struct rb_root *holes,
553 struct file_extent_hole *hole;
554 struct file_extent_hole *prev = NULL;
555 struct file_extent_hole *next = NULL;
557 hole = malloc(sizeof(*hole));
562 /* Since compare will not return 0, no -EEXIST will happen */
563 rb_insert(holes, &hole->node, compare_hole);
565 /* simple merge with previous hole */
566 if (rb_prev(&hole->node))
567 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
569 if (prev && prev->start + prev->len >= hole->start) {
570 hole->len = hole->start + hole->len - prev->start;
571 hole->start = prev->start;
572 rb_erase(&prev->node, holes);
577 /* iterate merge with next holes */
579 if (!rb_next(&hole->node))
581 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
583 if (hole->start + hole->len >= next->start) {
584 if (hole->start + hole->len <= next->start + next->len)
585 hole->len = next->start + next->len -
587 rb_erase(&next->node, holes);
596 static int compare_hole_range(struct rb_node *node, void *data)
598 struct file_extent_hole *hole;
601 hole = (struct file_extent_hole *)data;
604 hole = rb_entry(node, struct file_extent_hole, node);
605 if (start < hole->start)
607 if (start >= hole->start && start < hole->start + hole->len)
613 * Delete a hole in the record
615 * This will do the hole split and is much restrict than add.
617 static int del_file_extent_hole(struct rb_root *holes,
620 struct file_extent_hole *hole;
621 struct file_extent_hole tmp;
626 struct rb_node *node;
633 node = rb_search(holes, &tmp, compare_hole_range, NULL);
636 hole = rb_entry(node, struct file_extent_hole, node);
637 if (start + len > hole->start + hole->len)
641 * Now there will be no overlap, delete the hole and re-add the
642 * split(s) if they exists.
644 if (start > hole->start) {
645 prev_start = hole->start;
646 prev_len = start - hole->start;
649 if (hole->start + hole->len > start + len) {
650 next_start = start + len;
651 next_len = hole->start + hole->len - start - len;
654 rb_erase(node, holes);
657 ret = add_file_extent_hole(holes, prev_start, prev_len);
662 ret = add_file_extent_hole(holes, next_start, next_len);
669 static int copy_file_extent_holes(struct rb_root *dst,
672 struct file_extent_hole *hole;
673 struct rb_node *node;
676 node = rb_first(src);
678 hole = rb_entry(node, struct file_extent_hole, node);
679 ret = add_file_extent_hole(dst, hole->start, hole->len);
682 node = rb_next(node);
687 static void free_file_extent_holes(struct rb_root *holes)
689 struct rb_node *node;
690 struct file_extent_hole *hole;
692 node = rb_first(holes);
694 hole = rb_entry(node, struct file_extent_hole, node);
695 rb_erase(node, holes);
697 node = rb_first(holes);
701 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
703 static void record_root_in_trans(struct btrfs_trans_handle *trans,
704 struct btrfs_root *root)
706 if (root->last_trans != trans->transid) {
707 root->track_dirty = 1;
708 root->last_trans = trans->transid;
709 root->commit_root = root->node;
710 extent_buffer_get(root->node);
714 static u8 imode_to_type(u32 imode)
717 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
718 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
719 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
720 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
721 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
722 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
723 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
724 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
727 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
731 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
733 struct device_record *rec1;
734 struct device_record *rec2;
736 rec1 = rb_entry(node1, struct device_record, node);
737 rec2 = rb_entry(node2, struct device_record, node);
738 if (rec1->devid > rec2->devid)
740 else if (rec1->devid < rec2->devid)
746 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
748 struct inode_record *rec;
749 struct inode_backref *backref;
750 struct inode_backref *orig;
751 struct inode_backref *tmp;
752 struct orphan_data_extent *src_orphan;
753 struct orphan_data_extent *dst_orphan;
757 rec = malloc(sizeof(*rec));
759 return ERR_PTR(-ENOMEM);
760 memcpy(rec, orig_rec, sizeof(*rec));
762 INIT_LIST_HEAD(&rec->backrefs);
763 INIT_LIST_HEAD(&rec->orphan_extents);
764 rec->holes = RB_ROOT;
766 list_for_each_entry(orig, &orig_rec->backrefs, list) {
767 size = sizeof(*orig) + orig->namelen + 1;
768 backref = malloc(size);
773 memcpy(backref, orig, size);
774 list_add_tail(&backref->list, &rec->backrefs);
776 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
777 dst_orphan = malloc(sizeof(*dst_orphan));
782 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
783 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
785 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
791 if (!list_empty(&rec->backrefs))
792 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
793 list_del(&orig->list);
797 if (!list_empty(&rec->orphan_extents))
798 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
799 list_del(&orig->list);
808 static void print_orphan_data_extents(struct list_head *orphan_extents,
811 struct orphan_data_extent *orphan;
813 if (list_empty(orphan_extents))
815 printf("The following data extent is lost in tree %llu:\n",
817 list_for_each_entry(orphan, orphan_extents, list) {
818 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
819 orphan->objectid, orphan->offset, orphan->disk_bytenr,
824 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
826 u64 root_objectid = root->root_key.objectid;
827 int errors = rec->errors;
831 /* reloc root errors, we print its corresponding fs root objectid*/
832 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
833 root_objectid = root->root_key.offset;
834 fprintf(stderr, "reloc");
836 fprintf(stderr, "root %llu inode %llu errors %x",
837 (unsigned long long) root_objectid,
838 (unsigned long long) rec->ino, rec->errors);
840 if (errors & I_ERR_NO_INODE_ITEM)
841 fprintf(stderr, ", no inode item");
842 if (errors & I_ERR_NO_ORPHAN_ITEM)
843 fprintf(stderr, ", no orphan item");
844 if (errors & I_ERR_DUP_INODE_ITEM)
845 fprintf(stderr, ", dup inode item");
846 if (errors & I_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & I_ERR_ODD_DIR_ITEM)
849 fprintf(stderr, ", odd dir item");
850 if (errors & I_ERR_ODD_FILE_EXTENT)
851 fprintf(stderr, ", odd file extent");
852 if (errors & I_ERR_BAD_FILE_EXTENT)
853 fprintf(stderr, ", bad file extent");
854 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
855 fprintf(stderr, ", file extent overlap");
856 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
857 fprintf(stderr, ", file extent discount");
858 if (errors & I_ERR_DIR_ISIZE_WRONG)
859 fprintf(stderr, ", dir isize wrong");
860 if (errors & I_ERR_FILE_NBYTES_WRONG)
861 fprintf(stderr, ", nbytes wrong");
862 if (errors & I_ERR_ODD_CSUM_ITEM)
863 fprintf(stderr, ", odd csum item");
864 if (errors & I_ERR_SOME_CSUM_MISSING)
865 fprintf(stderr, ", some csum missing");
866 if (errors & I_ERR_LINK_COUNT_WRONG)
867 fprintf(stderr, ", link count wrong");
868 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
869 fprintf(stderr, ", orphan file extent");
870 fprintf(stderr, "\n");
871 /* Print the orphan extents if needed */
872 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
873 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
875 /* Print the holes if needed */
876 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
877 struct file_extent_hole *hole;
878 struct rb_node *node;
881 node = rb_first(&rec->holes);
882 fprintf(stderr, "Found file extent holes:\n");
885 hole = rb_entry(node, struct file_extent_hole, node);
886 fprintf(stderr, "\tstart: %llu, len: %llu\n",
887 hole->start, hole->len);
888 node = rb_next(node);
891 fprintf(stderr, "\tstart: 0, len: %llu\n",
892 round_up(rec->isize, root->sectorsize));
896 static void print_ref_error(int errors)
898 if (errors & REF_ERR_NO_DIR_ITEM)
899 fprintf(stderr, ", no dir item");
900 if (errors & REF_ERR_NO_DIR_INDEX)
901 fprintf(stderr, ", no dir index");
902 if (errors & REF_ERR_NO_INODE_REF)
903 fprintf(stderr, ", no inode ref");
904 if (errors & REF_ERR_DUP_DIR_ITEM)
905 fprintf(stderr, ", dup dir item");
906 if (errors & REF_ERR_DUP_DIR_INDEX)
907 fprintf(stderr, ", dup dir index");
908 if (errors & REF_ERR_DUP_INODE_REF)
909 fprintf(stderr, ", dup inode ref");
910 if (errors & REF_ERR_INDEX_UNMATCH)
911 fprintf(stderr, ", index mismatch");
912 if (errors & REF_ERR_FILETYPE_UNMATCH)
913 fprintf(stderr, ", filetype mismatch");
914 if (errors & REF_ERR_NAME_TOO_LONG)
915 fprintf(stderr, ", name too long");
916 if (errors & REF_ERR_NO_ROOT_REF)
917 fprintf(stderr, ", no root ref");
918 if (errors & REF_ERR_NO_ROOT_BACKREF)
919 fprintf(stderr, ", no root backref");
920 if (errors & REF_ERR_DUP_ROOT_REF)
921 fprintf(stderr, ", dup root ref");
922 if (errors & REF_ERR_DUP_ROOT_BACKREF)
923 fprintf(stderr, ", dup root backref");
924 fprintf(stderr, "\n");
927 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
930 struct ptr_node *node;
931 struct cache_extent *cache;
932 struct inode_record *rec = NULL;
935 cache = lookup_cache_extent(inode_cache, ino, 1);
937 node = container_of(cache, struct ptr_node, cache);
939 if (mod && rec->refs > 1) {
940 node->data = clone_inode_rec(rec);
941 if (IS_ERR(node->data))
947 rec = calloc(1, sizeof(*rec));
949 return ERR_PTR(-ENOMEM);
951 rec->extent_start = (u64)-1;
953 INIT_LIST_HEAD(&rec->backrefs);
954 INIT_LIST_HEAD(&rec->orphan_extents);
955 rec->holes = RB_ROOT;
957 node = malloc(sizeof(*node));
960 return ERR_PTR(-ENOMEM);
962 node->cache.start = ino;
963 node->cache.size = 1;
966 if (ino == BTRFS_FREE_INO_OBJECTID)
969 ret = insert_cache_extent(inode_cache, &node->cache);
971 return ERR_PTR(-EEXIST);
976 static void free_orphan_data_extents(struct list_head *orphan_extents)
978 struct orphan_data_extent *orphan;
980 while (!list_empty(orphan_extents)) {
981 orphan = list_entry(orphan_extents->next,
982 struct orphan_data_extent, list);
983 list_del(&orphan->list);
988 static void free_inode_rec(struct inode_record *rec)
990 struct inode_backref *backref;
995 while (!list_empty(&rec->backrefs)) {
996 backref = to_inode_backref(rec->backrefs.next);
997 list_del(&backref->list);
1000 free_orphan_data_extents(&rec->orphan_extents);
1001 free_file_extent_holes(&rec->holes);
1005 static int can_free_inode_rec(struct inode_record *rec)
1007 if (!rec->errors && rec->checked && rec->found_inode_item &&
1008 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1013 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1014 struct inode_record *rec)
1016 struct cache_extent *cache;
1017 struct inode_backref *tmp, *backref;
1018 struct ptr_node *node;
1019 unsigned char filetype;
1021 if (!rec->found_inode_item)
1024 filetype = imode_to_type(rec->imode);
1025 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1026 if (backref->found_dir_item && backref->found_dir_index) {
1027 if (backref->filetype != filetype)
1028 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1029 if (!backref->errors && backref->found_inode_ref &&
1030 rec->nlink == rec->found_link) {
1031 list_del(&backref->list);
1037 if (!rec->checked || rec->merging)
1040 if (S_ISDIR(rec->imode)) {
1041 if (rec->found_size != rec->isize)
1042 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1043 if (rec->found_file_extent)
1044 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1045 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1046 if (rec->found_dir_item)
1047 rec->errors |= I_ERR_ODD_DIR_ITEM;
1048 if (rec->found_size != rec->nbytes)
1049 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1050 if (rec->nlink > 0 && !no_holes &&
1051 (rec->extent_end < rec->isize ||
1052 first_extent_gap(&rec->holes) < rec->isize))
1053 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1056 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1057 if (rec->found_csum_item && rec->nodatasum)
1058 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1059 if (rec->some_csum_missing && !rec->nodatasum)
1060 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1063 BUG_ON(rec->refs != 1);
1064 if (can_free_inode_rec(rec)) {
1065 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1066 node = container_of(cache, struct ptr_node, cache);
1067 BUG_ON(node->data != rec);
1068 remove_cache_extent(inode_cache, &node->cache);
1070 free_inode_rec(rec);
1074 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1076 struct btrfs_path path;
1077 struct btrfs_key key;
1080 key.objectid = BTRFS_ORPHAN_OBJECTID;
1081 key.type = BTRFS_ORPHAN_ITEM_KEY;
1084 btrfs_init_path(&path);
1085 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1086 btrfs_release_path(&path);
1092 static int process_inode_item(struct extent_buffer *eb,
1093 int slot, struct btrfs_key *key,
1094 struct shared_node *active_node)
1096 struct inode_record *rec;
1097 struct btrfs_inode_item *item;
1099 rec = active_node->current;
1100 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1101 if (rec->found_inode_item) {
1102 rec->errors |= I_ERR_DUP_INODE_ITEM;
1105 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1106 rec->nlink = btrfs_inode_nlink(eb, item);
1107 rec->isize = btrfs_inode_size(eb, item);
1108 rec->nbytes = btrfs_inode_nbytes(eb, item);
1109 rec->imode = btrfs_inode_mode(eb, item);
1110 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1112 rec->found_inode_item = 1;
1113 if (rec->nlink == 0)
1114 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1115 maybe_free_inode_rec(&active_node->inode_cache, rec);
1119 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1121 int namelen, u64 dir)
1123 struct inode_backref *backref;
1125 list_for_each_entry(backref, &rec->backrefs, list) {
1126 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1128 if (backref->dir != dir || backref->namelen != namelen)
1130 if (memcmp(name, backref->name, namelen))
1135 backref = malloc(sizeof(*backref) + namelen + 1);
1138 memset(backref, 0, sizeof(*backref));
1140 backref->namelen = namelen;
1141 memcpy(backref->name, name, namelen);
1142 backref->name[namelen] = '\0';
1143 list_add_tail(&backref->list, &rec->backrefs);
1147 static int add_inode_backref(struct cache_tree *inode_cache,
1148 u64 ino, u64 dir, u64 index,
1149 const char *name, int namelen,
1150 int filetype, int itemtype, int errors)
1152 struct inode_record *rec;
1153 struct inode_backref *backref;
1155 rec = get_inode_rec(inode_cache, ino, 1);
1156 BUG_ON(IS_ERR(rec));
1157 backref = get_inode_backref(rec, name, namelen, dir);
1160 backref->errors |= errors;
1161 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1162 if (backref->found_dir_index)
1163 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1164 if (backref->found_inode_ref && backref->index != index)
1165 backref->errors |= REF_ERR_INDEX_UNMATCH;
1166 if (backref->found_dir_item && backref->filetype != filetype)
1167 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1169 backref->index = index;
1170 backref->filetype = filetype;
1171 backref->found_dir_index = 1;
1172 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1174 if (backref->found_dir_item)
1175 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1176 if (backref->found_dir_index && backref->filetype != filetype)
1177 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1179 backref->filetype = filetype;
1180 backref->found_dir_item = 1;
1181 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1182 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1183 if (backref->found_inode_ref)
1184 backref->errors |= REF_ERR_DUP_INODE_REF;
1185 if (backref->found_dir_index && backref->index != index)
1186 backref->errors |= REF_ERR_INDEX_UNMATCH;
1188 backref->index = index;
1190 backref->ref_type = itemtype;
1191 backref->found_inode_ref = 1;
1196 maybe_free_inode_rec(inode_cache, rec);
1200 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1201 struct cache_tree *dst_cache)
1203 struct inode_backref *backref;
1208 list_for_each_entry(backref, &src->backrefs, list) {
1209 if (backref->found_dir_index) {
1210 add_inode_backref(dst_cache, dst->ino, backref->dir,
1211 backref->index, backref->name,
1212 backref->namelen, backref->filetype,
1213 BTRFS_DIR_INDEX_KEY, backref->errors);
1215 if (backref->found_dir_item) {
1217 add_inode_backref(dst_cache, dst->ino,
1218 backref->dir, 0, backref->name,
1219 backref->namelen, backref->filetype,
1220 BTRFS_DIR_ITEM_KEY, backref->errors);
1222 if (backref->found_inode_ref) {
1223 add_inode_backref(dst_cache, dst->ino,
1224 backref->dir, backref->index,
1225 backref->name, backref->namelen, 0,
1226 backref->ref_type, backref->errors);
1230 if (src->found_dir_item)
1231 dst->found_dir_item = 1;
1232 if (src->found_file_extent)
1233 dst->found_file_extent = 1;
1234 if (src->found_csum_item)
1235 dst->found_csum_item = 1;
1236 if (src->some_csum_missing)
1237 dst->some_csum_missing = 1;
1238 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1239 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1244 BUG_ON(src->found_link < dir_count);
1245 dst->found_link += src->found_link - dir_count;
1246 dst->found_size += src->found_size;
1247 if (src->extent_start != (u64)-1) {
1248 if (dst->extent_start == (u64)-1) {
1249 dst->extent_start = src->extent_start;
1250 dst->extent_end = src->extent_end;
1252 if (dst->extent_end > src->extent_start)
1253 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1254 else if (dst->extent_end < src->extent_start) {
1255 ret = add_file_extent_hole(&dst->holes,
1257 src->extent_start - dst->extent_end);
1259 if (dst->extent_end < src->extent_end)
1260 dst->extent_end = src->extent_end;
1264 dst->errors |= src->errors;
1265 if (src->found_inode_item) {
1266 if (!dst->found_inode_item) {
1267 dst->nlink = src->nlink;
1268 dst->isize = src->isize;
1269 dst->nbytes = src->nbytes;
1270 dst->imode = src->imode;
1271 dst->nodatasum = src->nodatasum;
1272 dst->found_inode_item = 1;
1274 dst->errors |= I_ERR_DUP_INODE_ITEM;
1282 static int splice_shared_node(struct shared_node *src_node,
1283 struct shared_node *dst_node)
1285 struct cache_extent *cache;
1286 struct ptr_node *node, *ins;
1287 struct cache_tree *src, *dst;
1288 struct inode_record *rec, *conflict;
1289 u64 current_ino = 0;
1293 if (--src_node->refs == 0)
1295 if (src_node->current)
1296 current_ino = src_node->current->ino;
1298 src = &src_node->root_cache;
1299 dst = &dst_node->root_cache;
1301 cache = search_cache_extent(src, 0);
1303 node = container_of(cache, struct ptr_node, cache);
1305 cache = next_cache_extent(cache);
1308 remove_cache_extent(src, &node->cache);
1311 ins = malloc(sizeof(*ins));
1313 ins->cache.start = node->cache.start;
1314 ins->cache.size = node->cache.size;
1318 ret = insert_cache_extent(dst, &ins->cache);
1319 if (ret == -EEXIST) {
1320 conflict = get_inode_rec(dst, rec->ino, 1);
1321 BUG_ON(IS_ERR(conflict));
1322 merge_inode_recs(rec, conflict, dst);
1324 conflict->checked = 1;
1325 if (dst_node->current == conflict)
1326 dst_node->current = NULL;
1328 maybe_free_inode_rec(dst, conflict);
1329 free_inode_rec(rec);
1336 if (src == &src_node->root_cache) {
1337 src = &src_node->inode_cache;
1338 dst = &dst_node->inode_cache;
1342 if (current_ino > 0 && (!dst_node->current ||
1343 current_ino > dst_node->current->ino)) {
1344 if (dst_node->current) {
1345 dst_node->current->checked = 1;
1346 maybe_free_inode_rec(dst, dst_node->current);
1348 dst_node->current = get_inode_rec(dst, current_ino, 1);
1349 BUG_ON(IS_ERR(dst_node->current));
1354 static void free_inode_ptr(struct cache_extent *cache)
1356 struct ptr_node *node;
1357 struct inode_record *rec;
1359 node = container_of(cache, struct ptr_node, cache);
1361 free_inode_rec(rec);
1365 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1367 static struct shared_node *find_shared_node(struct cache_tree *shared,
1370 struct cache_extent *cache;
1371 struct shared_node *node;
1373 cache = lookup_cache_extent(shared, bytenr, 1);
1375 node = container_of(cache, struct shared_node, cache);
1381 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1384 struct shared_node *node;
1386 node = calloc(1, sizeof(*node));
1389 node->cache.start = bytenr;
1390 node->cache.size = 1;
1391 cache_tree_init(&node->root_cache);
1392 cache_tree_init(&node->inode_cache);
1395 ret = insert_cache_extent(shared, &node->cache);
1400 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1401 struct walk_control *wc, int level)
1403 struct shared_node *node;
1404 struct shared_node *dest;
1407 if (level == wc->active_node)
1410 BUG_ON(wc->active_node <= level);
1411 node = find_shared_node(&wc->shared, bytenr);
1413 ret = add_shared_node(&wc->shared, bytenr, refs);
1415 node = find_shared_node(&wc->shared, bytenr);
1416 wc->nodes[level] = node;
1417 wc->active_node = level;
1421 if (wc->root_level == wc->active_node &&
1422 btrfs_root_refs(&root->root_item) == 0) {
1423 if (--node->refs == 0) {
1424 free_inode_recs_tree(&node->root_cache);
1425 free_inode_recs_tree(&node->inode_cache);
1426 remove_cache_extent(&wc->shared, &node->cache);
1432 dest = wc->nodes[wc->active_node];
1433 splice_shared_node(node, dest);
1434 if (node->refs == 0) {
1435 remove_cache_extent(&wc->shared, &node->cache);
1441 static int leave_shared_node(struct btrfs_root *root,
1442 struct walk_control *wc, int level)
1444 struct shared_node *node;
1445 struct shared_node *dest;
1448 if (level == wc->root_level)
1451 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1455 BUG_ON(i >= BTRFS_MAX_LEVEL);
1457 node = wc->nodes[wc->active_node];
1458 wc->nodes[wc->active_node] = NULL;
1459 wc->active_node = i;
1461 dest = wc->nodes[wc->active_node];
1462 if (wc->active_node < wc->root_level ||
1463 btrfs_root_refs(&root->root_item) > 0) {
1464 BUG_ON(node->refs <= 1);
1465 splice_shared_node(node, dest);
1467 BUG_ON(node->refs < 2);
1476 * 1 - if the root with id child_root_id is a child of root parent_root_id
1477 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1478 * has other root(s) as parent(s)
1479 * 2 - if the root child_root_id doesn't have any parent roots
1481 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1484 struct btrfs_path path;
1485 struct btrfs_key key;
1486 struct extent_buffer *leaf;
1490 btrfs_init_path(&path);
1492 key.objectid = parent_root_id;
1493 key.type = BTRFS_ROOT_REF_KEY;
1494 key.offset = child_root_id;
1495 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1499 btrfs_release_path(&path);
1503 key.objectid = child_root_id;
1504 key.type = BTRFS_ROOT_BACKREF_KEY;
1506 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1512 leaf = path.nodes[0];
1513 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1514 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1517 leaf = path.nodes[0];
1520 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1521 if (key.objectid != child_root_id ||
1522 key.type != BTRFS_ROOT_BACKREF_KEY)
1527 if (key.offset == parent_root_id) {
1528 btrfs_release_path(&path);
1535 btrfs_release_path(&path);
1538 return has_parent ? 0 : 2;
1541 static int process_dir_item(struct btrfs_root *root,
1542 struct extent_buffer *eb,
1543 int slot, struct btrfs_key *key,
1544 struct shared_node *active_node)
1554 struct btrfs_dir_item *di;
1555 struct inode_record *rec;
1556 struct cache_tree *root_cache;
1557 struct cache_tree *inode_cache;
1558 struct btrfs_key location;
1559 char namebuf[BTRFS_NAME_LEN];
1561 root_cache = &active_node->root_cache;
1562 inode_cache = &active_node->inode_cache;
1563 rec = active_node->current;
1564 rec->found_dir_item = 1;
1566 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1567 total = btrfs_item_size_nr(eb, slot);
1568 while (cur < total) {
1570 btrfs_dir_item_key_to_cpu(eb, di, &location);
1571 name_len = btrfs_dir_name_len(eb, di);
1572 data_len = btrfs_dir_data_len(eb, di);
1573 filetype = btrfs_dir_type(eb, di);
1575 rec->found_size += name_len;
1576 if (name_len <= BTRFS_NAME_LEN) {
1580 len = BTRFS_NAME_LEN;
1581 error = REF_ERR_NAME_TOO_LONG;
1583 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1585 if (location.type == BTRFS_INODE_ITEM_KEY) {
1586 add_inode_backref(inode_cache, location.objectid,
1587 key->objectid, key->offset, namebuf,
1588 len, filetype, key->type, error);
1589 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1590 add_inode_backref(root_cache, location.objectid,
1591 key->objectid, key->offset,
1592 namebuf, len, filetype,
1595 fprintf(stderr, "invalid location in dir item %u\n",
1597 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1598 key->objectid, key->offset, namebuf,
1599 len, filetype, key->type, error);
1602 len = sizeof(*di) + name_len + data_len;
1603 di = (struct btrfs_dir_item *)((char *)di + len);
1606 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1607 rec->errors |= I_ERR_DUP_DIR_INDEX;
1612 static int process_inode_ref(struct extent_buffer *eb,
1613 int slot, struct btrfs_key *key,
1614 struct shared_node *active_node)
1622 struct cache_tree *inode_cache;
1623 struct btrfs_inode_ref *ref;
1624 char namebuf[BTRFS_NAME_LEN];
1626 inode_cache = &active_node->inode_cache;
1628 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1629 total = btrfs_item_size_nr(eb, slot);
1630 while (cur < total) {
1631 name_len = btrfs_inode_ref_name_len(eb, ref);
1632 index = btrfs_inode_ref_index(eb, ref);
1633 if (name_len <= BTRFS_NAME_LEN) {
1637 len = BTRFS_NAME_LEN;
1638 error = REF_ERR_NAME_TOO_LONG;
1640 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1641 add_inode_backref(inode_cache, key->objectid, key->offset,
1642 index, namebuf, len, 0, key->type, error);
1644 len = sizeof(*ref) + name_len;
1645 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1651 static int process_inode_extref(struct extent_buffer *eb,
1652 int slot, struct btrfs_key *key,
1653 struct shared_node *active_node)
1662 struct cache_tree *inode_cache;
1663 struct btrfs_inode_extref *extref;
1664 char namebuf[BTRFS_NAME_LEN];
1666 inode_cache = &active_node->inode_cache;
1668 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1669 total = btrfs_item_size_nr(eb, slot);
1670 while (cur < total) {
1671 name_len = btrfs_inode_extref_name_len(eb, extref);
1672 index = btrfs_inode_extref_index(eb, extref);
1673 parent = btrfs_inode_extref_parent(eb, extref);
1674 if (name_len <= BTRFS_NAME_LEN) {
1678 len = BTRFS_NAME_LEN;
1679 error = REF_ERR_NAME_TOO_LONG;
1681 read_extent_buffer(eb, namebuf,
1682 (unsigned long)(extref + 1), len);
1683 add_inode_backref(inode_cache, key->objectid, parent,
1684 index, namebuf, len, 0, key->type, error);
1686 len = sizeof(*extref) + name_len;
1687 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1694 static int count_csum_range(struct btrfs_root *root, u64 start,
1695 u64 len, u64 *found)
1697 struct btrfs_key key;
1698 struct btrfs_path path;
1699 struct extent_buffer *leaf;
1704 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1706 btrfs_init_path(&path);
1708 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1710 key.type = BTRFS_EXTENT_CSUM_KEY;
1712 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1716 if (ret > 0 && path.slots[0] > 0) {
1717 leaf = path.nodes[0];
1718 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1719 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1720 key.type == BTRFS_EXTENT_CSUM_KEY)
1725 leaf = path.nodes[0];
1726 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1727 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1732 leaf = path.nodes[0];
1735 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1736 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1737 key.type != BTRFS_EXTENT_CSUM_KEY)
1740 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1741 if (key.offset >= start + len)
1744 if (key.offset > start)
1747 size = btrfs_item_size_nr(leaf, path.slots[0]);
1748 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1749 if (csum_end > start) {
1750 size = min(csum_end - start, len);
1759 btrfs_release_path(&path);
1765 static int process_file_extent(struct btrfs_root *root,
1766 struct extent_buffer *eb,
1767 int slot, struct btrfs_key *key,
1768 struct shared_node *active_node)
1770 struct inode_record *rec;
1771 struct btrfs_file_extent_item *fi;
1773 u64 disk_bytenr = 0;
1774 u64 extent_offset = 0;
1775 u64 mask = root->sectorsize - 1;
1779 rec = active_node->current;
1780 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1781 rec->found_file_extent = 1;
1783 if (rec->extent_start == (u64)-1) {
1784 rec->extent_start = key->offset;
1785 rec->extent_end = key->offset;
1788 if (rec->extent_end > key->offset)
1789 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1790 else if (rec->extent_end < key->offset) {
1791 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1792 key->offset - rec->extent_end);
1797 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1798 extent_type = btrfs_file_extent_type(eb, fi);
1800 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1801 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1803 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1804 rec->found_size += num_bytes;
1805 num_bytes = (num_bytes + mask) & ~mask;
1806 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1807 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1808 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1809 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1810 extent_offset = btrfs_file_extent_offset(eb, fi);
1811 if (num_bytes == 0 || (num_bytes & mask))
1812 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1813 if (num_bytes + extent_offset >
1814 btrfs_file_extent_ram_bytes(eb, fi))
1815 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1816 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1817 (btrfs_file_extent_compression(eb, fi) ||
1818 btrfs_file_extent_encryption(eb, fi) ||
1819 btrfs_file_extent_other_encoding(eb, fi)))
1820 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1821 if (disk_bytenr > 0)
1822 rec->found_size += num_bytes;
1824 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1826 rec->extent_end = key->offset + num_bytes;
1829 * The data reloc tree will copy full extents into its inode and then
1830 * copy the corresponding csums. Because the extent it copied could be
1831 * a preallocated extent that hasn't been written to yet there may be no
1832 * csums to copy, ergo we won't have csums for our file extent. This is
1833 * ok so just don't bother checking csums if the inode belongs to the
1836 if (disk_bytenr > 0 &&
1837 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1839 if (btrfs_file_extent_compression(eb, fi))
1840 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1842 disk_bytenr += extent_offset;
1844 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1847 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1849 rec->found_csum_item = 1;
1850 if (found < num_bytes)
1851 rec->some_csum_missing = 1;
1852 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1854 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1860 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1861 struct walk_control *wc)
1863 struct btrfs_key key;
1867 struct cache_tree *inode_cache;
1868 struct shared_node *active_node;
1870 if (wc->root_level == wc->active_node &&
1871 btrfs_root_refs(&root->root_item) == 0)
1874 active_node = wc->nodes[wc->active_node];
1875 inode_cache = &active_node->inode_cache;
1876 nritems = btrfs_header_nritems(eb);
1877 for (i = 0; i < nritems; i++) {
1878 btrfs_item_key_to_cpu(eb, &key, i);
1880 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1882 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1885 if (active_node->current == NULL ||
1886 active_node->current->ino < key.objectid) {
1887 if (active_node->current) {
1888 active_node->current->checked = 1;
1889 maybe_free_inode_rec(inode_cache,
1890 active_node->current);
1892 active_node->current = get_inode_rec(inode_cache,
1894 BUG_ON(IS_ERR(active_node->current));
1897 case BTRFS_DIR_ITEM_KEY:
1898 case BTRFS_DIR_INDEX_KEY:
1899 ret = process_dir_item(root, eb, i, &key, active_node);
1901 case BTRFS_INODE_REF_KEY:
1902 ret = process_inode_ref(eb, i, &key, active_node);
1904 case BTRFS_INODE_EXTREF_KEY:
1905 ret = process_inode_extref(eb, i, &key, active_node);
1907 case BTRFS_INODE_ITEM_KEY:
1908 ret = process_inode_item(eb, i, &key, active_node);
1910 case BTRFS_EXTENT_DATA_KEY:
1911 ret = process_file_extent(root, eb, i, &key,
1921 static void reada_walk_down(struct btrfs_root *root,
1922 struct extent_buffer *node, int slot)
1931 level = btrfs_header_level(node);
1935 nritems = btrfs_header_nritems(node);
1936 blocksize = root->nodesize;
1937 for (i = slot; i < nritems; i++) {
1938 bytenr = btrfs_node_blockptr(node, i);
1939 ptr_gen = btrfs_node_ptr_generation(node, i);
1940 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1945 * Check the child node/leaf by the following condition:
1946 * 1. the first item key of the node/leaf should be the same with the one
1948 * 2. block in parent node should match the child node/leaf.
1949 * 3. generation of parent node and child's header should be consistent.
1951 * Or the child node/leaf pointed by the key in parent is not valid.
1953 * We hope to check leaf owner too, but since subvol may share leaves,
1954 * which makes leaf owner check not so strong, key check should be
1955 * sufficient enough for that case.
1957 static int check_child_node(struct btrfs_root *root,
1958 struct extent_buffer *parent, int slot,
1959 struct extent_buffer *child)
1961 struct btrfs_key parent_key;
1962 struct btrfs_key child_key;
1965 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1966 if (btrfs_header_level(child) == 0)
1967 btrfs_item_key_to_cpu(child, &child_key, 0);
1969 btrfs_node_key_to_cpu(child, &child_key, 0);
1971 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1974 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1975 parent_key.objectid, parent_key.type, parent_key.offset,
1976 child_key.objectid, child_key.type, child_key.offset);
1978 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1980 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1981 btrfs_node_blockptr(parent, slot),
1982 btrfs_header_bytenr(child));
1984 if (btrfs_node_ptr_generation(parent, slot) !=
1985 btrfs_header_generation(child)) {
1987 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1988 btrfs_header_generation(child),
1989 btrfs_node_ptr_generation(parent, slot));
1995 u64 bytenr[BTRFS_MAX_LEVEL];
1996 u64 refs[BTRFS_MAX_LEVEL];
1999 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2000 struct walk_control *wc, int *level,
2001 struct node_refs *nrefs)
2003 enum btrfs_tree_block_status status;
2006 struct extent_buffer *next;
2007 struct extent_buffer *cur;
2012 WARN_ON(*level < 0);
2013 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2015 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2016 refs = nrefs->refs[*level];
2019 ret = btrfs_lookup_extent_info(NULL, root,
2020 path->nodes[*level]->start,
2021 *level, 1, &refs, NULL);
2026 nrefs->bytenr[*level] = path->nodes[*level]->start;
2027 nrefs->refs[*level] = refs;
2031 ret = enter_shared_node(root, path->nodes[*level]->start,
2039 while (*level >= 0) {
2040 WARN_ON(*level < 0);
2041 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2042 cur = path->nodes[*level];
2044 if (btrfs_header_level(cur) != *level)
2047 if (path->slots[*level] >= btrfs_header_nritems(cur))
2050 ret = process_one_leaf(root, cur, wc);
2055 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2056 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2057 blocksize = root->nodesize;
2059 if (bytenr == nrefs->bytenr[*level - 1]) {
2060 refs = nrefs->refs[*level - 1];
2062 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2063 *level - 1, 1, &refs, NULL);
2067 nrefs->bytenr[*level - 1] = bytenr;
2068 nrefs->refs[*level - 1] = refs;
2073 ret = enter_shared_node(root, bytenr, refs,
2076 path->slots[*level]++;
2081 next = btrfs_find_tree_block(root, bytenr, blocksize);
2082 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2083 free_extent_buffer(next);
2084 reada_walk_down(root, cur, path->slots[*level]);
2085 next = read_tree_block(root, bytenr, blocksize,
2087 if (!extent_buffer_uptodate(next)) {
2088 struct btrfs_key node_key;
2090 btrfs_node_key_to_cpu(path->nodes[*level],
2092 path->slots[*level]);
2093 btrfs_add_corrupt_extent_record(root->fs_info,
2095 path->nodes[*level]->start,
2096 root->nodesize, *level);
2102 ret = check_child_node(root, cur, path->slots[*level], next);
2108 if (btrfs_is_leaf(next))
2109 status = btrfs_check_leaf(root, NULL, next);
2111 status = btrfs_check_node(root, NULL, next);
2112 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2113 free_extent_buffer(next);
2118 *level = *level - 1;
2119 free_extent_buffer(path->nodes[*level]);
2120 path->nodes[*level] = next;
2121 path->slots[*level] = 0;
2124 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2128 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2129 struct walk_control *wc, int *level)
2132 struct extent_buffer *leaf;
2134 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2135 leaf = path->nodes[i];
2136 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2141 free_extent_buffer(path->nodes[*level]);
2142 path->nodes[*level] = NULL;
2143 BUG_ON(*level > wc->active_node);
2144 if (*level == wc->active_node)
2145 leave_shared_node(root, wc, *level);
2152 static int check_root_dir(struct inode_record *rec)
2154 struct inode_backref *backref;
2157 if (!rec->found_inode_item || rec->errors)
2159 if (rec->nlink != 1 || rec->found_link != 0)
2161 if (list_empty(&rec->backrefs))
2163 backref = to_inode_backref(rec->backrefs.next);
2164 if (!backref->found_inode_ref)
2166 if (backref->index != 0 || backref->namelen != 2 ||
2167 memcmp(backref->name, "..", 2))
2169 if (backref->found_dir_index || backref->found_dir_item)
2176 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2177 struct btrfs_root *root, struct btrfs_path *path,
2178 struct inode_record *rec)
2180 struct btrfs_inode_item *ei;
2181 struct btrfs_key key;
2184 key.objectid = rec->ino;
2185 key.type = BTRFS_INODE_ITEM_KEY;
2186 key.offset = (u64)-1;
2188 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2192 if (!path->slots[0]) {
2199 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2200 if (key.objectid != rec->ino) {
2205 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2206 struct btrfs_inode_item);
2207 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2208 btrfs_mark_buffer_dirty(path->nodes[0]);
2209 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2210 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2211 root->root_key.objectid);
2213 btrfs_release_path(path);
2217 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2218 struct btrfs_root *root,
2219 struct btrfs_path *path,
2220 struct inode_record *rec)
2224 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2225 btrfs_release_path(path);
2227 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2231 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2232 struct btrfs_root *root,
2233 struct btrfs_path *path,
2234 struct inode_record *rec)
2236 struct btrfs_inode_item *ei;
2237 struct btrfs_key key;
2240 key.objectid = rec->ino;
2241 key.type = BTRFS_INODE_ITEM_KEY;
2244 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2251 /* Since ret == 0, no need to check anything */
2252 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2253 struct btrfs_inode_item);
2254 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2255 btrfs_mark_buffer_dirty(path->nodes[0]);
2256 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2257 printf("reset nbytes for ino %llu root %llu\n",
2258 rec->ino, root->root_key.objectid);
2260 btrfs_release_path(path);
2264 static int add_missing_dir_index(struct btrfs_root *root,
2265 struct cache_tree *inode_cache,
2266 struct inode_record *rec,
2267 struct inode_backref *backref)
2269 struct btrfs_path *path;
2270 struct btrfs_trans_handle *trans;
2271 struct btrfs_dir_item *dir_item;
2272 struct extent_buffer *leaf;
2273 struct btrfs_key key;
2274 struct btrfs_disk_key disk_key;
2275 struct inode_record *dir_rec;
2276 unsigned long name_ptr;
2277 u32 data_size = sizeof(*dir_item) + backref->namelen;
2280 path = btrfs_alloc_path();
2284 trans = btrfs_start_transaction(root, 1);
2285 if (IS_ERR(trans)) {
2286 btrfs_free_path(path);
2287 return PTR_ERR(trans);
2290 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2291 (unsigned long long)rec->ino);
2292 key.objectid = backref->dir;
2293 key.type = BTRFS_DIR_INDEX_KEY;
2294 key.offset = backref->index;
2296 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2299 leaf = path->nodes[0];
2300 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2302 disk_key.objectid = cpu_to_le64(rec->ino);
2303 disk_key.type = BTRFS_INODE_ITEM_KEY;
2304 disk_key.offset = 0;
2306 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2307 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2308 btrfs_set_dir_data_len(leaf, dir_item, 0);
2309 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2310 name_ptr = (unsigned long)(dir_item + 1);
2311 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2312 btrfs_mark_buffer_dirty(leaf);
2313 btrfs_free_path(path);
2314 btrfs_commit_transaction(trans, root);
2316 backref->found_dir_index = 1;
2317 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2318 BUG_ON(IS_ERR(dir_rec));
2321 dir_rec->found_size += backref->namelen;
2322 if (dir_rec->found_size == dir_rec->isize &&
2323 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2324 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2325 if (dir_rec->found_size != dir_rec->isize)
2326 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2331 static int delete_dir_index(struct btrfs_root *root,
2332 struct cache_tree *inode_cache,
2333 struct inode_record *rec,
2334 struct inode_backref *backref)
2336 struct btrfs_trans_handle *trans;
2337 struct btrfs_dir_item *di;
2338 struct btrfs_path *path;
2341 path = btrfs_alloc_path();
2345 trans = btrfs_start_transaction(root, 1);
2346 if (IS_ERR(trans)) {
2347 btrfs_free_path(path);
2348 return PTR_ERR(trans);
2352 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2353 (unsigned long long)backref->dir,
2354 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2355 (unsigned long long)root->objectid);
2357 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2358 backref->name, backref->namelen,
2359 backref->index, -1);
2362 btrfs_free_path(path);
2363 btrfs_commit_transaction(trans, root);
2370 ret = btrfs_del_item(trans, root, path);
2372 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2374 btrfs_free_path(path);
2375 btrfs_commit_transaction(trans, root);
2379 static int create_inode_item(struct btrfs_root *root,
2380 struct inode_record *rec,
2381 struct inode_backref *backref, int root_dir)
2383 struct btrfs_trans_handle *trans;
2384 struct btrfs_inode_item inode_item;
2385 time_t now = time(NULL);
2388 trans = btrfs_start_transaction(root, 1);
2389 if (IS_ERR(trans)) {
2390 ret = PTR_ERR(trans);
2394 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2395 "be incomplete, please check permissions and content after "
2396 "the fsck completes.\n", (unsigned long long)root->objectid,
2397 (unsigned long long)rec->ino);
2399 memset(&inode_item, 0, sizeof(inode_item));
2400 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2402 btrfs_set_stack_inode_nlink(&inode_item, 1);
2404 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2405 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2406 if (rec->found_dir_item) {
2407 if (rec->found_file_extent)
2408 fprintf(stderr, "root %llu inode %llu has both a dir "
2409 "item and extents, unsure if it is a dir or a "
2410 "regular file so setting it as a directory\n",
2411 (unsigned long long)root->objectid,
2412 (unsigned long long)rec->ino);
2413 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2414 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2415 } else if (!rec->found_dir_item) {
2416 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2417 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2419 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2420 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2421 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2422 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2423 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2424 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2425 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2426 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2428 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2430 btrfs_commit_transaction(trans, root);
2434 static int repair_inode_backrefs(struct btrfs_root *root,
2435 struct inode_record *rec,
2436 struct cache_tree *inode_cache,
2439 struct inode_backref *tmp, *backref;
2440 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2444 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2445 if (!delete && rec->ino == root_dirid) {
2446 if (!rec->found_inode_item) {
2447 ret = create_inode_item(root, rec, backref, 1);
2454 /* Index 0 for root dir's are special, don't mess with it */
2455 if (rec->ino == root_dirid && backref->index == 0)
2459 ((backref->found_dir_index && !backref->found_inode_ref) ||
2460 (backref->found_dir_index && backref->found_inode_ref &&
2461 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2462 ret = delete_dir_index(root, inode_cache, rec, backref);
2466 list_del(&backref->list);
2470 if (!delete && !backref->found_dir_index &&
2471 backref->found_dir_item && backref->found_inode_ref) {
2472 ret = add_missing_dir_index(root, inode_cache, rec,
2477 if (backref->found_dir_item &&
2478 backref->found_dir_index &&
2479 backref->found_dir_index) {
2480 if (!backref->errors &&
2481 backref->found_inode_ref) {
2482 list_del(&backref->list);
2488 if (!delete && (!backref->found_dir_index &&
2489 !backref->found_dir_item &&
2490 backref->found_inode_ref)) {
2491 struct btrfs_trans_handle *trans;
2492 struct btrfs_key location;
2494 ret = check_dir_conflict(root, backref->name,
2500 * let nlink fixing routine to handle it,
2501 * which can do it better.
2506 location.objectid = rec->ino;
2507 location.type = BTRFS_INODE_ITEM_KEY;
2508 location.offset = 0;
2510 trans = btrfs_start_transaction(root, 1);
2511 if (IS_ERR(trans)) {
2512 ret = PTR_ERR(trans);
2515 fprintf(stderr, "adding missing dir index/item pair "
2517 (unsigned long long)rec->ino);
2518 ret = btrfs_insert_dir_item(trans, root, backref->name,
2520 backref->dir, &location,
2521 imode_to_type(rec->imode),
2524 btrfs_commit_transaction(trans, root);
2528 if (!delete && (backref->found_inode_ref &&
2529 backref->found_dir_index &&
2530 backref->found_dir_item &&
2531 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2532 !rec->found_inode_item)) {
2533 ret = create_inode_item(root, rec, backref, 0);
2540 return ret ? ret : repaired;
2544 * To determine the file type for nlink/inode_item repair
2546 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2547 * Return -ENOENT if file type is not found.
2549 static int find_file_type(struct inode_record *rec, u8 *type)
2551 struct inode_backref *backref;
2553 /* For inode item recovered case */
2554 if (rec->found_inode_item) {
2555 *type = imode_to_type(rec->imode);
2559 list_for_each_entry(backref, &rec->backrefs, list) {
2560 if (backref->found_dir_index || backref->found_dir_item) {
2561 *type = backref->filetype;
2569 * To determine the file name for nlink repair
2571 * Return 0 if file name is found, set name and namelen.
2572 * Return -ENOENT if file name is not found.
2574 static int find_file_name(struct inode_record *rec,
2575 char *name, int *namelen)
2577 struct inode_backref *backref;
2579 list_for_each_entry(backref, &rec->backrefs, list) {
2580 if (backref->found_dir_index || backref->found_dir_item ||
2581 backref->found_inode_ref) {
2582 memcpy(name, backref->name, backref->namelen);
2583 *namelen = backref->namelen;
2590 /* Reset the nlink of the inode to the correct one */
2591 static int reset_nlink(struct btrfs_trans_handle *trans,
2592 struct btrfs_root *root,
2593 struct btrfs_path *path,
2594 struct inode_record *rec)
2596 struct inode_backref *backref;
2597 struct inode_backref *tmp;
2598 struct btrfs_key key;
2599 struct btrfs_inode_item *inode_item;
2602 /* We don't believe this either, reset it and iterate backref */
2603 rec->found_link = 0;
2605 /* Remove all backref including the valid ones */
2606 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2607 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2608 backref->index, backref->name,
2609 backref->namelen, 0);
2613 /* remove invalid backref, so it won't be added back */
2614 if (!(backref->found_dir_index &&
2615 backref->found_dir_item &&
2616 backref->found_inode_ref)) {
2617 list_del(&backref->list);
2624 /* Set nlink to 0 */
2625 key.objectid = rec->ino;
2626 key.type = BTRFS_INODE_ITEM_KEY;
2628 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2635 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2636 struct btrfs_inode_item);
2637 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2638 btrfs_mark_buffer_dirty(path->nodes[0]);
2639 btrfs_release_path(path);
2642 * Add back valid inode_ref/dir_item/dir_index,
2643 * add_link() will handle the nlink inc, so new nlink must be correct
2645 list_for_each_entry(backref, &rec->backrefs, list) {
2646 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2647 backref->name, backref->namelen,
2648 backref->filetype, &backref->index, 1);
2653 btrfs_release_path(path);
2657 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2658 struct btrfs_root *root,
2659 struct btrfs_path *path,
2660 struct inode_record *rec)
2662 char *dir_name = "lost+found";
2663 char namebuf[BTRFS_NAME_LEN] = {0};
2668 int name_recovered = 0;
2669 int type_recovered = 0;
2673 * Get file name and type first before these invalid inode ref
2674 * are deleted by remove_all_invalid_backref()
2676 name_recovered = !find_file_name(rec, namebuf, &namelen);
2677 type_recovered = !find_file_type(rec, &type);
2679 if (!name_recovered) {
2680 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2681 rec->ino, rec->ino);
2682 namelen = count_digits(rec->ino);
2683 sprintf(namebuf, "%llu", rec->ino);
2686 if (!type_recovered) {
2687 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2689 type = BTRFS_FT_REG_FILE;
2693 ret = reset_nlink(trans, root, path, rec);
2696 "Failed to reset nlink for inode %llu: %s\n",
2697 rec->ino, strerror(-ret));
2701 if (rec->found_link == 0) {
2702 lost_found_ino = root->highest_inode;
2703 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2708 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2709 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2712 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2713 dir_name, strerror(-ret));
2716 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2717 namebuf, namelen, type, NULL, 1);
2719 * Add ".INO" suffix several times to handle case where
2720 * "FILENAME.INO" is already taken by another file.
2722 while (ret == -EEXIST) {
2724 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2726 if (namelen + count_digits(rec->ino) + 1 >
2731 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2733 namelen += count_digits(rec->ino) + 1;
2734 ret = btrfs_add_link(trans, root, rec->ino,
2735 lost_found_ino, namebuf,
2736 namelen, type, NULL, 1);
2740 "Failed to link the inode %llu to %s dir: %s\n",
2741 rec->ino, dir_name, strerror(-ret));
2745 * Just increase the found_link, don't actually add the
2746 * backref. This will make things easier and this inode
2747 * record will be freed after the repair is done.
2748 * So fsck will not report problem about this inode.
2751 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2752 namelen, namebuf, dir_name);
2754 printf("Fixed the nlink of inode %llu\n", rec->ino);
2757 * Clear the flag anyway, or we will loop forever for the same inode
2758 * as it will not be removed from the bad inode list and the dead loop
2761 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2762 btrfs_release_path(path);
2767 * Check if there is any normal(reg or prealloc) file extent for given
2769 * This is used to determine the file type when neither its dir_index/item or
2770 * inode_item exists.
2772 * This will *NOT* report error, if any error happens, just consider it does
2773 * not have any normal file extent.
2775 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2777 struct btrfs_path *path;
2778 struct btrfs_key key;
2779 struct btrfs_key found_key;
2780 struct btrfs_file_extent_item *fi;
2784 path = btrfs_alloc_path();
2788 key.type = BTRFS_EXTENT_DATA_KEY;
2791 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2796 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2797 ret = btrfs_next_leaf(root, path);
2804 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2806 if (found_key.objectid != ino ||
2807 found_key.type != BTRFS_EXTENT_DATA_KEY)
2809 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2810 struct btrfs_file_extent_item);
2811 type = btrfs_file_extent_type(path->nodes[0], fi);
2812 if (type != BTRFS_FILE_EXTENT_INLINE) {
2818 btrfs_free_path(path);
2822 static u32 btrfs_type_to_imode(u8 type)
2824 static u32 imode_by_btrfs_type[] = {
2825 [BTRFS_FT_REG_FILE] = S_IFREG,
2826 [BTRFS_FT_DIR] = S_IFDIR,
2827 [BTRFS_FT_CHRDEV] = S_IFCHR,
2828 [BTRFS_FT_BLKDEV] = S_IFBLK,
2829 [BTRFS_FT_FIFO] = S_IFIFO,
2830 [BTRFS_FT_SOCK] = S_IFSOCK,
2831 [BTRFS_FT_SYMLINK] = S_IFLNK,
2834 return imode_by_btrfs_type[(type)];
2837 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2838 struct btrfs_root *root,
2839 struct btrfs_path *path,
2840 struct inode_record *rec)
2844 int type_recovered = 0;
2847 printf("Trying to rebuild inode:%llu\n", rec->ino);
2849 type_recovered = !find_file_type(rec, &filetype);
2852 * Try to determine inode type if type not found.
2854 * For found regular file extent, it must be FILE.
2855 * For found dir_item/index, it must be DIR.
2857 * For undetermined one, use FILE as fallback.
2860 * 1. If found backref(inode_index/item is already handled) to it,
2862 * Need new inode-inode ref structure to allow search for that.
2864 if (!type_recovered) {
2865 if (rec->found_file_extent &&
2866 find_normal_file_extent(root, rec->ino)) {
2868 filetype = BTRFS_FT_REG_FILE;
2869 } else if (rec->found_dir_item) {
2871 filetype = BTRFS_FT_DIR;
2872 } else if (!list_empty(&rec->orphan_extents)) {
2874 filetype = BTRFS_FT_REG_FILE;
2876 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2879 filetype = BTRFS_FT_REG_FILE;
2883 ret = btrfs_new_inode(trans, root, rec->ino,
2884 mode | btrfs_type_to_imode(filetype));
2889 * Here inode rebuild is done, we only rebuild the inode item,
2890 * don't repair the nlink(like move to lost+found).
2891 * That is the job of nlink repair.
2893 * We just fill the record and return
2895 rec->found_dir_item = 1;
2896 rec->imode = mode | btrfs_type_to_imode(filetype);
2898 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2899 /* Ensure the inode_nlinks repair function will be called */
2900 rec->errors |= I_ERR_LINK_COUNT_WRONG;
2905 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2906 struct btrfs_root *root,
2907 struct btrfs_path *path,
2908 struct inode_record *rec)
2910 struct orphan_data_extent *orphan;
2911 struct orphan_data_extent *tmp;
2914 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2916 * Check for conflicting file extents
2918 * Here we don't know whether the extents is compressed or not,
2919 * so we can only assume it not compressed nor data offset,
2920 * and use its disk_len as extent length.
2922 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2923 orphan->offset, orphan->disk_len, 0);
2924 btrfs_release_path(path);
2929 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2930 orphan->disk_bytenr, orphan->disk_len);
2931 ret = btrfs_free_extent(trans,
2932 root->fs_info->extent_root,
2933 orphan->disk_bytenr, orphan->disk_len,
2934 0, root->objectid, orphan->objectid,
2939 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2940 orphan->offset, orphan->disk_bytenr,
2941 orphan->disk_len, orphan->disk_len);
2945 /* Update file size info */
2946 rec->found_size += orphan->disk_len;
2947 if (rec->found_size == rec->nbytes)
2948 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2950 /* Update the file extent hole info too */
2951 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2955 if (RB_EMPTY_ROOT(&rec->holes))
2956 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2958 list_del(&orphan->list);
2961 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2966 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2967 struct btrfs_root *root,
2968 struct btrfs_path *path,
2969 struct inode_record *rec)
2971 struct rb_node *node;
2972 struct file_extent_hole *hole;
2976 node = rb_first(&rec->holes);
2980 hole = rb_entry(node, struct file_extent_hole, node);
2981 ret = btrfs_punch_hole(trans, root, rec->ino,
2982 hole->start, hole->len);
2985 ret = del_file_extent_hole(&rec->holes, hole->start,
2989 if (RB_EMPTY_ROOT(&rec->holes))
2990 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2991 node = rb_first(&rec->holes);
2993 /* special case for a file losing all its file extent */
2995 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2996 round_up(rec->isize, root->sectorsize));
3000 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3001 rec->ino, root->objectid);
3006 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3008 struct btrfs_trans_handle *trans;
3009 struct btrfs_path *path;
3012 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3013 I_ERR_NO_ORPHAN_ITEM |
3014 I_ERR_LINK_COUNT_WRONG |
3015 I_ERR_NO_INODE_ITEM |
3016 I_ERR_FILE_EXTENT_ORPHAN |
3017 I_ERR_FILE_EXTENT_DISCOUNT|
3018 I_ERR_FILE_NBYTES_WRONG)))
3021 path = btrfs_alloc_path();
3026 * For nlink repair, it may create a dir and add link, so
3027 * 2 for parent(256)'s dir_index and dir_item
3028 * 2 for lost+found dir's inode_item and inode_ref
3029 * 1 for the new inode_ref of the file
3030 * 2 for lost+found dir's dir_index and dir_item for the file
3032 trans = btrfs_start_transaction(root, 7);
3033 if (IS_ERR(trans)) {
3034 btrfs_free_path(path);
3035 return PTR_ERR(trans);
3038 if (rec->errors & I_ERR_NO_INODE_ITEM)
3039 ret = repair_inode_no_item(trans, root, path, rec);
3040 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3041 ret = repair_inode_orphan_extent(trans, root, path, rec);
3042 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3043 ret = repair_inode_discount_extent(trans, root, path, rec);
3044 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3045 ret = repair_inode_isize(trans, root, path, rec);
3046 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3047 ret = repair_inode_orphan_item(trans, root, path, rec);
3048 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3049 ret = repair_inode_nlinks(trans, root, path, rec);
3050 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3051 ret = repair_inode_nbytes(trans, root, path, rec);
3052 btrfs_commit_transaction(trans, root);
3053 btrfs_free_path(path);
3057 static int check_inode_recs(struct btrfs_root *root,
3058 struct cache_tree *inode_cache)
3060 struct cache_extent *cache;
3061 struct ptr_node *node;
3062 struct inode_record *rec;
3063 struct inode_backref *backref;
3068 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3070 if (btrfs_root_refs(&root->root_item) == 0) {
3071 if (!cache_tree_empty(inode_cache))
3072 fprintf(stderr, "warning line %d\n", __LINE__);
3077 * We need to record the highest inode number for later 'lost+found'
3079 * We must select an ino not used/referred by any existing inode, or
3080 * 'lost+found' ino may be a missing ino in a corrupted leaf,
3081 * this may cause 'lost+found' dir has wrong nlinks.
3083 cache = last_cache_extent(inode_cache);
3085 node = container_of(cache, struct ptr_node, cache);
3087 if (rec->ino > root->highest_inode)
3088 root->highest_inode = rec->ino;
3092 * We need to repair backrefs first because we could change some of the
3093 * errors in the inode recs.
3095 * We also need to go through and delete invalid backrefs first and then
3096 * add the correct ones second. We do this because we may get EEXIST
3097 * when adding back the correct index because we hadn't yet deleted the
3100 * For example, if we were missing a dir index then the directories
3101 * isize would be wrong, so if we fixed the isize to what we thought it
3102 * would be and then fixed the backref we'd still have a invalid fs, so
3103 * we need to add back the dir index and then check to see if the isize
3108 if (stage == 3 && !err)
3111 cache = search_cache_extent(inode_cache, 0);
3112 while (repair && cache) {
3113 node = container_of(cache, struct ptr_node, cache);
3115 cache = next_cache_extent(cache);
3117 /* Need to free everything up and rescan */
3119 remove_cache_extent(inode_cache, &node->cache);
3121 free_inode_rec(rec);
3125 if (list_empty(&rec->backrefs))
3128 ret = repair_inode_backrefs(root, rec, inode_cache,
3142 rec = get_inode_rec(inode_cache, root_dirid, 0);
3143 BUG_ON(IS_ERR(rec));
3145 ret = check_root_dir(rec);
3147 fprintf(stderr, "root %llu root dir %llu error\n",
3148 (unsigned long long)root->root_key.objectid,
3149 (unsigned long long)root_dirid);
3150 print_inode_error(root, rec);
3155 struct btrfs_trans_handle *trans;
3157 trans = btrfs_start_transaction(root, 1);
3158 if (IS_ERR(trans)) {
3159 err = PTR_ERR(trans);
3164 "root %llu missing its root dir, recreating\n",
3165 (unsigned long long)root->objectid);
3167 ret = btrfs_make_root_dir(trans, root, root_dirid);
3170 btrfs_commit_transaction(trans, root);
3174 fprintf(stderr, "root %llu root dir %llu not found\n",
3175 (unsigned long long)root->root_key.objectid,
3176 (unsigned long long)root_dirid);
3180 cache = search_cache_extent(inode_cache, 0);
3183 node = container_of(cache, struct ptr_node, cache);
3185 remove_cache_extent(inode_cache, &node->cache);
3187 if (rec->ino == root_dirid ||
3188 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3189 free_inode_rec(rec);
3193 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3194 ret = check_orphan_item(root, rec->ino);
3196 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3197 if (can_free_inode_rec(rec)) {
3198 free_inode_rec(rec);
3203 if (!rec->found_inode_item)
3204 rec->errors |= I_ERR_NO_INODE_ITEM;
3205 if (rec->found_link != rec->nlink)
3206 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3208 ret = try_repair_inode(root, rec);
3209 if (ret == 0 && can_free_inode_rec(rec)) {
3210 free_inode_rec(rec);
3216 if (!(repair && ret == 0))
3218 print_inode_error(root, rec);
3219 list_for_each_entry(backref, &rec->backrefs, list) {
3220 if (!backref->found_dir_item)
3221 backref->errors |= REF_ERR_NO_DIR_ITEM;
3222 if (!backref->found_dir_index)
3223 backref->errors |= REF_ERR_NO_DIR_INDEX;
3224 if (!backref->found_inode_ref)
3225 backref->errors |= REF_ERR_NO_INODE_REF;
3226 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3227 " namelen %u name %s filetype %d errors %x",
3228 (unsigned long long)backref->dir,
3229 (unsigned long long)backref->index,
3230 backref->namelen, backref->name,
3231 backref->filetype, backref->errors);
3232 print_ref_error(backref->errors);
3234 free_inode_rec(rec);
3236 return (error > 0) ? -1 : 0;
3239 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3242 struct cache_extent *cache;
3243 struct root_record *rec = NULL;
3246 cache = lookup_cache_extent(root_cache, objectid, 1);
3248 rec = container_of(cache, struct root_record, cache);
3250 rec = calloc(1, sizeof(*rec));
3252 return ERR_PTR(-ENOMEM);
3253 rec->objectid = objectid;
3254 INIT_LIST_HEAD(&rec->backrefs);
3255 rec->cache.start = objectid;
3256 rec->cache.size = 1;
3258 ret = insert_cache_extent(root_cache, &rec->cache);
3260 return ERR_PTR(-EEXIST);
3265 static struct root_backref *get_root_backref(struct root_record *rec,
3266 u64 ref_root, u64 dir, u64 index,
3267 const char *name, int namelen)
3269 struct root_backref *backref;
3271 list_for_each_entry(backref, &rec->backrefs, list) {
3272 if (backref->ref_root != ref_root || backref->dir != dir ||
3273 backref->namelen != namelen)
3275 if (memcmp(name, backref->name, namelen))
3280 backref = calloc(1, sizeof(*backref) + namelen + 1);
3283 backref->ref_root = ref_root;
3285 backref->index = index;
3286 backref->namelen = namelen;
3287 memcpy(backref->name, name, namelen);
3288 backref->name[namelen] = '\0';
3289 list_add_tail(&backref->list, &rec->backrefs);
3293 static void free_root_record(struct cache_extent *cache)
3295 struct root_record *rec;
3296 struct root_backref *backref;
3298 rec = container_of(cache, struct root_record, cache);
3299 while (!list_empty(&rec->backrefs)) {
3300 backref = to_root_backref(rec->backrefs.next);
3301 list_del(&backref->list);
3308 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3310 static int add_root_backref(struct cache_tree *root_cache,
3311 u64 root_id, u64 ref_root, u64 dir, u64 index,
3312 const char *name, int namelen,
3313 int item_type, int errors)
3315 struct root_record *rec;
3316 struct root_backref *backref;
3318 rec = get_root_rec(root_cache, root_id);
3319 BUG_ON(IS_ERR(rec));
3320 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3323 backref->errors |= errors;
3325 if (item_type != BTRFS_DIR_ITEM_KEY) {
3326 if (backref->found_dir_index || backref->found_back_ref ||
3327 backref->found_forward_ref) {
3328 if (backref->index != index)
3329 backref->errors |= REF_ERR_INDEX_UNMATCH;
3331 backref->index = index;
3335 if (item_type == BTRFS_DIR_ITEM_KEY) {
3336 if (backref->found_forward_ref)
3338 backref->found_dir_item = 1;
3339 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3340 backref->found_dir_index = 1;
3341 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3342 if (backref->found_forward_ref)
3343 backref->errors |= REF_ERR_DUP_ROOT_REF;
3344 else if (backref->found_dir_item)
3346 backref->found_forward_ref = 1;
3347 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3348 if (backref->found_back_ref)
3349 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3350 backref->found_back_ref = 1;
3355 if (backref->found_forward_ref && backref->found_dir_item)
3356 backref->reachable = 1;
3360 static int merge_root_recs(struct btrfs_root *root,
3361 struct cache_tree *src_cache,
3362 struct cache_tree *dst_cache)
3364 struct cache_extent *cache;
3365 struct ptr_node *node;
3366 struct inode_record *rec;
3367 struct inode_backref *backref;
3370 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3371 free_inode_recs_tree(src_cache);
3376 cache = search_cache_extent(src_cache, 0);
3379 node = container_of(cache, struct ptr_node, cache);
3381 remove_cache_extent(src_cache, &node->cache);
3384 ret = is_child_root(root, root->objectid, rec->ino);
3390 list_for_each_entry(backref, &rec->backrefs, list) {
3391 BUG_ON(backref->found_inode_ref);
3392 if (backref->found_dir_item)
3393 add_root_backref(dst_cache, rec->ino,
3394 root->root_key.objectid, backref->dir,
3395 backref->index, backref->name,
3396 backref->namelen, BTRFS_DIR_ITEM_KEY,
3398 if (backref->found_dir_index)
3399 add_root_backref(dst_cache, rec->ino,
3400 root->root_key.objectid, backref->dir,
3401 backref->index, backref->name,
3402 backref->namelen, BTRFS_DIR_INDEX_KEY,
3406 free_inode_rec(rec);
3413 static int check_root_refs(struct btrfs_root *root,
3414 struct cache_tree *root_cache)
3416 struct root_record *rec;
3417 struct root_record *ref_root;
3418 struct root_backref *backref;
3419 struct cache_extent *cache;
3425 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3426 BUG_ON(IS_ERR(rec));
3429 /* fixme: this can not detect circular references */
3432 cache = search_cache_extent(root_cache, 0);
3436 rec = container_of(cache, struct root_record, cache);
3437 cache = next_cache_extent(cache);
3439 if (rec->found_ref == 0)
3442 list_for_each_entry(backref, &rec->backrefs, list) {
3443 if (!backref->reachable)
3446 ref_root = get_root_rec(root_cache,
3448 BUG_ON(IS_ERR(ref_root));
3449 if (ref_root->found_ref > 0)
3452 backref->reachable = 0;
3454 if (rec->found_ref == 0)
3460 cache = search_cache_extent(root_cache, 0);
3464 rec = container_of(cache, struct root_record, cache);
3465 cache = next_cache_extent(cache);
3467 if (rec->found_ref == 0 &&
3468 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3469 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3470 ret = check_orphan_item(root->fs_info->tree_root,
3476 * If we don't have a root item then we likely just have
3477 * a dir item in a snapshot for this root but no actual
3478 * ref key or anything so it's meaningless.
3480 if (!rec->found_root_item)
3483 fprintf(stderr, "fs tree %llu not referenced\n",
3484 (unsigned long long)rec->objectid);
3488 if (rec->found_ref > 0 && !rec->found_root_item)
3490 list_for_each_entry(backref, &rec->backrefs, list) {
3491 if (!backref->found_dir_item)
3492 backref->errors |= REF_ERR_NO_DIR_ITEM;
3493 if (!backref->found_dir_index)
3494 backref->errors |= REF_ERR_NO_DIR_INDEX;
3495 if (!backref->found_back_ref)
3496 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3497 if (!backref->found_forward_ref)
3498 backref->errors |= REF_ERR_NO_ROOT_REF;
3499 if (backref->reachable && backref->errors)
3506 fprintf(stderr, "fs tree %llu refs %u %s\n",
3507 (unsigned long long)rec->objectid, rec->found_ref,
3508 rec->found_root_item ? "" : "not found");
3510 list_for_each_entry(backref, &rec->backrefs, list) {
3511 if (!backref->reachable)
3513 if (!backref->errors && rec->found_root_item)
3515 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3516 " index %llu namelen %u name %s errors %x\n",
3517 (unsigned long long)backref->ref_root,
3518 (unsigned long long)backref->dir,
3519 (unsigned long long)backref->index,
3520 backref->namelen, backref->name,
3522 print_ref_error(backref->errors);
3525 return errors > 0 ? 1 : 0;
3528 static int process_root_ref(struct extent_buffer *eb, int slot,
3529 struct btrfs_key *key,
3530 struct cache_tree *root_cache)
3536 struct btrfs_root_ref *ref;
3537 char namebuf[BTRFS_NAME_LEN];
3540 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3542 dirid = btrfs_root_ref_dirid(eb, ref);
3543 index = btrfs_root_ref_sequence(eb, ref);
3544 name_len = btrfs_root_ref_name_len(eb, ref);
3546 if (name_len <= BTRFS_NAME_LEN) {
3550 len = BTRFS_NAME_LEN;
3551 error = REF_ERR_NAME_TOO_LONG;
3553 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3555 if (key->type == BTRFS_ROOT_REF_KEY) {
3556 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3557 index, namebuf, len, key->type, error);
3559 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3560 index, namebuf, len, key->type, error);
3565 static void free_corrupt_block(struct cache_extent *cache)
3567 struct btrfs_corrupt_block *corrupt;
3569 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3573 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3576 * Repair the btree of the given root.
3578 * The fix is to remove the node key in corrupt_blocks cache_tree.
3579 * and rebalance the tree.
3580 * After the fix, the btree should be writeable.
3582 static int repair_btree(struct btrfs_root *root,
3583 struct cache_tree *corrupt_blocks)
3585 struct btrfs_trans_handle *trans;
3586 struct btrfs_path *path;
3587 struct btrfs_corrupt_block *corrupt;
3588 struct cache_extent *cache;
3589 struct btrfs_key key;
3594 if (cache_tree_empty(corrupt_blocks))
3597 path = btrfs_alloc_path();
3601 trans = btrfs_start_transaction(root, 1);
3602 if (IS_ERR(trans)) {
3603 ret = PTR_ERR(trans);
3604 fprintf(stderr, "Error starting transaction: %s\n",
3608 cache = first_cache_extent(corrupt_blocks);
3610 corrupt = container_of(cache, struct btrfs_corrupt_block,
3612 level = corrupt->level;
3613 path->lowest_level = level;
3614 key.objectid = corrupt->key.objectid;
3615 key.type = corrupt->key.type;
3616 key.offset = corrupt->key.offset;
3619 * Here we don't want to do any tree balance, since it may
3620 * cause a balance with corrupted brother leaf/node,
3621 * so ins_len set to 0 here.
3622 * Balance will be done after all corrupt node/leaf is deleted.
3624 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3627 offset = btrfs_node_blockptr(path->nodes[level],
3628 path->slots[level]);
3630 /* Remove the ptr */
3631 ret = btrfs_del_ptr(trans, root, path, level,
3632 path->slots[level]);
3636 * Remove the corresponding extent
3637 * return value is not concerned.
3639 btrfs_release_path(path);
3640 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3641 0, root->root_key.objectid,
3643 cache = next_cache_extent(cache);
3646 /* Balance the btree using btrfs_search_slot() */
3647 cache = first_cache_extent(corrupt_blocks);
3649 corrupt = container_of(cache, struct btrfs_corrupt_block,
3651 memcpy(&key, &corrupt->key, sizeof(key));
3652 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3655 /* return will always >0 since it won't find the item */
3657 btrfs_release_path(path);
3658 cache = next_cache_extent(cache);
3661 btrfs_commit_transaction(trans, root);
3663 btrfs_free_path(path);
3667 static int check_fs_root(struct btrfs_root *root,
3668 struct cache_tree *root_cache,
3669 struct walk_control *wc)
3675 struct btrfs_path path;
3676 struct shared_node root_node;
3677 struct root_record *rec;
3678 struct btrfs_root_item *root_item = &root->root_item;
3679 struct cache_tree corrupt_blocks;
3680 struct orphan_data_extent *orphan;
3681 struct orphan_data_extent *tmp;
3682 enum btrfs_tree_block_status status;
3683 struct node_refs nrefs;
3686 * Reuse the corrupt_block cache tree to record corrupted tree block
3688 * Unlike the usage in extent tree check, here we do it in a per
3689 * fs/subvol tree base.
3691 cache_tree_init(&corrupt_blocks);
3692 root->fs_info->corrupt_blocks = &corrupt_blocks;
3694 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3695 rec = get_root_rec(root_cache, root->root_key.objectid);
3696 BUG_ON(IS_ERR(rec));
3697 if (btrfs_root_refs(root_item) > 0)
3698 rec->found_root_item = 1;
3701 btrfs_init_path(&path);
3702 memset(&root_node, 0, sizeof(root_node));
3703 cache_tree_init(&root_node.root_cache);
3704 cache_tree_init(&root_node.inode_cache);
3705 memset(&nrefs, 0, sizeof(nrefs));
3707 /* Move the orphan extent record to corresponding inode_record */
3708 list_for_each_entry_safe(orphan, tmp,
3709 &root->orphan_data_extents, list) {
3710 struct inode_record *inode;
3712 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3714 BUG_ON(IS_ERR(inode));
3715 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3716 list_move(&orphan->list, &inode->orphan_extents);
3719 level = btrfs_header_level(root->node);
3720 memset(wc->nodes, 0, sizeof(wc->nodes));
3721 wc->nodes[level] = &root_node;
3722 wc->active_node = level;
3723 wc->root_level = level;
3725 /* We may not have checked the root block, lets do that now */
3726 if (btrfs_is_leaf(root->node))
3727 status = btrfs_check_leaf(root, NULL, root->node);
3729 status = btrfs_check_node(root, NULL, root->node);
3730 if (status != BTRFS_TREE_BLOCK_CLEAN)
3733 if (btrfs_root_refs(root_item) > 0 ||
3734 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3735 path.nodes[level] = root->node;
3736 extent_buffer_get(root->node);
3737 path.slots[level] = 0;
3739 struct btrfs_key key;
3740 struct btrfs_disk_key found_key;
3742 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3743 level = root_item->drop_level;
3744 path.lowest_level = level;
3745 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3748 btrfs_node_key(path.nodes[level], &found_key,
3750 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3751 sizeof(found_key)));
3755 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3761 wret = walk_up_tree(root, &path, wc, &level);
3768 btrfs_release_path(&path);
3770 if (!cache_tree_empty(&corrupt_blocks)) {
3771 struct cache_extent *cache;
3772 struct btrfs_corrupt_block *corrupt;
3774 printf("The following tree block(s) is corrupted in tree %llu:\n",
3775 root->root_key.objectid);
3776 cache = first_cache_extent(&corrupt_blocks);
3778 corrupt = container_of(cache,
3779 struct btrfs_corrupt_block,
3781 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3782 cache->start, corrupt->level,
3783 corrupt->key.objectid, corrupt->key.type,
3784 corrupt->key.offset);
3785 cache = next_cache_extent(cache);
3788 printf("Try to repair the btree for root %llu\n",
3789 root->root_key.objectid);
3790 ret = repair_btree(root, &corrupt_blocks);
3792 fprintf(stderr, "Failed to repair btree: %s\n",
3795 printf("Btree for root %llu is fixed\n",
3796 root->root_key.objectid);
3800 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3804 if (root_node.current) {
3805 root_node.current->checked = 1;
3806 maybe_free_inode_rec(&root_node.inode_cache,
3810 err = check_inode_recs(root, &root_node.inode_cache);
3814 free_corrupt_blocks_tree(&corrupt_blocks);
3815 root->fs_info->corrupt_blocks = NULL;
3816 free_orphan_data_extents(&root->orphan_data_extents);
3820 static int fs_root_objectid(u64 objectid)
3822 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3823 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3825 return is_fstree(objectid);
3828 static int check_fs_roots(struct btrfs_root *root,
3829 struct cache_tree *root_cache)
3831 struct btrfs_path path;
3832 struct btrfs_key key;
3833 struct walk_control wc;
3834 struct extent_buffer *leaf, *tree_node;
3835 struct btrfs_root *tmp_root;
3836 struct btrfs_root *tree_root = root->fs_info->tree_root;
3840 if (ctx.progress_enabled) {
3841 ctx.tp = TASK_FS_ROOTS;
3842 task_start(ctx.info);
3846 * Just in case we made any changes to the extent tree that weren't
3847 * reflected into the free space cache yet.
3850 reset_cached_block_groups(root->fs_info);
3851 memset(&wc, 0, sizeof(wc));
3852 cache_tree_init(&wc.shared);
3853 btrfs_init_path(&path);
3858 key.type = BTRFS_ROOT_ITEM_KEY;
3859 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3864 tree_node = tree_root->node;
3866 if (tree_node != tree_root->node) {
3867 free_root_recs_tree(root_cache);
3868 btrfs_release_path(&path);
3871 leaf = path.nodes[0];
3872 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3873 ret = btrfs_next_leaf(tree_root, &path);
3879 leaf = path.nodes[0];
3881 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3882 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3883 fs_root_objectid(key.objectid)) {
3884 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3885 tmp_root = btrfs_read_fs_root_no_cache(
3886 root->fs_info, &key);
3888 key.offset = (u64)-1;
3889 tmp_root = btrfs_read_fs_root(
3890 root->fs_info, &key);
3892 if (IS_ERR(tmp_root)) {
3896 ret = check_fs_root(tmp_root, root_cache, &wc);
3897 if (ret == -EAGAIN) {
3898 free_root_recs_tree(root_cache);
3899 btrfs_release_path(&path);
3904 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3905 btrfs_free_fs_root(tmp_root);
3906 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3907 key.type == BTRFS_ROOT_BACKREF_KEY) {
3908 process_root_ref(leaf, path.slots[0], &key,
3915 btrfs_release_path(&path);
3917 free_extent_cache_tree(&wc.shared);
3918 if (!cache_tree_empty(&wc.shared))
3919 fprintf(stderr, "warning line %d\n", __LINE__);
3921 task_stop(ctx.info);
3926 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3929 struct extent_backref *back;
3930 struct tree_backref *tback;
3931 struct data_backref *dback;
3935 for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3936 back = rb_node_to_extent_backref(n);
3937 if (!back->found_extent_tree) {
3941 if (back->is_data) {
3942 dback = to_data_backref(back);
3943 fprintf(stderr, "Backref %llu %s %llu"
3944 " owner %llu offset %llu num_refs %lu"
3945 " not found in extent tree\n",
3946 (unsigned long long)rec->start,
3947 back->full_backref ?
3949 back->full_backref ?
3950 (unsigned long long)dback->parent:
3951 (unsigned long long)dback->root,
3952 (unsigned long long)dback->owner,
3953 (unsigned long long)dback->offset,
3954 (unsigned long)dback->num_refs);
3956 tback = to_tree_backref(back);
3957 fprintf(stderr, "Backref %llu parent %llu"
3958 " root %llu not found in extent tree\n",
3959 (unsigned long long)rec->start,
3960 (unsigned long long)tback->parent,
3961 (unsigned long long)tback->root);
3964 if (!back->is_data && !back->found_ref) {
3968 tback = to_tree_backref(back);
3969 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3970 (unsigned long long)rec->start,
3971 back->full_backref ? "parent" : "root",
3972 back->full_backref ?
3973 (unsigned long long)tback->parent :
3974 (unsigned long long)tback->root, back);
3976 if (back->is_data) {
3977 dback = to_data_backref(back);
3978 if (dback->found_ref != dback->num_refs) {
3982 fprintf(stderr, "Incorrect local backref count"
3983 " on %llu %s %llu owner %llu"
3984 " offset %llu found %u wanted %u back %p\n",
3985 (unsigned long long)rec->start,
3986 back->full_backref ?
3988 back->full_backref ?
3989 (unsigned long long)dback->parent:
3990 (unsigned long long)dback->root,
3991 (unsigned long long)dback->owner,
3992 (unsigned long long)dback->offset,
3993 dback->found_ref, dback->num_refs, back);
3995 if (dback->disk_bytenr != rec->start) {
3999 fprintf(stderr, "Backref disk bytenr does not"
4000 " match extent record, bytenr=%llu, "
4001 "ref bytenr=%llu\n",
4002 (unsigned long long)rec->start,
4003 (unsigned long long)dback->disk_bytenr);
4006 if (dback->bytes != rec->nr) {
4010 fprintf(stderr, "Backref bytes do not match "
4011 "extent backref, bytenr=%llu, ref "
4012 "bytes=%llu, backref bytes=%llu\n",
4013 (unsigned long long)rec->start,
4014 (unsigned long long)rec->nr,
4015 (unsigned long long)dback->bytes);
4018 if (!back->is_data) {
4021 dback = to_data_backref(back);
4022 found += dback->found_ref;
4025 if (found != rec->refs) {
4029 fprintf(stderr, "Incorrect global backref count "
4030 "on %llu found %llu wanted %llu\n",
4031 (unsigned long long)rec->start,
4032 (unsigned long long)found,
4033 (unsigned long long)rec->refs);
4039 static void __free_one_backref(struct rb_node *node)
4041 struct extent_backref *back = rb_node_to_extent_backref(node);
4046 static void free_all_extent_backrefs(struct extent_record *rec)
4048 rb_free_nodes(&rec->backref_tree, __free_one_backref);
4051 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4052 struct cache_tree *extent_cache)
4054 struct cache_extent *cache;
4055 struct extent_record *rec;
4058 cache = first_cache_extent(extent_cache);
4061 rec = container_of(cache, struct extent_record, cache);
4062 remove_cache_extent(extent_cache, cache);
4063 free_all_extent_backrefs(rec);
4068 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4069 struct extent_record *rec)
4071 if (rec->content_checked && rec->owner_ref_checked &&
4072 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4073 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4074 !rec->bad_full_backref && !rec->crossing_stripes &&
4075 !rec->wrong_chunk_type) {
4076 remove_cache_extent(extent_cache, &rec->cache);
4077 free_all_extent_backrefs(rec);
4078 list_del_init(&rec->list);
4084 static int check_owner_ref(struct btrfs_root *root,
4085 struct extent_record *rec,
4086 struct extent_buffer *buf)
4088 struct extent_backref *node, *tmp;
4089 struct tree_backref *back;
4090 struct btrfs_root *ref_root;
4091 struct btrfs_key key;
4092 struct btrfs_path path;
4093 struct extent_buffer *parent;
4098 rbtree_postorder_for_each_entry_safe(node, tmp,
4099 &rec->backref_tree, node) {
4102 if (!node->found_ref)
4104 if (node->full_backref)
4106 back = to_tree_backref(node);
4107 if (btrfs_header_owner(buf) == back->root)
4110 BUG_ON(rec->is_root);
4112 /* try to find the block by search corresponding fs tree */
4113 key.objectid = btrfs_header_owner(buf);
4114 key.type = BTRFS_ROOT_ITEM_KEY;
4115 key.offset = (u64)-1;
4117 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4118 if (IS_ERR(ref_root))
4121 level = btrfs_header_level(buf);
4123 btrfs_item_key_to_cpu(buf, &key, 0);
4125 btrfs_node_key_to_cpu(buf, &key, 0);
4127 btrfs_init_path(&path);
4128 path.lowest_level = level + 1;
4129 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4133 parent = path.nodes[level + 1];
4134 if (parent && buf->start == btrfs_node_blockptr(parent,
4135 path.slots[level + 1]))
4138 btrfs_release_path(&path);
4139 return found ? 0 : 1;
4142 static int is_extent_tree_record(struct extent_record *rec)
4144 struct extent_backref *ref, *tmp;
4145 struct tree_backref *back;
4148 rbtree_postorder_for_each_entry_safe(ref, tmp,
4149 &rec->backref_tree, node) {
4152 back = to_tree_backref(ref);
4153 if (ref->full_backref)
4155 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4162 static int record_bad_block_io(struct btrfs_fs_info *info,
4163 struct cache_tree *extent_cache,
4166 struct extent_record *rec;
4167 struct cache_extent *cache;
4168 struct btrfs_key key;
4170 cache = lookup_cache_extent(extent_cache, start, len);
4174 rec = container_of(cache, struct extent_record, cache);
4175 if (!is_extent_tree_record(rec))
4178 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4179 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
4182 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4183 struct extent_buffer *buf, int slot)
4185 if (btrfs_header_level(buf)) {
4186 struct btrfs_key_ptr ptr1, ptr2;
4188 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4189 sizeof(struct btrfs_key_ptr));
4190 read_extent_buffer(buf, &ptr2,
4191 btrfs_node_key_ptr_offset(slot + 1),
4192 sizeof(struct btrfs_key_ptr));
4193 write_extent_buffer(buf, &ptr1,
4194 btrfs_node_key_ptr_offset(slot + 1),
4195 sizeof(struct btrfs_key_ptr));
4196 write_extent_buffer(buf, &ptr2,
4197 btrfs_node_key_ptr_offset(slot),
4198 sizeof(struct btrfs_key_ptr));
4200 struct btrfs_disk_key key;
4201 btrfs_node_key(buf, &key, 0);
4202 btrfs_fixup_low_keys(root, path, &key,
4203 btrfs_header_level(buf) + 1);
4206 struct btrfs_item *item1, *item2;
4207 struct btrfs_key k1, k2;
4208 char *item1_data, *item2_data;
4209 u32 item1_offset, item2_offset, item1_size, item2_size;
4211 item1 = btrfs_item_nr(slot);
4212 item2 = btrfs_item_nr(slot + 1);
4213 btrfs_item_key_to_cpu(buf, &k1, slot);
4214 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4215 item1_offset = btrfs_item_offset(buf, item1);
4216 item2_offset = btrfs_item_offset(buf, item2);
4217 item1_size = btrfs_item_size(buf, item1);
4218 item2_size = btrfs_item_size(buf, item2);
4220 item1_data = malloc(item1_size);
4223 item2_data = malloc(item2_size);
4229 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4230 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4232 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4233 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4237 btrfs_set_item_offset(buf, item1, item2_offset);
4238 btrfs_set_item_offset(buf, item2, item1_offset);
4239 btrfs_set_item_size(buf, item1, item2_size);
4240 btrfs_set_item_size(buf, item2, item1_size);
4242 path->slots[0] = slot;
4243 btrfs_set_item_key_unsafe(root, path, &k2);
4244 path->slots[0] = slot + 1;
4245 btrfs_set_item_key_unsafe(root, path, &k1);
4250 static int fix_key_order(struct btrfs_trans_handle *trans,
4251 struct btrfs_root *root,
4252 struct btrfs_path *path)
4254 struct extent_buffer *buf;
4255 struct btrfs_key k1, k2;
4257 int level = path->lowest_level;
4260 buf = path->nodes[level];
4261 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4263 btrfs_node_key_to_cpu(buf, &k1, i);
4264 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4266 btrfs_item_key_to_cpu(buf, &k1, i);
4267 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4269 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4271 ret = swap_values(root, path, buf, i);
4274 btrfs_mark_buffer_dirty(buf);
4280 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4281 struct btrfs_root *root,
4282 struct btrfs_path *path,
4283 struct extent_buffer *buf, int slot)
4285 struct btrfs_key key;
4286 int nritems = btrfs_header_nritems(buf);
4288 btrfs_item_key_to_cpu(buf, &key, slot);
4290 /* These are all the keys we can deal with missing. */
4291 if (key.type != BTRFS_DIR_INDEX_KEY &&
4292 key.type != BTRFS_EXTENT_ITEM_KEY &&
4293 key.type != BTRFS_METADATA_ITEM_KEY &&
4294 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4295 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4298 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4299 (unsigned long long)key.objectid, key.type,
4300 (unsigned long long)key.offset, slot, buf->start);
4301 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4302 btrfs_item_nr_offset(slot + 1),
4303 sizeof(struct btrfs_item) *
4304 (nritems - slot - 1));
4305 btrfs_set_header_nritems(buf, nritems - 1);
4307 struct btrfs_disk_key disk_key;
4309 btrfs_item_key(buf, &disk_key, 0);
4310 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4312 btrfs_mark_buffer_dirty(buf);
4316 static int fix_item_offset(struct btrfs_trans_handle *trans,
4317 struct btrfs_root *root,
4318 struct btrfs_path *path)
4320 struct extent_buffer *buf;
4324 /* We should only get this for leaves */
4325 BUG_ON(path->lowest_level);
4326 buf = path->nodes[0];
4328 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4329 unsigned int shift = 0, offset;
4331 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4332 BTRFS_LEAF_DATA_SIZE(root)) {
4333 if (btrfs_item_end_nr(buf, i) >
4334 BTRFS_LEAF_DATA_SIZE(root)) {
4335 ret = delete_bogus_item(trans, root, path,
4339 fprintf(stderr, "item is off the end of the "
4340 "leaf, can't fix\n");
4344 shift = BTRFS_LEAF_DATA_SIZE(root) -
4345 btrfs_item_end_nr(buf, i);
4346 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4347 btrfs_item_offset_nr(buf, i - 1)) {
4348 if (btrfs_item_end_nr(buf, i) >
4349 btrfs_item_offset_nr(buf, i - 1)) {
4350 ret = delete_bogus_item(trans, root, path,
4354 fprintf(stderr, "items overlap, can't fix\n");
4358 shift = btrfs_item_offset_nr(buf, i - 1) -
4359 btrfs_item_end_nr(buf, i);
4364 printf("Shifting item nr %d by %u bytes in block %llu\n",
4365 i, shift, (unsigned long long)buf->start);
4366 offset = btrfs_item_offset_nr(buf, i);
4367 memmove_extent_buffer(buf,
4368 btrfs_leaf_data(buf) + offset + shift,
4369 btrfs_leaf_data(buf) + offset,
4370 btrfs_item_size_nr(buf, i));
4371 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4373 btrfs_mark_buffer_dirty(buf);
4377 * We may have moved things, in which case we want to exit so we don't
4378 * write those changes out. Once we have proper abort functionality in
4379 * progs this can be changed to something nicer.
4386 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4387 * then just return -EIO.
4389 static int try_to_fix_bad_block(struct btrfs_root *root,
4390 struct extent_buffer *buf,
4391 enum btrfs_tree_block_status status)
4393 struct btrfs_trans_handle *trans;
4394 struct ulist *roots;
4395 struct ulist_node *node;
4396 struct btrfs_root *search_root;
4397 struct btrfs_path *path;
4398 struct ulist_iterator iter;
4399 struct btrfs_key root_key, key;
4402 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4403 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4406 path = btrfs_alloc_path();
4410 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4413 btrfs_free_path(path);
4417 ULIST_ITER_INIT(&iter);
4418 while ((node = ulist_next(roots, &iter))) {
4419 root_key.objectid = node->val;
4420 root_key.type = BTRFS_ROOT_ITEM_KEY;
4421 root_key.offset = (u64)-1;
4423 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4430 trans = btrfs_start_transaction(search_root, 0);
4431 if (IS_ERR(trans)) {
4432 ret = PTR_ERR(trans);
4436 path->lowest_level = btrfs_header_level(buf);
4437 path->skip_check_block = 1;
4438 if (path->lowest_level)
4439 btrfs_node_key_to_cpu(buf, &key, 0);
4441 btrfs_item_key_to_cpu(buf, &key, 0);
4442 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4445 btrfs_commit_transaction(trans, search_root);
4448 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4449 ret = fix_key_order(trans, search_root, path);
4450 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4451 ret = fix_item_offset(trans, search_root, path);
4453 btrfs_commit_transaction(trans, search_root);
4456 btrfs_release_path(path);
4457 btrfs_commit_transaction(trans, search_root);
4460 btrfs_free_path(path);
4464 static int check_block(struct btrfs_root *root,
4465 struct cache_tree *extent_cache,
4466 struct extent_buffer *buf, u64 flags)
4468 struct extent_record *rec;
4469 struct cache_extent *cache;
4470 struct btrfs_key key;
4471 enum btrfs_tree_block_status status;
4475 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4478 rec = container_of(cache, struct extent_record, cache);
4479 rec->generation = btrfs_header_generation(buf);
4481 level = btrfs_header_level(buf);
4482 if (btrfs_header_nritems(buf) > 0) {
4485 btrfs_item_key_to_cpu(buf, &key, 0);
4487 btrfs_node_key_to_cpu(buf, &key, 0);
4489 rec->info_objectid = key.objectid;
4491 rec->info_level = level;
4493 if (btrfs_is_leaf(buf))
4494 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4496 status = btrfs_check_node(root, &rec->parent_key, buf);
4498 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4500 status = try_to_fix_bad_block(root, buf, status);
4501 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4503 fprintf(stderr, "bad block %llu\n",
4504 (unsigned long long)buf->start);
4507 * Signal to callers we need to start the scan over
4508 * again since we'll have cowed blocks.
4513 rec->content_checked = 1;
4514 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4515 rec->owner_ref_checked = 1;
4517 ret = check_owner_ref(root, rec, buf);
4519 rec->owner_ref_checked = 1;
4523 maybe_free_extent_rec(extent_cache, rec);
4528 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4529 u64 parent, u64 root)
4531 struct rb_node *node;
4532 struct tree_backref *back = NULL;
4533 struct tree_backref match = {
4540 match.parent = parent;
4541 match.node.full_backref = 1;
4546 node = rb_search(&rec->backref_tree, &match.node.node,
4547 (rb_compare_keys)compare_extent_backref, NULL);
4549 back = to_tree_backref(rb_node_to_extent_backref(node));
4554 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4555 u64 parent, u64 root)
4557 struct tree_backref *ref = malloc(sizeof(*ref));
4561 memset(&ref->node, 0, sizeof(ref->node));
4563 ref->parent = parent;
4564 ref->node.full_backref = 1;
4567 ref->node.full_backref = 0;
4569 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4574 static struct data_backref *find_data_backref(struct extent_record *rec,
4575 u64 parent, u64 root,
4576 u64 owner, u64 offset,
4578 u64 disk_bytenr, u64 bytes)
4580 struct rb_node *node;
4581 struct data_backref *back = NULL;
4582 struct data_backref match = {
4589 .found_ref = found_ref,
4590 .disk_bytenr = disk_bytenr,
4594 match.parent = parent;
4595 match.node.full_backref = 1;
4600 node = rb_search(&rec->backref_tree, &match.node.node,
4601 (rb_compare_keys)compare_extent_backref, NULL);
4603 back = to_data_backref(rb_node_to_extent_backref(node));
4608 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4609 u64 parent, u64 root,
4610 u64 owner, u64 offset,
4613 struct data_backref *ref = malloc(sizeof(*ref));
4617 memset(&ref->node, 0, sizeof(ref->node));
4618 ref->node.is_data = 1;
4621 ref->parent = parent;
4624 ref->node.full_backref = 1;
4628 ref->offset = offset;
4629 ref->node.full_backref = 0;
4631 ref->bytes = max_size;
4634 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4635 if (max_size > rec->max_size)
4636 rec->max_size = max_size;
4640 /* Check if the type of extent matches with its chunk */
4641 static void check_extent_type(struct extent_record *rec)
4643 struct btrfs_block_group_cache *bg_cache;
4645 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4649 /* data extent, check chunk directly*/
4650 if (!rec->metadata) {
4651 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4652 rec->wrong_chunk_type = 1;
4656 /* metadata extent, check the obvious case first */
4657 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4658 BTRFS_BLOCK_GROUP_METADATA))) {
4659 rec->wrong_chunk_type = 1;
4664 * Check SYSTEM extent, as it's also marked as metadata, we can only
4665 * make sure it's a SYSTEM extent by its backref
4667 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4668 struct extent_backref *node;
4669 struct tree_backref *tback;
4672 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4673 if (node->is_data) {
4674 /* tree block shouldn't have data backref */
4675 rec->wrong_chunk_type = 1;
4678 tback = container_of(node, struct tree_backref, node);
4680 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4681 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4683 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4684 if (!(bg_cache->flags & bg_type))
4685 rec->wrong_chunk_type = 1;
4690 * Allocate a new extent record, fill default values from @tmpl and insert int
4691 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4692 * the cache, otherwise it fails.
4694 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4695 struct extent_record *tmpl)
4697 struct extent_record *rec;
4700 rec = malloc(sizeof(*rec));
4703 rec->start = tmpl->start;
4704 rec->max_size = tmpl->max_size;
4705 rec->nr = max(tmpl->nr, tmpl->max_size);
4706 rec->found_rec = tmpl->found_rec;
4707 rec->content_checked = tmpl->content_checked;
4708 rec->owner_ref_checked = tmpl->owner_ref_checked;
4709 rec->num_duplicates = 0;
4710 rec->metadata = tmpl->metadata;
4711 rec->flag_block_full_backref = FLAG_UNSET;
4712 rec->bad_full_backref = 0;
4713 rec->crossing_stripes = 0;
4714 rec->wrong_chunk_type = 0;
4715 rec->is_root = tmpl->is_root;
4716 rec->refs = tmpl->refs;
4717 rec->extent_item_refs = tmpl->extent_item_refs;
4718 rec->parent_generation = tmpl->parent_generation;
4719 INIT_LIST_HEAD(&rec->backrefs);
4720 INIT_LIST_HEAD(&rec->dups);
4721 INIT_LIST_HEAD(&rec->list);
4722 rec->backref_tree = RB_ROOT;
4723 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4724 rec->cache.start = tmpl->start;
4725 rec->cache.size = tmpl->nr;
4726 ret = insert_cache_extent(extent_cache, &rec->cache);
4728 bytes_used += rec->nr;
4731 rec->crossing_stripes = check_crossing_stripes(rec->start,
4732 global_info->tree_root->nodesize);
4733 check_extent_type(rec);
4738 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4740 * - refs - if found, increase refs
4741 * - is_root - if found, set
4742 * - content_checked - if found, set
4743 * - owner_ref_checked - if found, set
4745 * If not found, create a new one, initialize and insert.
4747 static int add_extent_rec(struct cache_tree *extent_cache,
4748 struct extent_record *tmpl)
4750 struct extent_record *rec;
4751 struct cache_extent *cache;
4755 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4757 rec = container_of(cache, struct extent_record, cache);
4761 rec->nr = max(tmpl->nr, tmpl->max_size);
4764 * We need to make sure to reset nr to whatever the extent
4765 * record says was the real size, this way we can compare it to
4768 if (tmpl->found_rec) {
4769 if (tmpl->start != rec->start || rec->found_rec) {
4770 struct extent_record *tmp;
4773 if (list_empty(&rec->list))
4774 list_add_tail(&rec->list,
4775 &duplicate_extents);
4778 * We have to do this song and dance in case we
4779 * find an extent record that falls inside of
4780 * our current extent record but does not have
4781 * the same objectid.
4783 tmp = malloc(sizeof(*tmp));
4786 tmp->start = tmpl->start;
4787 tmp->max_size = tmpl->max_size;
4790 tmp->metadata = tmpl->metadata;
4791 tmp->extent_item_refs = tmpl->extent_item_refs;
4792 INIT_LIST_HEAD(&tmp->list);
4793 list_add_tail(&tmp->list, &rec->dups);
4794 rec->num_duplicates++;
4801 if (tmpl->extent_item_refs && !dup) {
4802 if (rec->extent_item_refs) {
4803 fprintf(stderr, "block %llu rec "
4804 "extent_item_refs %llu, passed %llu\n",
4805 (unsigned long long)tmpl->start,
4806 (unsigned long long)
4807 rec->extent_item_refs,
4808 (unsigned long long)tmpl->extent_item_refs);
4810 rec->extent_item_refs = tmpl->extent_item_refs;
4814 if (tmpl->content_checked)
4815 rec->content_checked = 1;
4816 if (tmpl->owner_ref_checked)
4817 rec->owner_ref_checked = 1;
4818 memcpy(&rec->parent_key, &tmpl->parent_key,
4819 sizeof(tmpl->parent_key));
4820 if (tmpl->parent_generation)
4821 rec->parent_generation = tmpl->parent_generation;
4822 if (rec->max_size < tmpl->max_size)
4823 rec->max_size = tmpl->max_size;
4826 * A metadata extent can't cross stripe_len boundary, otherwise
4827 * kernel scrub won't be able to handle it.
4828 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4832 rec->crossing_stripes = check_crossing_stripes(
4833 rec->start, global_info->tree_root->nodesize);
4834 check_extent_type(rec);
4835 maybe_free_extent_rec(extent_cache, rec);
4839 ret = add_extent_rec_nolookup(extent_cache, tmpl);
4844 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4845 u64 parent, u64 root, int found_ref)
4847 struct extent_record *rec;
4848 struct tree_backref *back;
4849 struct cache_extent *cache;
4851 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4853 struct extent_record tmpl;
4855 memset(&tmpl, 0, sizeof(tmpl));
4856 tmpl.start = bytenr;
4860 add_extent_rec_nolookup(extent_cache, &tmpl);
4862 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4867 rec = container_of(cache, struct extent_record, cache);
4868 if (rec->start != bytenr) {
4872 back = find_tree_backref(rec, parent, root);
4874 back = alloc_tree_backref(rec, parent, root);
4879 if (back->node.found_ref) {
4880 fprintf(stderr, "Extent back ref already exists "
4881 "for %llu parent %llu root %llu \n",
4882 (unsigned long long)bytenr,
4883 (unsigned long long)parent,
4884 (unsigned long long)root);
4886 back->node.found_ref = 1;
4888 if (back->node.found_extent_tree) {
4889 fprintf(stderr, "Extent back ref already exists "
4890 "for %llu parent %llu root %llu \n",
4891 (unsigned long long)bytenr,
4892 (unsigned long long)parent,
4893 (unsigned long long)root);
4895 back->node.found_extent_tree = 1;
4897 check_extent_type(rec);
4898 maybe_free_extent_rec(extent_cache, rec);
4902 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4903 u64 parent, u64 root, u64 owner, u64 offset,
4904 u32 num_refs, int found_ref, u64 max_size)
4906 struct extent_record *rec;
4907 struct data_backref *back;
4908 struct cache_extent *cache;
4910 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4912 struct extent_record tmpl;
4914 memset(&tmpl, 0, sizeof(tmpl));
4915 tmpl.start = bytenr;
4917 tmpl.max_size = max_size;
4919 add_extent_rec_nolookup(extent_cache, &tmpl);
4921 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4926 rec = container_of(cache, struct extent_record, cache);
4927 if (rec->max_size < max_size)
4928 rec->max_size = max_size;
4931 * If found_ref is set then max_size is the real size and must match the
4932 * existing refs. So if we have already found a ref then we need to
4933 * make sure that this ref matches the existing one, otherwise we need
4934 * to add a new backref so we can notice that the backrefs don't match
4935 * and we need to figure out who is telling the truth. This is to
4936 * account for that awful fsync bug I introduced where we'd end up with
4937 * a btrfs_file_extent_item that would have its length include multiple
4938 * prealloc extents or point inside of a prealloc extent.
4940 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4943 back = alloc_data_backref(rec, parent, root, owner, offset,
4949 BUG_ON(num_refs != 1);
4950 if (back->node.found_ref)
4951 BUG_ON(back->bytes != max_size);
4952 back->node.found_ref = 1;
4953 back->found_ref += 1;
4954 back->bytes = max_size;
4955 back->disk_bytenr = bytenr;
4957 rec->content_checked = 1;
4958 rec->owner_ref_checked = 1;
4960 if (back->node.found_extent_tree) {
4961 fprintf(stderr, "Extent back ref already exists "
4962 "for %llu parent %llu root %llu "
4963 "owner %llu offset %llu num_refs %lu\n",
4964 (unsigned long long)bytenr,
4965 (unsigned long long)parent,
4966 (unsigned long long)root,
4967 (unsigned long long)owner,
4968 (unsigned long long)offset,
4969 (unsigned long)num_refs);
4971 back->num_refs = num_refs;
4972 back->node.found_extent_tree = 1;
4974 maybe_free_extent_rec(extent_cache, rec);
4978 static int add_pending(struct cache_tree *pending,
4979 struct cache_tree *seen, u64 bytenr, u32 size)
4982 ret = add_cache_extent(seen, bytenr, size);
4985 add_cache_extent(pending, bytenr, size);
4989 static int pick_next_pending(struct cache_tree *pending,
4990 struct cache_tree *reada,
4991 struct cache_tree *nodes,
4992 u64 last, struct block_info *bits, int bits_nr,
4995 unsigned long node_start = last;
4996 struct cache_extent *cache;
4999 cache = search_cache_extent(reada, 0);
5001 bits[0].start = cache->start;
5002 bits[0].size = cache->size;
5007 if (node_start > 32768)
5008 node_start -= 32768;
5010 cache = search_cache_extent(nodes, node_start);
5012 cache = search_cache_extent(nodes, 0);
5015 cache = search_cache_extent(pending, 0);
5020 bits[ret].start = cache->start;
5021 bits[ret].size = cache->size;
5022 cache = next_cache_extent(cache);
5024 } while (cache && ret < bits_nr);
5030 bits[ret].start = cache->start;
5031 bits[ret].size = cache->size;
5032 cache = next_cache_extent(cache);
5034 } while (cache && ret < bits_nr);
5036 if (bits_nr - ret > 8) {
5037 u64 lookup = bits[0].start + bits[0].size;
5038 struct cache_extent *next;
5039 next = search_cache_extent(pending, lookup);
5041 if (next->start - lookup > 32768)
5043 bits[ret].start = next->start;
5044 bits[ret].size = next->size;
5045 lookup = next->start + next->size;
5049 next = next_cache_extent(next);
5057 static void free_chunk_record(struct cache_extent *cache)
5059 struct chunk_record *rec;
5061 rec = container_of(cache, struct chunk_record, cache);
5062 list_del_init(&rec->list);
5063 list_del_init(&rec->dextents);
5067 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5069 cache_tree_free_extents(chunk_cache, free_chunk_record);
5072 static void free_device_record(struct rb_node *node)
5074 struct device_record *rec;
5076 rec = container_of(node, struct device_record, node);
5080 FREE_RB_BASED_TREE(device_cache, free_device_record);
5082 int insert_block_group_record(struct block_group_tree *tree,
5083 struct block_group_record *bg_rec)
5087 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5091 list_add_tail(&bg_rec->list, &tree->block_groups);
5095 static void free_block_group_record(struct cache_extent *cache)
5097 struct block_group_record *rec;
5099 rec = container_of(cache, struct block_group_record, cache);
5100 list_del_init(&rec->list);
5104 void free_block_group_tree(struct block_group_tree *tree)
5106 cache_tree_free_extents(&tree->tree, free_block_group_record);
5109 int insert_device_extent_record(struct device_extent_tree *tree,
5110 struct device_extent_record *de_rec)
5115 * Device extent is a bit different from the other extents, because
5116 * the extents which belong to the different devices may have the
5117 * same start and size, so we need use the special extent cache
5118 * search/insert functions.
5120 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5124 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5125 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5129 static void free_device_extent_record(struct cache_extent *cache)
5131 struct device_extent_record *rec;
5133 rec = container_of(cache, struct device_extent_record, cache);
5134 if (!list_empty(&rec->chunk_list))
5135 list_del_init(&rec->chunk_list);
5136 if (!list_empty(&rec->device_list))
5137 list_del_init(&rec->device_list);
5141 void free_device_extent_tree(struct device_extent_tree *tree)
5143 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5146 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5147 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5148 struct extent_buffer *leaf, int slot)
5150 struct btrfs_extent_ref_v0 *ref0;
5151 struct btrfs_key key;
5153 btrfs_item_key_to_cpu(leaf, &key, slot);
5154 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5155 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5156 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5158 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5159 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
5165 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5166 struct btrfs_key *key,
5169 struct btrfs_chunk *ptr;
5170 struct chunk_record *rec;
5173 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5174 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5176 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5178 fprintf(stderr, "memory allocation failed\n");
5182 INIT_LIST_HEAD(&rec->list);
5183 INIT_LIST_HEAD(&rec->dextents);
5186 rec->cache.start = key->offset;
5187 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5189 rec->generation = btrfs_header_generation(leaf);
5191 rec->objectid = key->objectid;
5192 rec->type = key->type;
5193 rec->offset = key->offset;
5195 rec->length = rec->cache.size;
5196 rec->owner = btrfs_chunk_owner(leaf, ptr);
5197 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5198 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5199 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5200 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5201 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5202 rec->num_stripes = num_stripes;
5203 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5205 for (i = 0; i < rec->num_stripes; ++i) {
5206 rec->stripes[i].devid =
5207 btrfs_stripe_devid_nr(leaf, ptr, i);
5208 rec->stripes[i].offset =
5209 btrfs_stripe_offset_nr(leaf, ptr, i);
5210 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5211 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5218 static int process_chunk_item(struct cache_tree *chunk_cache,
5219 struct btrfs_key *key, struct extent_buffer *eb,
5222 struct chunk_record *rec;
5225 rec = btrfs_new_chunk_record(eb, key, slot);
5226 ret = insert_cache_extent(chunk_cache, &rec->cache);
5228 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5229 rec->offset, rec->length);
5236 static int process_device_item(struct rb_root *dev_cache,
5237 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5239 struct btrfs_dev_item *ptr;
5240 struct device_record *rec;
5243 ptr = btrfs_item_ptr(eb,
5244 slot, struct btrfs_dev_item);
5246 rec = malloc(sizeof(*rec));
5248 fprintf(stderr, "memory allocation failed\n");
5252 rec->devid = key->offset;
5253 rec->generation = btrfs_header_generation(eb);
5255 rec->objectid = key->objectid;
5256 rec->type = key->type;
5257 rec->offset = key->offset;
5259 rec->devid = btrfs_device_id(eb, ptr);
5260 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5261 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5263 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5265 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5272 struct block_group_record *
5273 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5276 struct btrfs_block_group_item *ptr;
5277 struct block_group_record *rec;
5279 rec = calloc(1, sizeof(*rec));
5281 fprintf(stderr, "memory allocation failed\n");
5285 rec->cache.start = key->objectid;
5286 rec->cache.size = key->offset;
5288 rec->generation = btrfs_header_generation(leaf);
5290 rec->objectid = key->objectid;
5291 rec->type = key->type;
5292 rec->offset = key->offset;
5294 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5295 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5297 INIT_LIST_HEAD(&rec->list);
5302 static int process_block_group_item(struct block_group_tree *block_group_cache,
5303 struct btrfs_key *key,
5304 struct extent_buffer *eb, int slot)
5306 struct block_group_record *rec;
5309 rec = btrfs_new_block_group_record(eb, key, slot);
5310 ret = insert_block_group_record(block_group_cache, rec);
5312 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5313 rec->objectid, rec->offset);
5320 struct device_extent_record *
5321 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5322 struct btrfs_key *key, int slot)
5324 struct device_extent_record *rec;
5325 struct btrfs_dev_extent *ptr;
5327 rec = calloc(1, sizeof(*rec));
5329 fprintf(stderr, "memory allocation failed\n");
5333 rec->cache.objectid = key->objectid;
5334 rec->cache.start = key->offset;
5336 rec->generation = btrfs_header_generation(leaf);
5338 rec->objectid = key->objectid;
5339 rec->type = key->type;
5340 rec->offset = key->offset;
5342 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5343 rec->chunk_objecteid =
5344 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5346 btrfs_dev_extent_chunk_offset(leaf, ptr);
5347 rec->length = btrfs_dev_extent_length(leaf, ptr);
5348 rec->cache.size = rec->length;
5350 INIT_LIST_HEAD(&rec->chunk_list);
5351 INIT_LIST_HEAD(&rec->device_list);
5357 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5358 struct btrfs_key *key, struct extent_buffer *eb,
5361 struct device_extent_record *rec;
5364 rec = btrfs_new_device_extent_record(eb, key, slot);
5365 ret = insert_device_extent_record(dev_extent_cache, rec);
5368 "Device extent[%llu, %llu, %llu] existed.\n",
5369 rec->objectid, rec->offset, rec->length);
5376 static int process_extent_item(struct btrfs_root *root,
5377 struct cache_tree *extent_cache,
5378 struct extent_buffer *eb, int slot)
5380 struct btrfs_extent_item *ei;
5381 struct btrfs_extent_inline_ref *iref;
5382 struct btrfs_extent_data_ref *dref;
5383 struct btrfs_shared_data_ref *sref;
5384 struct btrfs_key key;
5385 struct extent_record tmpl;
5389 u32 item_size = btrfs_item_size_nr(eb, slot);
5395 btrfs_item_key_to_cpu(eb, &key, slot);
5397 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5399 num_bytes = root->nodesize;
5401 num_bytes = key.offset;
5404 if (item_size < sizeof(*ei)) {
5405 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5406 struct btrfs_extent_item_v0 *ei0;
5407 BUG_ON(item_size != sizeof(*ei0));
5408 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5409 refs = btrfs_extent_refs_v0(eb, ei0);
5413 memset(&tmpl, 0, sizeof(tmpl));
5414 tmpl.start = key.objectid;
5415 tmpl.nr = num_bytes;
5416 tmpl.extent_item_refs = refs;
5417 tmpl.metadata = metadata;
5419 tmpl.max_size = num_bytes;
5421 return add_extent_rec(extent_cache, &tmpl);
5424 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5425 refs = btrfs_extent_refs(eb, ei);
5426 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5431 memset(&tmpl, 0, sizeof(tmpl));
5432 tmpl.start = key.objectid;
5433 tmpl.nr = num_bytes;
5434 tmpl.extent_item_refs = refs;
5435 tmpl.metadata = metadata;
5437 tmpl.max_size = num_bytes;
5438 add_extent_rec(extent_cache, &tmpl);
5440 ptr = (unsigned long)(ei + 1);
5441 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5442 key.type == BTRFS_EXTENT_ITEM_KEY)
5443 ptr += sizeof(struct btrfs_tree_block_info);
5445 end = (unsigned long)ei + item_size;
5447 iref = (struct btrfs_extent_inline_ref *)ptr;
5448 type = btrfs_extent_inline_ref_type(eb, iref);
5449 offset = btrfs_extent_inline_ref_offset(eb, iref);
5451 case BTRFS_TREE_BLOCK_REF_KEY:
5452 add_tree_backref(extent_cache, key.objectid,
5455 case BTRFS_SHARED_BLOCK_REF_KEY:
5456 add_tree_backref(extent_cache, key.objectid,
5459 case BTRFS_EXTENT_DATA_REF_KEY:
5460 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5461 add_data_backref(extent_cache, key.objectid, 0,
5462 btrfs_extent_data_ref_root(eb, dref),
5463 btrfs_extent_data_ref_objectid(eb,
5465 btrfs_extent_data_ref_offset(eb, dref),
5466 btrfs_extent_data_ref_count(eb, dref),
5469 case BTRFS_SHARED_DATA_REF_KEY:
5470 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5471 add_data_backref(extent_cache, key.objectid, offset,
5473 btrfs_shared_data_ref_count(eb, sref),
5477 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5478 key.objectid, key.type, num_bytes);
5481 ptr += btrfs_extent_inline_ref_size(type);
5488 static int check_cache_range(struct btrfs_root *root,
5489 struct btrfs_block_group_cache *cache,
5490 u64 offset, u64 bytes)
5492 struct btrfs_free_space *entry;
5498 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5499 bytenr = btrfs_sb_offset(i);
5500 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5501 cache->key.objectid, bytenr, 0,
5502 &logical, &nr, &stripe_len);
5507 if (logical[nr] + stripe_len <= offset)
5509 if (offset + bytes <= logical[nr])
5511 if (logical[nr] == offset) {
5512 if (stripe_len >= bytes) {
5516 bytes -= stripe_len;
5517 offset += stripe_len;
5518 } else if (logical[nr] < offset) {
5519 if (logical[nr] + stripe_len >=
5524 bytes = (offset + bytes) -
5525 (logical[nr] + stripe_len);
5526 offset = logical[nr] + stripe_len;
5529 * Could be tricky, the super may land in the
5530 * middle of the area we're checking. First
5531 * check the easiest case, it's at the end.
5533 if (logical[nr] + stripe_len >=
5535 bytes = logical[nr] - offset;
5539 /* Check the left side */
5540 ret = check_cache_range(root, cache,
5542 logical[nr] - offset);
5548 /* Now we continue with the right side */
5549 bytes = (offset + bytes) -
5550 (logical[nr] + stripe_len);
5551 offset = logical[nr] + stripe_len;
5558 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5560 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5561 offset, offset+bytes);
5565 if (entry->offset != offset) {
5566 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5571 if (entry->bytes != bytes) {
5572 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5573 bytes, entry->bytes, offset);
5577 unlink_free_space(cache->free_space_ctl, entry);
5582 static int verify_space_cache(struct btrfs_root *root,
5583 struct btrfs_block_group_cache *cache)
5585 struct btrfs_path *path;
5586 struct extent_buffer *leaf;
5587 struct btrfs_key key;
5591 path = btrfs_alloc_path();
5595 root = root->fs_info->extent_root;
5597 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5599 key.objectid = last;
5601 key.type = BTRFS_EXTENT_ITEM_KEY;
5603 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5608 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5609 ret = btrfs_next_leaf(root, path);
5617 leaf = path->nodes[0];
5618 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5619 if (key.objectid >= cache->key.offset + cache->key.objectid)
5621 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5622 key.type != BTRFS_METADATA_ITEM_KEY) {
5627 if (last == key.objectid) {
5628 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5629 last = key.objectid + key.offset;
5631 last = key.objectid + root->nodesize;
5636 ret = check_cache_range(root, cache, last,
5637 key.objectid - last);
5640 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5641 last = key.objectid + key.offset;
5643 last = key.objectid + root->nodesize;
5647 if (last < cache->key.objectid + cache->key.offset)
5648 ret = check_cache_range(root, cache, last,
5649 cache->key.objectid +
5650 cache->key.offset - last);
5653 btrfs_free_path(path);
5656 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5657 fprintf(stderr, "There are still entries left in the space "
5665 static int check_space_cache(struct btrfs_root *root)
5667 struct btrfs_block_group_cache *cache;
5668 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5672 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5673 btrfs_super_generation(root->fs_info->super_copy) !=
5674 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5675 printf("cache and super generation don't match, space cache "
5676 "will be invalidated\n");
5680 if (ctx.progress_enabled) {
5681 ctx.tp = TASK_FREE_SPACE;
5682 task_start(ctx.info);
5686 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5690 start = cache->key.objectid + cache->key.offset;
5691 if (!cache->free_space_ctl) {
5692 if (btrfs_init_free_space_ctl(cache,
5693 root->sectorsize)) {
5698 btrfs_remove_free_space_cache(cache);
5701 if (btrfs_fs_compat_ro(root->fs_info,
5702 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5703 ret = exclude_super_stripes(root, cache);
5705 fprintf(stderr, "could not exclude super stripes: %s\n",
5710 ret = load_free_space_tree(root->fs_info, cache);
5711 free_excluded_extents(root, cache);
5713 fprintf(stderr, "could not load free space tree: %s\n",
5720 ret = load_free_space_cache(root->fs_info, cache);
5725 ret = verify_space_cache(root, cache);
5727 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5728 cache->key.objectid);
5733 task_stop(ctx.info);
5735 return error ? -EINVAL : 0;
5738 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5739 u64 num_bytes, unsigned long leaf_offset,
5740 struct extent_buffer *eb) {
5743 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5745 unsigned long csum_offset;
5749 u64 data_checked = 0;
5755 if (num_bytes % root->sectorsize)
5758 data = malloc(num_bytes);
5762 while (offset < num_bytes) {
5765 read_len = num_bytes - offset;
5766 /* read as much space once a time */
5767 ret = read_extent_data(root, data + offset,
5768 bytenr + offset, &read_len, mirror);
5772 /* verify every 4k data's checksum */
5773 while (data_checked < read_len) {
5775 tmp = offset + data_checked;
5777 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5778 csum, root->sectorsize);
5779 btrfs_csum_final(csum, (char *)&csum);
5781 csum_offset = leaf_offset +
5782 tmp / root->sectorsize * csum_size;
5783 read_extent_buffer(eb, (char *)&csum_expected,
5784 csum_offset, csum_size);
5785 /* try another mirror */
5786 if (csum != csum_expected) {
5787 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5788 mirror, bytenr + tmp,
5789 csum, csum_expected);
5790 num_copies = btrfs_num_copies(
5791 &root->fs_info->mapping_tree,
5793 if (mirror < num_copies - 1) {
5798 data_checked += root->sectorsize;
5807 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5810 struct btrfs_path *path;
5811 struct extent_buffer *leaf;
5812 struct btrfs_key key;
5815 path = btrfs_alloc_path();
5817 fprintf(stderr, "Error allocating path\n");
5821 key.objectid = bytenr;
5822 key.type = BTRFS_EXTENT_ITEM_KEY;
5823 key.offset = (u64)-1;
5826 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5829 fprintf(stderr, "Error looking up extent record %d\n", ret);
5830 btrfs_free_path(path);
5833 if (path->slots[0] > 0) {
5836 ret = btrfs_prev_leaf(root, path);
5839 } else if (ret > 0) {
5846 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5849 * Block group items come before extent items if they have the same
5850 * bytenr, so walk back one more just in case. Dear future traveller,
5851 * first congrats on mastering time travel. Now if it's not too much
5852 * trouble could you go back to 2006 and tell Chris to make the
5853 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5854 * EXTENT_ITEM_KEY please?
5856 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5857 if (path->slots[0] > 0) {
5860 ret = btrfs_prev_leaf(root, path);
5863 } else if (ret > 0) {
5868 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5872 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5873 ret = btrfs_next_leaf(root, path);
5875 fprintf(stderr, "Error going to next leaf "
5877 btrfs_free_path(path);
5883 leaf = path->nodes[0];
5884 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5885 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5889 if (key.objectid + key.offset < bytenr) {
5893 if (key.objectid > bytenr + num_bytes)
5896 if (key.objectid == bytenr) {
5897 if (key.offset >= num_bytes) {
5901 num_bytes -= key.offset;
5902 bytenr += key.offset;
5903 } else if (key.objectid < bytenr) {
5904 if (key.objectid + key.offset >= bytenr + num_bytes) {
5908 num_bytes = (bytenr + num_bytes) -
5909 (key.objectid + key.offset);
5910 bytenr = key.objectid + key.offset;
5912 if (key.objectid + key.offset < bytenr + num_bytes) {
5913 u64 new_start = key.objectid + key.offset;
5914 u64 new_bytes = bytenr + num_bytes - new_start;
5917 * Weird case, the extent is in the middle of
5918 * our range, we'll have to search one side
5919 * and then the other. Not sure if this happens
5920 * in real life, but no harm in coding it up
5921 * anyway just in case.
5923 btrfs_release_path(path);
5924 ret = check_extent_exists(root, new_start,
5927 fprintf(stderr, "Right section didn't "
5931 num_bytes = key.objectid - bytenr;
5934 num_bytes = key.objectid - bytenr;
5941 if (num_bytes && !ret) {
5942 fprintf(stderr, "There are no extents for csum range "
5943 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5947 btrfs_free_path(path);
5951 static int check_csums(struct btrfs_root *root)
5953 struct btrfs_path *path;
5954 struct extent_buffer *leaf;
5955 struct btrfs_key key;
5956 u64 offset = 0, num_bytes = 0;
5957 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5961 unsigned long leaf_offset;
5963 root = root->fs_info->csum_root;
5964 if (!extent_buffer_uptodate(root->node)) {
5965 fprintf(stderr, "No valid csum tree found\n");
5969 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5970 key.type = BTRFS_EXTENT_CSUM_KEY;
5973 path = btrfs_alloc_path();
5977 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5979 fprintf(stderr, "Error searching csum tree %d\n", ret);
5980 btrfs_free_path(path);
5984 if (ret > 0 && path->slots[0])
5989 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5990 ret = btrfs_next_leaf(root, path);
5992 fprintf(stderr, "Error going to next leaf "
5999 leaf = path->nodes[0];
6001 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6002 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
6007 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
6008 csum_size) * root->sectorsize;
6009 if (!check_data_csum)
6010 goto skip_csum_check;
6011 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
6012 ret = check_extent_csums(root, key.offset, data_len,
6018 offset = key.offset;
6019 } else if (key.offset != offset + num_bytes) {
6020 ret = check_extent_exists(root, offset, num_bytes);
6022 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6023 "there is no extent record\n",
6024 offset, offset+num_bytes);
6027 offset = key.offset;
6030 num_bytes += data_len;
6034 btrfs_free_path(path);
6038 static int is_dropped_key(struct btrfs_key *key,
6039 struct btrfs_key *drop_key) {
6040 if (key->objectid < drop_key->objectid)
6042 else if (key->objectid == drop_key->objectid) {
6043 if (key->type < drop_key->type)
6045 else if (key->type == drop_key->type) {
6046 if (key->offset < drop_key->offset)
6054 * Here are the rules for FULL_BACKREF.
6056 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6057 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6059 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6060 * if it happened after the relocation occurred since we'll have dropped the
6061 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6062 * have no real way to know for sure.
6064 * We process the blocks one root at a time, and we start from the lowest root
6065 * objectid and go to the highest. So we can just lookup the owner backref for
6066 * the record and if we don't find it then we know it doesn't exist and we have
6069 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6070 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6071 * be set or not and then we can check later once we've gathered all the refs.
6073 static int calc_extent_flag(struct btrfs_root *root,
6074 struct cache_tree *extent_cache,
6075 struct extent_buffer *buf,
6076 struct root_item_record *ri,
6079 struct extent_record *rec;
6080 struct cache_extent *cache;
6081 struct tree_backref *tback;
6084 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6085 /* we have added this extent before */
6087 rec = container_of(cache, struct extent_record, cache);
6090 * Except file/reloc tree, we can not have
6093 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6098 if (buf->start == ri->bytenr)
6101 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6104 owner = btrfs_header_owner(buf);
6105 if (owner == ri->objectid)
6108 tback = find_tree_backref(rec, 0, owner);
6113 if (rec->flag_block_full_backref != FLAG_UNSET &&
6114 rec->flag_block_full_backref != 0)
6115 rec->bad_full_backref = 1;
6118 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6119 if (rec->flag_block_full_backref != FLAG_UNSET &&
6120 rec->flag_block_full_backref != 1)
6121 rec->bad_full_backref = 1;
6125 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6127 fprintf(stderr, "Invalid key type(");
6128 print_key_type(stderr, 0, key_type);
6129 fprintf(stderr, ") found in root(");
6130 print_objectid(stderr, rootid, 0);
6131 fprintf(stderr, ")\n");
6135 * Check if the key is valid with its extent buffer.
6137 * This is a early check in case invalid key exists in a extent buffer
6138 * This is not comprehensive yet, but should prevent wrong key/item passed
6141 static int check_type_with_root(u64 rootid, u8 key_type)
6144 /* Only valid in chunk tree */
6145 case BTRFS_DEV_ITEM_KEY:
6146 case BTRFS_CHUNK_ITEM_KEY:
6147 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6150 /* valid in csum and log tree */
6151 case BTRFS_CSUM_TREE_OBJECTID:
6152 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6156 case BTRFS_EXTENT_ITEM_KEY:
6157 case BTRFS_METADATA_ITEM_KEY:
6158 case BTRFS_BLOCK_GROUP_ITEM_KEY:
6159 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6162 case BTRFS_ROOT_ITEM_KEY:
6163 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6166 case BTRFS_DEV_EXTENT_KEY:
6167 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6173 report_mismatch_key_root(key_type, rootid);
6177 static int run_next_block(struct btrfs_root *root,
6178 struct block_info *bits,
6181 struct cache_tree *pending,
6182 struct cache_tree *seen,
6183 struct cache_tree *reada,
6184 struct cache_tree *nodes,
6185 struct cache_tree *extent_cache,
6186 struct cache_tree *chunk_cache,
6187 struct rb_root *dev_cache,
6188 struct block_group_tree *block_group_cache,
6189 struct device_extent_tree *dev_extent_cache,
6190 struct root_item_record *ri)
6192 struct extent_buffer *buf;
6193 struct extent_record *rec = NULL;
6204 struct btrfs_key key;
6205 struct cache_extent *cache;
6208 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6209 bits_nr, &reada_bits);
6214 for(i = 0; i < nritems; i++) {
6215 ret = add_cache_extent(reada, bits[i].start,
6220 /* fixme, get the parent transid */
6221 readahead_tree_block(root, bits[i].start,
6225 *last = bits[0].start;
6226 bytenr = bits[0].start;
6227 size = bits[0].size;
6229 cache = lookup_cache_extent(pending, bytenr, size);
6231 remove_cache_extent(pending, cache);
6234 cache = lookup_cache_extent(reada, bytenr, size);
6236 remove_cache_extent(reada, cache);
6239 cache = lookup_cache_extent(nodes, bytenr, size);
6241 remove_cache_extent(nodes, cache);
6244 cache = lookup_cache_extent(extent_cache, bytenr, size);
6246 rec = container_of(cache, struct extent_record, cache);
6247 gen = rec->parent_generation;
6250 /* fixme, get the real parent transid */
6251 buf = read_tree_block(root, bytenr, size, gen);
6252 if (!extent_buffer_uptodate(buf)) {
6253 record_bad_block_io(root->fs_info,
6254 extent_cache, bytenr, size);
6258 nritems = btrfs_header_nritems(buf);
6261 if (!init_extent_tree) {
6262 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6263 btrfs_header_level(buf), 1, NULL,
6266 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6268 fprintf(stderr, "Couldn't calc extent flags\n");
6269 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6274 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6276 fprintf(stderr, "Couldn't calc extent flags\n");
6277 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6281 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6283 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6284 ri->objectid == btrfs_header_owner(buf)) {
6286 * Ok we got to this block from it's original owner and
6287 * we have FULL_BACKREF set. Relocation can leave
6288 * converted blocks over so this is altogether possible,
6289 * however it's not possible if the generation > the
6290 * last snapshot, so check for this case.
6292 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6293 btrfs_header_generation(buf) > ri->last_snapshot) {
6294 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6295 rec->bad_full_backref = 1;
6300 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6301 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6302 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6303 rec->bad_full_backref = 1;
6307 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6308 rec->flag_block_full_backref = 1;
6312 rec->flag_block_full_backref = 0;
6314 owner = btrfs_header_owner(buf);
6317 ret = check_block(root, extent_cache, buf, flags);
6321 if (btrfs_is_leaf(buf)) {
6322 btree_space_waste += btrfs_leaf_free_space(root, buf);
6323 for (i = 0; i < nritems; i++) {
6324 struct btrfs_file_extent_item *fi;
6325 btrfs_item_key_to_cpu(buf, &key, i);
6327 * Check key type against the leaf owner.
6328 * Could filter quite a lot of early error if
6331 if (check_type_with_root(btrfs_header_owner(buf),
6333 fprintf(stderr, "ignoring invalid key\n");
6336 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6337 process_extent_item(root, extent_cache, buf,
6341 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6342 process_extent_item(root, extent_cache, buf,
6346 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6348 btrfs_item_size_nr(buf, i);
6351 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6352 process_chunk_item(chunk_cache, &key, buf, i);
6355 if (key.type == BTRFS_DEV_ITEM_KEY) {
6356 process_device_item(dev_cache, &key, buf, i);
6359 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6360 process_block_group_item(block_group_cache,
6364 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6365 process_device_extent_item(dev_extent_cache,
6370 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6371 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6372 process_extent_ref_v0(extent_cache, buf, i);
6379 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6380 add_tree_backref(extent_cache, key.objectid, 0,
6384 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6385 add_tree_backref(extent_cache, key.objectid,
6389 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6390 struct btrfs_extent_data_ref *ref;
6391 ref = btrfs_item_ptr(buf, i,
6392 struct btrfs_extent_data_ref);
6393 add_data_backref(extent_cache,
6395 btrfs_extent_data_ref_root(buf, ref),
6396 btrfs_extent_data_ref_objectid(buf,
6398 btrfs_extent_data_ref_offset(buf, ref),
6399 btrfs_extent_data_ref_count(buf, ref),
6400 0, root->sectorsize);
6403 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6404 struct btrfs_shared_data_ref *ref;
6405 ref = btrfs_item_ptr(buf, i,
6406 struct btrfs_shared_data_ref);
6407 add_data_backref(extent_cache,
6408 key.objectid, key.offset, 0, 0, 0,
6409 btrfs_shared_data_ref_count(buf, ref),
6410 0, root->sectorsize);
6413 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6414 struct bad_item *bad;
6416 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6420 bad = malloc(sizeof(struct bad_item));
6423 INIT_LIST_HEAD(&bad->list);
6424 memcpy(&bad->key, &key,
6425 sizeof(struct btrfs_key));
6426 bad->root_id = owner;
6427 list_add_tail(&bad->list, &delete_items);
6430 if (key.type != BTRFS_EXTENT_DATA_KEY)
6432 fi = btrfs_item_ptr(buf, i,
6433 struct btrfs_file_extent_item);
6434 if (btrfs_file_extent_type(buf, fi) ==
6435 BTRFS_FILE_EXTENT_INLINE)
6437 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6440 data_bytes_allocated +=
6441 btrfs_file_extent_disk_num_bytes(buf, fi);
6442 if (data_bytes_allocated < root->sectorsize) {
6445 data_bytes_referenced +=
6446 btrfs_file_extent_num_bytes(buf, fi);
6447 add_data_backref(extent_cache,
6448 btrfs_file_extent_disk_bytenr(buf, fi),
6449 parent, owner, key.objectid, key.offset -
6450 btrfs_file_extent_offset(buf, fi), 1, 1,
6451 btrfs_file_extent_disk_num_bytes(buf, fi));
6455 struct btrfs_key first_key;
6457 first_key.objectid = 0;
6460 btrfs_item_key_to_cpu(buf, &first_key, 0);
6461 level = btrfs_header_level(buf);
6462 for (i = 0; i < nritems; i++) {
6463 struct extent_record tmpl;
6465 ptr = btrfs_node_blockptr(buf, i);
6466 size = root->nodesize;
6467 btrfs_node_key_to_cpu(buf, &key, i);
6469 if ((level == ri->drop_level)
6470 && is_dropped_key(&key, &ri->drop_key)) {
6475 memset(&tmpl, 0, sizeof(tmpl));
6476 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6477 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6482 tmpl.max_size = size;
6483 ret = add_extent_rec(extent_cache, &tmpl);
6486 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6489 add_pending(nodes, seen, ptr, size);
6491 add_pending(pending, seen, ptr, size);
6494 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6495 nritems) * sizeof(struct btrfs_key_ptr);
6497 total_btree_bytes += buf->len;
6498 if (fs_root_objectid(btrfs_header_owner(buf)))
6499 total_fs_tree_bytes += buf->len;
6500 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6501 total_extent_tree_bytes += buf->len;
6502 if (!found_old_backref &&
6503 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6504 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6505 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6506 found_old_backref = 1;
6508 free_extent_buffer(buf);
6512 static int add_root_to_pending(struct extent_buffer *buf,
6513 struct cache_tree *extent_cache,
6514 struct cache_tree *pending,
6515 struct cache_tree *seen,
6516 struct cache_tree *nodes,
6519 struct extent_record tmpl;
6521 if (btrfs_header_level(buf) > 0)
6522 add_pending(nodes, seen, buf->start, buf->len);
6524 add_pending(pending, seen, buf->start, buf->len);
6526 memset(&tmpl, 0, sizeof(tmpl));
6527 tmpl.start = buf->start;
6532 tmpl.max_size = buf->len;
6533 add_extent_rec(extent_cache, &tmpl);
6535 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6536 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6537 add_tree_backref(extent_cache, buf->start, buf->start,
6540 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6544 /* as we fix the tree, we might be deleting blocks that
6545 * we're tracking for repair. This hook makes sure we
6546 * remove any backrefs for blocks as we are fixing them.
6548 static int free_extent_hook(struct btrfs_trans_handle *trans,
6549 struct btrfs_root *root,
6550 u64 bytenr, u64 num_bytes, u64 parent,
6551 u64 root_objectid, u64 owner, u64 offset,
6554 struct extent_record *rec;
6555 struct cache_extent *cache;
6557 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6559 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6560 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6564 rec = container_of(cache, struct extent_record, cache);
6566 struct data_backref *back;
6567 back = find_data_backref(rec, parent, root_objectid, owner,
6568 offset, 1, bytenr, num_bytes);
6571 if (back->node.found_ref) {
6572 back->found_ref -= refs_to_drop;
6574 rec->refs -= refs_to_drop;
6576 if (back->node.found_extent_tree) {
6577 back->num_refs -= refs_to_drop;
6578 if (rec->extent_item_refs)
6579 rec->extent_item_refs -= refs_to_drop;
6581 if (back->found_ref == 0)
6582 back->node.found_ref = 0;
6583 if (back->num_refs == 0)
6584 back->node.found_extent_tree = 0;
6586 if (!back->node.found_extent_tree && back->node.found_ref) {
6587 rb_erase(&back->node.node, &rec->backref_tree);
6591 struct tree_backref *back;
6592 back = find_tree_backref(rec, parent, root_objectid);
6595 if (back->node.found_ref) {
6598 back->node.found_ref = 0;
6600 if (back->node.found_extent_tree) {
6601 if (rec->extent_item_refs)
6602 rec->extent_item_refs--;
6603 back->node.found_extent_tree = 0;
6605 if (!back->node.found_extent_tree && back->node.found_ref) {
6606 rb_erase(&back->node.node, &rec->backref_tree);
6610 maybe_free_extent_rec(extent_cache, rec);
6615 static int delete_extent_records(struct btrfs_trans_handle *trans,
6616 struct btrfs_root *root,
6617 struct btrfs_path *path,
6618 u64 bytenr, u64 new_len)
6620 struct btrfs_key key;
6621 struct btrfs_key found_key;
6622 struct extent_buffer *leaf;
6627 key.objectid = bytenr;
6629 key.offset = (u64)-1;
6632 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6639 if (path->slots[0] == 0)
6645 leaf = path->nodes[0];
6646 slot = path->slots[0];
6648 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6649 if (found_key.objectid != bytenr)
6652 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6653 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6654 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6655 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6656 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6657 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6658 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6659 btrfs_release_path(path);
6660 if (found_key.type == 0) {
6661 if (found_key.offset == 0)
6663 key.offset = found_key.offset - 1;
6664 key.type = found_key.type;
6666 key.type = found_key.type - 1;
6667 key.offset = (u64)-1;
6671 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6672 found_key.objectid, found_key.type, found_key.offset);
6674 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6677 btrfs_release_path(path);
6679 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6680 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6681 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6682 found_key.offset : root->nodesize;
6684 ret = btrfs_update_block_group(trans, root, bytenr,
6691 btrfs_release_path(path);
6696 * for a single backref, this will allocate a new extent
6697 * and add the backref to it.
6699 static int record_extent(struct btrfs_trans_handle *trans,
6700 struct btrfs_fs_info *info,
6701 struct btrfs_path *path,
6702 struct extent_record *rec,
6703 struct extent_backref *back,
6704 int allocated, u64 flags)
6707 struct btrfs_root *extent_root = info->extent_root;
6708 struct extent_buffer *leaf;
6709 struct btrfs_key ins_key;
6710 struct btrfs_extent_item *ei;
6711 struct tree_backref *tback;
6712 struct data_backref *dback;
6713 struct btrfs_tree_block_info *bi;
6716 rec->max_size = max_t(u64, rec->max_size,
6717 info->extent_root->nodesize);
6720 u32 item_size = sizeof(*ei);
6723 item_size += sizeof(*bi);
6725 ins_key.objectid = rec->start;
6726 ins_key.offset = rec->max_size;
6727 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6729 ret = btrfs_insert_empty_item(trans, extent_root, path,
6730 &ins_key, item_size);
6734 leaf = path->nodes[0];
6735 ei = btrfs_item_ptr(leaf, path->slots[0],
6736 struct btrfs_extent_item);
6738 btrfs_set_extent_refs(leaf, ei, 0);
6739 btrfs_set_extent_generation(leaf, ei, rec->generation);
6741 if (back->is_data) {
6742 btrfs_set_extent_flags(leaf, ei,
6743 BTRFS_EXTENT_FLAG_DATA);
6745 struct btrfs_disk_key copy_key;;
6747 tback = to_tree_backref(back);
6748 bi = (struct btrfs_tree_block_info *)(ei + 1);
6749 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6752 btrfs_set_disk_key_objectid(©_key,
6753 rec->info_objectid);
6754 btrfs_set_disk_key_type(©_key, 0);
6755 btrfs_set_disk_key_offset(©_key, 0);
6757 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6758 btrfs_set_tree_block_key(leaf, bi, ©_key);
6760 btrfs_set_extent_flags(leaf, ei,
6761 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6764 btrfs_mark_buffer_dirty(leaf);
6765 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6766 rec->max_size, 1, 0);
6769 btrfs_release_path(path);
6772 if (back->is_data) {
6776 dback = to_data_backref(back);
6777 if (back->full_backref)
6778 parent = dback->parent;
6782 for (i = 0; i < dback->found_ref; i++) {
6783 /* if parent != 0, we're doing a full backref
6784 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6785 * just makes the backref allocator create a data
6788 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6789 rec->start, rec->max_size,
6793 BTRFS_FIRST_FREE_OBJECTID :
6799 fprintf(stderr, "adding new data backref"
6800 " on %llu %s %llu owner %llu"
6801 " offset %llu found %d\n",
6802 (unsigned long long)rec->start,
6803 back->full_backref ?
6805 back->full_backref ?
6806 (unsigned long long)parent :
6807 (unsigned long long)dback->root,
6808 (unsigned long long)dback->owner,
6809 (unsigned long long)dback->offset,
6814 tback = to_tree_backref(back);
6815 if (back->full_backref)
6816 parent = tback->parent;
6820 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6821 rec->start, rec->max_size,
6822 parent, tback->root, 0, 0);
6823 fprintf(stderr, "adding new tree backref on "
6824 "start %llu len %llu parent %llu root %llu\n",
6825 rec->start, rec->max_size, parent, tback->root);
6828 btrfs_release_path(path);
6832 static struct extent_entry *find_entry(struct list_head *entries,
6833 u64 bytenr, u64 bytes)
6835 struct extent_entry *entry = NULL;
6837 list_for_each_entry(entry, entries, list) {
6838 if (entry->bytenr == bytenr && entry->bytes == bytes)
6845 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6847 struct extent_entry *entry, *best = NULL, *prev = NULL;
6849 list_for_each_entry(entry, entries, list) {
6856 * If there are as many broken entries as entries then we know
6857 * not to trust this particular entry.
6859 if (entry->broken == entry->count)
6863 * If our current entry == best then we can't be sure our best
6864 * is really the best, so we need to keep searching.
6866 if (best && best->count == entry->count) {
6872 /* Prev == entry, not good enough, have to keep searching */
6873 if (!prev->broken && prev->count == entry->count)
6877 best = (prev->count > entry->count) ? prev : entry;
6878 else if (best->count < entry->count)
6886 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6887 struct data_backref *dback, struct extent_entry *entry)
6889 struct btrfs_trans_handle *trans;
6890 struct btrfs_root *root;
6891 struct btrfs_file_extent_item *fi;
6892 struct extent_buffer *leaf;
6893 struct btrfs_key key;
6897 key.objectid = dback->root;
6898 key.type = BTRFS_ROOT_ITEM_KEY;
6899 key.offset = (u64)-1;
6900 root = btrfs_read_fs_root(info, &key);
6902 fprintf(stderr, "Couldn't find root for our ref\n");
6907 * The backref points to the original offset of the extent if it was
6908 * split, so we need to search down to the offset we have and then walk
6909 * forward until we find the backref we're looking for.
6911 key.objectid = dback->owner;
6912 key.type = BTRFS_EXTENT_DATA_KEY;
6913 key.offset = dback->offset;
6914 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6916 fprintf(stderr, "Error looking up ref %d\n", ret);
6921 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6922 ret = btrfs_next_leaf(root, path);
6924 fprintf(stderr, "Couldn't find our ref, next\n");
6928 leaf = path->nodes[0];
6929 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6930 if (key.objectid != dback->owner ||
6931 key.type != BTRFS_EXTENT_DATA_KEY) {
6932 fprintf(stderr, "Couldn't find our ref, search\n");
6935 fi = btrfs_item_ptr(leaf, path->slots[0],
6936 struct btrfs_file_extent_item);
6937 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6938 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6940 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6945 btrfs_release_path(path);
6947 trans = btrfs_start_transaction(root, 1);
6949 return PTR_ERR(trans);
6952 * Ok we have the key of the file extent we want to fix, now we can cow
6953 * down to the thing and fix it.
6955 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6957 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6958 key.objectid, key.type, key.offset, ret);
6962 fprintf(stderr, "Well that's odd, we just found this key "
6963 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6968 leaf = path->nodes[0];
6969 fi = btrfs_item_ptr(leaf, path->slots[0],
6970 struct btrfs_file_extent_item);
6972 if (btrfs_file_extent_compression(leaf, fi) &&
6973 dback->disk_bytenr != entry->bytenr) {
6974 fprintf(stderr, "Ref doesn't match the record start and is "
6975 "compressed, please take a btrfs-image of this file "
6976 "system and send it to a btrfs developer so they can "
6977 "complete this functionality for bytenr %Lu\n",
6978 dback->disk_bytenr);
6983 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6984 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6985 } else if (dback->disk_bytenr > entry->bytenr) {
6986 u64 off_diff, offset;
6988 off_diff = dback->disk_bytenr - entry->bytenr;
6989 offset = btrfs_file_extent_offset(leaf, fi);
6990 if (dback->disk_bytenr + offset +
6991 btrfs_file_extent_num_bytes(leaf, fi) >
6992 entry->bytenr + entry->bytes) {
6993 fprintf(stderr, "Ref is past the entry end, please "
6994 "take a btrfs-image of this file system and "
6995 "send it to a btrfs developer, ref %Lu\n",
6996 dback->disk_bytenr);
7001 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7002 btrfs_set_file_extent_offset(leaf, fi, offset);
7003 } else if (dback->disk_bytenr < entry->bytenr) {
7006 offset = btrfs_file_extent_offset(leaf, fi);
7007 if (dback->disk_bytenr + offset < entry->bytenr) {
7008 fprintf(stderr, "Ref is before the entry start, please"
7009 " take a btrfs-image of this file system and "
7010 "send it to a btrfs developer, ref %Lu\n",
7011 dback->disk_bytenr);
7016 offset += dback->disk_bytenr;
7017 offset -= entry->bytenr;
7018 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7019 btrfs_set_file_extent_offset(leaf, fi, offset);
7022 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7025 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7026 * only do this if we aren't using compression, otherwise it's a
7029 if (!btrfs_file_extent_compression(leaf, fi))
7030 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7032 printf("ram bytes may be wrong?\n");
7033 btrfs_mark_buffer_dirty(leaf);
7035 err = btrfs_commit_transaction(trans, root);
7036 btrfs_release_path(path);
7037 return ret ? ret : err;
7040 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7041 struct extent_record *rec)
7043 struct extent_backref *back, *tmp;
7044 struct data_backref *dback;
7045 struct extent_entry *entry, *best = NULL;
7048 int broken_entries = 0;
7053 * Metadata is easy and the backrefs should always agree on bytenr and
7054 * size, if not we've got bigger issues.
7059 rbtree_postorder_for_each_entry_safe(back, tmp,
7060 &rec->backref_tree, node) {
7061 if (back->full_backref || !back->is_data)
7064 dback = to_data_backref(back);
7067 * We only pay attention to backrefs that we found a real
7070 if (dback->found_ref == 0)
7074 * For now we only catch when the bytes don't match, not the
7075 * bytenr. We can easily do this at the same time, but I want
7076 * to have a fs image to test on before we just add repair
7077 * functionality willy-nilly so we know we won't screw up the
7081 entry = find_entry(&entries, dback->disk_bytenr,
7084 entry = malloc(sizeof(struct extent_entry));
7089 memset(entry, 0, sizeof(*entry));
7090 entry->bytenr = dback->disk_bytenr;
7091 entry->bytes = dback->bytes;
7092 list_add_tail(&entry->list, &entries);
7097 * If we only have on entry we may think the entries agree when
7098 * in reality they don't so we have to do some extra checking.
7100 if (dback->disk_bytenr != rec->start ||
7101 dback->bytes != rec->nr || back->broken)
7112 /* Yay all the backrefs agree, carry on good sir */
7113 if (nr_entries <= 1 && !mismatch)
7116 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7117 "%Lu\n", rec->start);
7120 * First we want to see if the backrefs can agree amongst themselves who
7121 * is right, so figure out which one of the entries has the highest
7124 best = find_most_right_entry(&entries);
7127 * Ok so we may have an even split between what the backrefs think, so
7128 * this is where we use the extent ref to see what it thinks.
7131 entry = find_entry(&entries, rec->start, rec->nr);
7132 if (!entry && (!broken_entries || !rec->found_rec)) {
7133 fprintf(stderr, "Backrefs don't agree with each other "
7134 "and extent record doesn't agree with anybody,"
7135 " so we can't fix bytenr %Lu bytes %Lu\n",
7136 rec->start, rec->nr);
7139 } else if (!entry) {
7141 * Ok our backrefs were broken, we'll assume this is the
7142 * correct value and add an entry for this range.
7144 entry = malloc(sizeof(struct extent_entry));
7149 memset(entry, 0, sizeof(*entry));
7150 entry->bytenr = rec->start;
7151 entry->bytes = rec->nr;
7152 list_add_tail(&entry->list, &entries);
7156 best = find_most_right_entry(&entries);
7158 fprintf(stderr, "Backrefs and extent record evenly "
7159 "split on who is right, this is going to "
7160 "require user input to fix bytenr %Lu bytes "
7161 "%Lu\n", rec->start, rec->nr);
7168 * I don't think this can happen currently as we'll abort() if we catch
7169 * this case higher up, but in case somebody removes that we still can't
7170 * deal with it properly here yet, so just bail out of that's the case.
7172 if (best->bytenr != rec->start) {
7173 fprintf(stderr, "Extent start and backref starts don't match, "
7174 "please use btrfs-image on this file system and send "
7175 "it to a btrfs developer so they can make fsck fix "
7176 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7177 rec->start, rec->nr);
7183 * Ok great we all agreed on an extent record, let's go find the real
7184 * references and fix up the ones that don't match.
7186 rbtree_postorder_for_each_entry_safe(back, tmp,
7187 &rec->backref_tree, node) {
7188 if (back->full_backref || !back->is_data)
7191 dback = to_data_backref(back);
7194 * Still ignoring backrefs that don't have a real ref attached
7197 if (dback->found_ref == 0)
7200 if (dback->bytes == best->bytes &&
7201 dback->disk_bytenr == best->bytenr)
7204 ret = repair_ref(info, path, dback, best);
7210 * Ok we messed with the actual refs, which means we need to drop our
7211 * entire cache and go back and rescan. I know this is a huge pain and
7212 * adds a lot of extra work, but it's the only way to be safe. Once all
7213 * the backrefs agree we may not need to do anything to the extent
7218 while (!list_empty(&entries)) {
7219 entry = list_entry(entries.next, struct extent_entry, list);
7220 list_del_init(&entry->list);
7226 static int process_duplicates(struct btrfs_root *root,
7227 struct cache_tree *extent_cache,
7228 struct extent_record *rec)
7230 struct extent_record *good, *tmp;
7231 struct cache_extent *cache;
7235 * If we found a extent record for this extent then return, or if we
7236 * have more than one duplicate we are likely going to need to delete
7239 if (rec->found_rec || rec->num_duplicates > 1)
7242 /* Shouldn't happen but just in case */
7243 BUG_ON(!rec->num_duplicates);
7246 * So this happens if we end up with a backref that doesn't match the
7247 * actual extent entry. So either the backref is bad or the extent
7248 * entry is bad. Either way we want to have the extent_record actually
7249 * reflect what we found in the extent_tree, so we need to take the
7250 * duplicate out and use that as the extent_record since the only way we
7251 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7253 remove_cache_extent(extent_cache, &rec->cache);
7255 good = to_extent_record(rec->dups.next);
7256 list_del_init(&good->list);
7257 INIT_LIST_HEAD(&good->backrefs);
7258 INIT_LIST_HEAD(&good->dups);
7259 good->cache.start = good->start;
7260 good->cache.size = good->nr;
7261 good->content_checked = 0;
7262 good->owner_ref_checked = 0;
7263 good->num_duplicates = 0;
7264 good->refs = rec->refs;
7265 list_splice_init(&rec->backrefs, &good->backrefs);
7267 cache = lookup_cache_extent(extent_cache, good->start,
7271 tmp = container_of(cache, struct extent_record, cache);
7274 * If we find another overlapping extent and it's found_rec is
7275 * set then it's a duplicate and we need to try and delete
7278 if (tmp->found_rec || tmp->num_duplicates > 0) {
7279 if (list_empty(&good->list))
7280 list_add_tail(&good->list,
7281 &duplicate_extents);
7282 good->num_duplicates += tmp->num_duplicates + 1;
7283 list_splice_init(&tmp->dups, &good->dups);
7284 list_del_init(&tmp->list);
7285 list_add_tail(&tmp->list, &good->dups);
7286 remove_cache_extent(extent_cache, &tmp->cache);
7291 * Ok we have another non extent item backed extent rec, so lets
7292 * just add it to this extent and carry on like we did above.
7294 good->refs += tmp->refs;
7295 list_splice_init(&tmp->backrefs, &good->backrefs);
7296 remove_cache_extent(extent_cache, &tmp->cache);
7299 ret = insert_cache_extent(extent_cache, &good->cache);
7302 return good->num_duplicates ? 0 : 1;
7305 static int delete_duplicate_records(struct btrfs_root *root,
7306 struct extent_record *rec)
7308 struct btrfs_trans_handle *trans;
7309 LIST_HEAD(delete_list);
7310 struct btrfs_path *path;
7311 struct extent_record *tmp, *good, *n;
7314 struct btrfs_key key;
7316 path = btrfs_alloc_path();
7323 /* Find the record that covers all of the duplicates. */
7324 list_for_each_entry(tmp, &rec->dups, list) {
7325 if (good->start < tmp->start)
7327 if (good->nr > tmp->nr)
7330 if (tmp->start + tmp->nr < good->start + good->nr) {
7331 fprintf(stderr, "Ok we have overlapping extents that "
7332 "aren't completely covered by each other, this "
7333 "is going to require more careful thought. "
7334 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7335 tmp->start, tmp->nr, good->start, good->nr);
7342 list_add_tail(&rec->list, &delete_list);
7344 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7347 list_move_tail(&tmp->list, &delete_list);
7350 root = root->fs_info->extent_root;
7351 trans = btrfs_start_transaction(root, 1);
7352 if (IS_ERR(trans)) {
7353 ret = PTR_ERR(trans);
7357 list_for_each_entry(tmp, &delete_list, list) {
7358 if (tmp->found_rec == 0)
7360 key.objectid = tmp->start;
7361 key.type = BTRFS_EXTENT_ITEM_KEY;
7362 key.offset = tmp->nr;
7364 /* Shouldn't happen but just in case */
7365 if (tmp->metadata) {
7366 fprintf(stderr, "Well this shouldn't happen, extent "
7367 "record overlaps but is metadata? "
7368 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7372 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7378 ret = btrfs_del_item(trans, root, path);
7381 btrfs_release_path(path);
7384 err = btrfs_commit_transaction(trans, root);
7388 while (!list_empty(&delete_list)) {
7389 tmp = to_extent_record(delete_list.next);
7390 list_del_init(&tmp->list);
7396 while (!list_empty(&rec->dups)) {
7397 tmp = to_extent_record(rec->dups.next);
7398 list_del_init(&tmp->list);
7402 btrfs_free_path(path);
7404 if (!ret && !nr_del)
7405 rec->num_duplicates = 0;
7407 return ret ? ret : nr_del;
7410 static int find_possible_backrefs(struct btrfs_fs_info *info,
7411 struct btrfs_path *path,
7412 struct cache_tree *extent_cache,
7413 struct extent_record *rec)
7415 struct btrfs_root *root;
7416 struct extent_backref *back, *tmp;
7417 struct data_backref *dback;
7418 struct cache_extent *cache;
7419 struct btrfs_file_extent_item *fi;
7420 struct btrfs_key key;
7424 rbtree_postorder_for_each_entry_safe(back, tmp,
7425 &rec->backref_tree, node) {
7426 /* Don't care about full backrefs (poor unloved backrefs) */
7427 if (back->full_backref || !back->is_data)
7430 dback = to_data_backref(back);
7432 /* We found this one, we don't need to do a lookup */
7433 if (dback->found_ref)
7436 key.objectid = dback->root;
7437 key.type = BTRFS_ROOT_ITEM_KEY;
7438 key.offset = (u64)-1;
7440 root = btrfs_read_fs_root(info, &key);
7442 /* No root, definitely a bad ref, skip */
7443 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7445 /* Other err, exit */
7447 return PTR_ERR(root);
7449 key.objectid = dback->owner;
7450 key.type = BTRFS_EXTENT_DATA_KEY;
7451 key.offset = dback->offset;
7452 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7454 btrfs_release_path(path);
7457 /* Didn't find it, we can carry on */
7462 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7463 struct btrfs_file_extent_item);
7464 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7465 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7466 btrfs_release_path(path);
7467 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7469 struct extent_record *tmp;
7470 tmp = container_of(cache, struct extent_record, cache);
7473 * If we found an extent record for the bytenr for this
7474 * particular backref then we can't add it to our
7475 * current extent record. We only want to add backrefs
7476 * that don't have a corresponding extent item in the
7477 * extent tree since they likely belong to this record
7478 * and we need to fix it if it doesn't match bytenrs.
7484 dback->found_ref += 1;
7485 dback->disk_bytenr = bytenr;
7486 dback->bytes = bytes;
7489 * Set this so the verify backref code knows not to trust the
7490 * values in this backref.
7499 * Record orphan data ref into corresponding root.
7501 * Return 0 if the extent item contains data ref and recorded.
7502 * Return 1 if the extent item contains no useful data ref
7503 * On that case, it may contains only shared_dataref or metadata backref
7504 * or the file extent exists(this should be handled by the extent bytenr
7506 * Return <0 if something goes wrong.
7508 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7509 struct extent_record *rec)
7511 struct btrfs_key key;
7512 struct btrfs_root *dest_root;
7513 struct extent_backref *back, *tmp;
7514 struct data_backref *dback;
7515 struct orphan_data_extent *orphan;
7516 struct btrfs_path *path;
7517 int recorded_data_ref = 0;
7522 path = btrfs_alloc_path();
7525 rbtree_postorder_for_each_entry_safe(back, tmp,
7526 &rec->backref_tree, node) {
7527 if (back->full_backref || !back->is_data ||
7528 !back->found_extent_tree)
7530 dback = to_data_backref(back);
7531 if (dback->found_ref)
7533 key.objectid = dback->root;
7534 key.type = BTRFS_ROOT_ITEM_KEY;
7535 key.offset = (u64)-1;
7537 dest_root = btrfs_read_fs_root(fs_info, &key);
7539 /* For non-exist root we just skip it */
7540 if (IS_ERR(dest_root) || !dest_root)
7543 key.objectid = dback->owner;
7544 key.type = BTRFS_EXTENT_DATA_KEY;
7545 key.offset = dback->offset;
7547 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7549 * For ret < 0, it's OK since the fs-tree may be corrupted,
7550 * we need to record it for inode/file extent rebuild.
7551 * For ret > 0, we record it only for file extent rebuild.
7552 * For ret == 0, the file extent exists but only bytenr
7553 * mismatch, let the original bytenr fix routine to handle,
7559 orphan = malloc(sizeof(*orphan));
7564 INIT_LIST_HEAD(&orphan->list);
7565 orphan->root = dback->root;
7566 orphan->objectid = dback->owner;
7567 orphan->offset = dback->offset;
7568 orphan->disk_bytenr = rec->cache.start;
7569 orphan->disk_len = rec->cache.size;
7570 list_add(&dest_root->orphan_data_extents, &orphan->list);
7571 recorded_data_ref = 1;
7574 btrfs_free_path(path);
7576 return !recorded_data_ref;
7582 * when an incorrect extent item is found, this will delete
7583 * all of the existing entries for it and recreate them
7584 * based on what the tree scan found.
7586 static int fixup_extent_refs(struct btrfs_fs_info *info,
7587 struct cache_tree *extent_cache,
7588 struct extent_record *rec)
7590 struct btrfs_trans_handle *trans = NULL;
7592 struct btrfs_path *path;
7593 struct cache_extent *cache;
7594 struct extent_backref *back, *tmp;
7598 if (rec->flag_block_full_backref)
7599 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7601 path = btrfs_alloc_path();
7605 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7607 * Sometimes the backrefs themselves are so broken they don't
7608 * get attached to any meaningful rec, so first go back and
7609 * check any of our backrefs that we couldn't find and throw
7610 * them into the list if we find the backref so that
7611 * verify_backrefs can figure out what to do.
7613 ret = find_possible_backrefs(info, path, extent_cache, rec);
7618 /* step one, make sure all of the backrefs agree */
7619 ret = verify_backrefs(info, path, rec);
7623 trans = btrfs_start_transaction(info->extent_root, 1);
7624 if (IS_ERR(trans)) {
7625 ret = PTR_ERR(trans);
7629 /* step two, delete all the existing records */
7630 ret = delete_extent_records(trans, info->extent_root, path,
7631 rec->start, rec->max_size);
7636 /* was this block corrupt? If so, don't add references to it */
7637 cache = lookup_cache_extent(info->corrupt_blocks,
7638 rec->start, rec->max_size);
7644 /* step three, recreate all the refs we did find */
7645 rbtree_postorder_for_each_entry_safe(back, tmp,
7646 &rec->backref_tree, node) {
7648 * if we didn't find any references, don't create a
7651 if (!back->found_ref)
7654 rec->bad_full_backref = 0;
7655 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7663 int err = btrfs_commit_transaction(trans, info->extent_root);
7668 btrfs_free_path(path);
7672 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7673 struct extent_record *rec)
7675 struct btrfs_trans_handle *trans;
7676 struct btrfs_root *root = fs_info->extent_root;
7677 struct btrfs_path *path;
7678 struct btrfs_extent_item *ei;
7679 struct btrfs_key key;
7683 key.objectid = rec->start;
7684 if (rec->metadata) {
7685 key.type = BTRFS_METADATA_ITEM_KEY;
7686 key.offset = rec->info_level;
7688 key.type = BTRFS_EXTENT_ITEM_KEY;
7689 key.offset = rec->max_size;
7692 path = btrfs_alloc_path();
7696 trans = btrfs_start_transaction(root, 0);
7697 if (IS_ERR(trans)) {
7698 btrfs_free_path(path);
7699 return PTR_ERR(trans);
7702 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7704 btrfs_free_path(path);
7705 btrfs_commit_transaction(trans, root);
7708 fprintf(stderr, "Didn't find extent for %llu\n",
7709 (unsigned long long)rec->start);
7710 btrfs_free_path(path);
7711 btrfs_commit_transaction(trans, root);
7715 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7716 struct btrfs_extent_item);
7717 flags = btrfs_extent_flags(path->nodes[0], ei);
7718 if (rec->flag_block_full_backref) {
7719 fprintf(stderr, "setting full backref on %llu\n",
7720 (unsigned long long)key.objectid);
7721 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7723 fprintf(stderr, "clearing full backref on %llu\n",
7724 (unsigned long long)key.objectid);
7725 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7727 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7728 btrfs_mark_buffer_dirty(path->nodes[0]);
7729 btrfs_free_path(path);
7730 return btrfs_commit_transaction(trans, root);
7733 /* right now we only prune from the extent allocation tree */
7734 static int prune_one_block(struct btrfs_trans_handle *trans,
7735 struct btrfs_fs_info *info,
7736 struct btrfs_corrupt_block *corrupt)
7739 struct btrfs_path path;
7740 struct extent_buffer *eb;
7744 int level = corrupt->level + 1;
7746 btrfs_init_path(&path);
7748 /* we want to stop at the parent to our busted block */
7749 path.lowest_level = level;
7751 ret = btrfs_search_slot(trans, info->extent_root,
7752 &corrupt->key, &path, -1, 1);
7757 eb = path.nodes[level];
7764 * hopefully the search gave us the block we want to prune,
7765 * lets try that first
7767 slot = path.slots[level];
7768 found = btrfs_node_blockptr(eb, slot);
7769 if (found == corrupt->cache.start)
7772 nritems = btrfs_header_nritems(eb);
7774 /* the search failed, lets scan this node and hope we find it */
7775 for (slot = 0; slot < nritems; slot++) {
7776 found = btrfs_node_blockptr(eb, slot);
7777 if (found == corrupt->cache.start)
7781 * we couldn't find the bad block. TODO, search all the nodes for pointers
7784 if (eb == info->extent_root->node) {
7789 btrfs_release_path(&path);
7794 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7795 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7798 btrfs_release_path(&path);
7802 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7804 struct btrfs_trans_handle *trans = NULL;
7805 struct cache_extent *cache;
7806 struct btrfs_corrupt_block *corrupt;
7809 cache = search_cache_extent(info->corrupt_blocks, 0);
7813 trans = btrfs_start_transaction(info->extent_root, 1);
7815 return PTR_ERR(trans);
7817 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7818 prune_one_block(trans, info, corrupt);
7819 remove_cache_extent(info->corrupt_blocks, cache);
7822 return btrfs_commit_transaction(trans, info->extent_root);
7826 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7828 struct btrfs_block_group_cache *cache;
7833 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7834 &start, &end, EXTENT_DIRTY);
7837 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7843 cache = btrfs_lookup_first_block_group(fs_info, start);
7848 start = cache->key.objectid + cache->key.offset;
7852 static int check_extent_refs(struct btrfs_root *root,
7853 struct cache_tree *extent_cache)
7855 struct extent_record *rec;
7856 struct cache_extent *cache;
7865 * if we're doing a repair, we have to make sure
7866 * we don't allocate from the problem extents.
7867 * In the worst case, this will be all the
7870 cache = search_cache_extent(extent_cache, 0);
7872 rec = container_of(cache, struct extent_record, cache);
7873 set_extent_dirty(root->fs_info->excluded_extents,
7875 rec->start + rec->max_size - 1,
7877 cache = next_cache_extent(cache);
7880 /* pin down all the corrupted blocks too */
7881 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7883 set_extent_dirty(root->fs_info->excluded_extents,
7885 cache->start + cache->size - 1,
7887 cache = next_cache_extent(cache);
7889 prune_corrupt_blocks(root->fs_info);
7890 reset_cached_block_groups(root->fs_info);
7893 reset_cached_block_groups(root->fs_info);
7896 * We need to delete any duplicate entries we find first otherwise we
7897 * could mess up the extent tree when we have backrefs that actually
7898 * belong to a different extent item and not the weird duplicate one.
7900 while (repair && !list_empty(&duplicate_extents)) {
7901 rec = to_extent_record(duplicate_extents.next);
7902 list_del_init(&rec->list);
7904 /* Sometimes we can find a backref before we find an actual
7905 * extent, so we need to process it a little bit to see if there
7906 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7907 * if this is a backref screwup. If we need to delete stuff
7908 * process_duplicates() will return 0, otherwise it will return
7911 if (process_duplicates(root, extent_cache, rec))
7913 ret = delete_duplicate_records(root, rec);
7917 * delete_duplicate_records will return the number of entries
7918 * deleted, so if it's greater than 0 then we know we actually
7919 * did something and we need to remove.
7933 cache = search_cache_extent(extent_cache, 0);
7936 rec = container_of(cache, struct extent_record, cache);
7937 if (rec->num_duplicates) {
7938 fprintf(stderr, "extent item %llu has multiple extent "
7939 "items\n", (unsigned long long)rec->start);
7944 if (rec->refs != rec->extent_item_refs) {
7945 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7946 (unsigned long long)rec->start,
7947 (unsigned long long)rec->nr);
7948 fprintf(stderr, "extent item %llu, found %llu\n",
7949 (unsigned long long)rec->extent_item_refs,
7950 (unsigned long long)rec->refs);
7951 ret = record_orphan_data_extents(root->fs_info, rec);
7958 * we can't use the extent to repair file
7959 * extent, let the fallback method handle it.
7961 if (!fixed && repair) {
7962 ret = fixup_extent_refs(
7973 if (all_backpointers_checked(rec, 1)) {
7974 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7975 (unsigned long long)rec->start,
7976 (unsigned long long)rec->nr);
7978 if (!fixed && !recorded && repair) {
7979 ret = fixup_extent_refs(root->fs_info,
7988 if (!rec->owner_ref_checked) {
7989 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7990 (unsigned long long)rec->start,
7991 (unsigned long long)rec->nr);
7992 if (!fixed && !recorded && repair) {
7993 ret = fixup_extent_refs(root->fs_info,
8002 if (rec->bad_full_backref) {
8003 fprintf(stderr, "bad full backref, on [%llu]\n",
8004 (unsigned long long)rec->start);
8006 ret = fixup_extent_flags(root->fs_info, rec);
8015 * Although it's not a extent ref's problem, we reuse this
8016 * routine for error reporting.
8017 * No repair function yet.
8019 if (rec->crossing_stripes) {
8021 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8022 rec->start, rec->start + rec->max_size);
8027 if (rec->wrong_chunk_type) {
8029 "bad extent [%llu, %llu), type mismatch with chunk\n",
8030 rec->start, rec->start + rec->max_size);
8035 remove_cache_extent(extent_cache, cache);
8036 free_all_extent_backrefs(rec);
8037 if (!init_extent_tree && repair && (!cur_err || fixed))
8038 clear_extent_dirty(root->fs_info->excluded_extents,
8040 rec->start + rec->max_size - 1,
8046 if (ret && ret != -EAGAIN) {
8047 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8050 struct btrfs_trans_handle *trans;
8052 root = root->fs_info->extent_root;
8053 trans = btrfs_start_transaction(root, 1);
8054 if (IS_ERR(trans)) {
8055 ret = PTR_ERR(trans);
8059 btrfs_fix_block_accounting(trans, root);
8060 ret = btrfs_commit_transaction(trans, root);
8065 fprintf(stderr, "repaired damaged extent references\n");
8071 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8075 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8076 stripe_size = length;
8077 stripe_size /= num_stripes;
8078 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8079 stripe_size = length * 2;
8080 stripe_size /= num_stripes;
8081 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8082 stripe_size = length;
8083 stripe_size /= (num_stripes - 1);
8084 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8085 stripe_size = length;
8086 stripe_size /= (num_stripes - 2);
8088 stripe_size = length;
8094 * Check the chunk with its block group/dev list ref:
8095 * Return 0 if all refs seems valid.
8096 * Return 1 if part of refs seems valid, need later check for rebuild ref
8097 * like missing block group and needs to search extent tree to rebuild them.
8098 * Return -1 if essential refs are missing and unable to rebuild.
8100 static int check_chunk_refs(struct chunk_record *chunk_rec,
8101 struct block_group_tree *block_group_cache,
8102 struct device_extent_tree *dev_extent_cache,
8105 struct cache_extent *block_group_item;
8106 struct block_group_record *block_group_rec;
8107 struct cache_extent *dev_extent_item;
8108 struct device_extent_record *dev_extent_rec;
8112 int metadump_v2 = 0;
8116 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8119 if (block_group_item) {
8120 block_group_rec = container_of(block_group_item,
8121 struct block_group_record,
8123 if (chunk_rec->length != block_group_rec->offset ||
8124 chunk_rec->offset != block_group_rec->objectid ||
8126 chunk_rec->type_flags != block_group_rec->flags)) {
8129 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8130 chunk_rec->objectid,
8135 chunk_rec->type_flags,
8136 block_group_rec->objectid,
8137 block_group_rec->type,
8138 block_group_rec->offset,
8139 block_group_rec->offset,
8140 block_group_rec->objectid,
8141 block_group_rec->flags);
8144 list_del_init(&block_group_rec->list);
8145 chunk_rec->bg_rec = block_group_rec;
8150 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8151 chunk_rec->objectid,
8156 chunk_rec->type_flags);
8163 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8164 chunk_rec->num_stripes);
8165 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8166 devid = chunk_rec->stripes[i].devid;
8167 offset = chunk_rec->stripes[i].offset;
8168 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8169 devid, offset, length);
8170 if (dev_extent_item) {
8171 dev_extent_rec = container_of(dev_extent_item,
8172 struct device_extent_record,
8174 if (dev_extent_rec->objectid != devid ||
8175 dev_extent_rec->offset != offset ||
8176 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8177 dev_extent_rec->length != length) {
8180 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8181 chunk_rec->objectid,
8184 chunk_rec->stripes[i].devid,
8185 chunk_rec->stripes[i].offset,
8186 dev_extent_rec->objectid,
8187 dev_extent_rec->offset,
8188 dev_extent_rec->length);
8191 list_move(&dev_extent_rec->chunk_list,
8192 &chunk_rec->dextents);
8197 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8198 chunk_rec->objectid,
8201 chunk_rec->stripes[i].devid,
8202 chunk_rec->stripes[i].offset);
8209 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8210 int check_chunks(struct cache_tree *chunk_cache,
8211 struct block_group_tree *block_group_cache,
8212 struct device_extent_tree *dev_extent_cache,
8213 struct list_head *good, struct list_head *bad,
8214 struct list_head *rebuild, int silent)
8216 struct cache_extent *chunk_item;
8217 struct chunk_record *chunk_rec;
8218 struct block_group_record *bg_rec;
8219 struct device_extent_record *dext_rec;
8223 chunk_item = first_cache_extent(chunk_cache);
8224 while (chunk_item) {
8225 chunk_rec = container_of(chunk_item, struct chunk_record,
8227 err = check_chunk_refs(chunk_rec, block_group_cache,
8228 dev_extent_cache, silent);
8231 if (err == 0 && good)
8232 list_add_tail(&chunk_rec->list, good);
8233 if (err > 0 && rebuild)
8234 list_add_tail(&chunk_rec->list, rebuild);
8236 list_add_tail(&chunk_rec->list, bad);
8237 chunk_item = next_cache_extent(chunk_item);
8240 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8243 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8251 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8255 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8266 static int check_device_used(struct device_record *dev_rec,
8267 struct device_extent_tree *dext_cache)
8269 struct cache_extent *cache;
8270 struct device_extent_record *dev_extent_rec;
8273 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8275 dev_extent_rec = container_of(cache,
8276 struct device_extent_record,
8278 if (dev_extent_rec->objectid != dev_rec->devid)
8281 list_del_init(&dev_extent_rec->device_list);
8282 total_byte += dev_extent_rec->length;
8283 cache = next_cache_extent(cache);
8286 if (total_byte != dev_rec->byte_used) {
8288 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8289 total_byte, dev_rec->byte_used, dev_rec->objectid,
8290 dev_rec->type, dev_rec->offset);
8297 /* check btrfs_dev_item -> btrfs_dev_extent */
8298 static int check_devices(struct rb_root *dev_cache,
8299 struct device_extent_tree *dev_extent_cache)
8301 struct rb_node *dev_node;
8302 struct device_record *dev_rec;
8303 struct device_extent_record *dext_rec;
8307 dev_node = rb_first(dev_cache);
8309 dev_rec = container_of(dev_node, struct device_record, node);
8310 err = check_device_used(dev_rec, dev_extent_cache);
8314 dev_node = rb_next(dev_node);
8316 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8319 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8320 dext_rec->objectid, dext_rec->offset, dext_rec->length);
8327 static int add_root_item_to_list(struct list_head *head,
8328 u64 objectid, u64 bytenr, u64 last_snapshot,
8329 u8 level, u8 drop_level,
8330 int level_size, struct btrfs_key *drop_key)
8333 struct root_item_record *ri_rec;
8334 ri_rec = malloc(sizeof(*ri_rec));
8337 ri_rec->bytenr = bytenr;
8338 ri_rec->objectid = objectid;
8339 ri_rec->level = level;
8340 ri_rec->level_size = level_size;
8341 ri_rec->drop_level = drop_level;
8342 ri_rec->last_snapshot = last_snapshot;
8344 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8345 list_add_tail(&ri_rec->list, head);
8350 static void free_root_item_list(struct list_head *list)
8352 struct root_item_record *ri_rec;
8354 while (!list_empty(list)) {
8355 ri_rec = list_first_entry(list, struct root_item_record,
8357 list_del_init(&ri_rec->list);
8362 static int deal_root_from_list(struct list_head *list,
8363 struct btrfs_root *root,
8364 struct block_info *bits,
8366 struct cache_tree *pending,
8367 struct cache_tree *seen,
8368 struct cache_tree *reada,
8369 struct cache_tree *nodes,
8370 struct cache_tree *extent_cache,
8371 struct cache_tree *chunk_cache,
8372 struct rb_root *dev_cache,
8373 struct block_group_tree *block_group_cache,
8374 struct device_extent_tree *dev_extent_cache)
8379 while (!list_empty(list)) {
8380 struct root_item_record *rec;
8381 struct extent_buffer *buf;
8382 rec = list_entry(list->next,
8383 struct root_item_record, list);
8385 buf = read_tree_block(root->fs_info->tree_root,
8386 rec->bytenr, rec->level_size, 0);
8387 if (!extent_buffer_uptodate(buf)) {
8388 free_extent_buffer(buf);
8392 add_root_to_pending(buf, extent_cache, pending,
8393 seen, nodes, rec->objectid);
8395 * To rebuild extent tree, we need deal with snapshot
8396 * one by one, otherwise we deal with node firstly which
8397 * can maximize readahead.
8400 ret = run_next_block(root, bits, bits_nr, &last,
8401 pending, seen, reada, nodes,
8402 extent_cache, chunk_cache,
8403 dev_cache, block_group_cache,
8404 dev_extent_cache, rec);
8408 free_extent_buffer(buf);
8409 list_del(&rec->list);
8415 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8416 reada, nodes, extent_cache, chunk_cache,
8417 dev_cache, block_group_cache,
8418 dev_extent_cache, NULL);
8428 static int check_chunks_and_extents(struct btrfs_root *root)
8430 struct rb_root dev_cache;
8431 struct cache_tree chunk_cache;
8432 struct block_group_tree block_group_cache;
8433 struct device_extent_tree dev_extent_cache;
8434 struct cache_tree extent_cache;
8435 struct cache_tree seen;
8436 struct cache_tree pending;
8437 struct cache_tree reada;
8438 struct cache_tree nodes;
8439 struct extent_io_tree excluded_extents;
8440 struct cache_tree corrupt_blocks;
8441 struct btrfs_path path;
8442 struct btrfs_key key;
8443 struct btrfs_key found_key;
8445 struct block_info *bits;
8447 struct extent_buffer *leaf;
8449 struct btrfs_root_item ri;
8450 struct list_head dropping_trees;
8451 struct list_head normal_trees;
8452 struct btrfs_root *root1;
8457 dev_cache = RB_ROOT;
8458 cache_tree_init(&chunk_cache);
8459 block_group_tree_init(&block_group_cache);
8460 device_extent_tree_init(&dev_extent_cache);
8462 cache_tree_init(&extent_cache);
8463 cache_tree_init(&seen);
8464 cache_tree_init(&pending);
8465 cache_tree_init(&nodes);
8466 cache_tree_init(&reada);
8467 cache_tree_init(&corrupt_blocks);
8468 extent_io_tree_init(&excluded_extents);
8469 INIT_LIST_HEAD(&dropping_trees);
8470 INIT_LIST_HEAD(&normal_trees);
8473 root->fs_info->excluded_extents = &excluded_extents;
8474 root->fs_info->fsck_extent_cache = &extent_cache;
8475 root->fs_info->free_extent_hook = free_extent_hook;
8476 root->fs_info->corrupt_blocks = &corrupt_blocks;
8480 bits = malloc(bits_nr * sizeof(struct block_info));
8486 if (ctx.progress_enabled) {
8487 ctx.tp = TASK_EXTENTS;
8488 task_start(ctx.info);
8492 root1 = root->fs_info->tree_root;
8493 level = btrfs_header_level(root1->node);
8494 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8495 root1->node->start, 0, level, 0,
8496 root1->nodesize, NULL);
8499 root1 = root->fs_info->chunk_root;
8500 level = btrfs_header_level(root1->node);
8501 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8502 root1->node->start, 0, level, 0,
8503 root1->nodesize, NULL);
8506 btrfs_init_path(&path);
8509 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8510 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8515 leaf = path.nodes[0];
8516 slot = path.slots[0];
8517 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8518 ret = btrfs_next_leaf(root, &path);
8521 leaf = path.nodes[0];
8522 slot = path.slots[0];
8524 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8525 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8526 unsigned long offset;
8529 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8530 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8531 last_snapshot = btrfs_root_last_snapshot(&ri);
8532 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8533 level = btrfs_root_level(&ri);
8534 level_size = root->nodesize;
8535 ret = add_root_item_to_list(&normal_trees,
8537 btrfs_root_bytenr(&ri),
8538 last_snapshot, level,
8539 0, level_size, NULL);
8543 level = btrfs_root_level(&ri);
8544 level_size = root->nodesize;
8545 objectid = found_key.objectid;
8546 btrfs_disk_key_to_cpu(&found_key,
8548 ret = add_root_item_to_list(&dropping_trees,
8550 btrfs_root_bytenr(&ri),
8551 last_snapshot, level,
8553 level_size, &found_key);
8560 btrfs_release_path(&path);
8563 * check_block can return -EAGAIN if it fixes something, please keep
8564 * this in mind when dealing with return values from these functions, if
8565 * we get -EAGAIN we want to fall through and restart the loop.
8567 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8568 &seen, &reada, &nodes, &extent_cache,
8569 &chunk_cache, &dev_cache, &block_group_cache,
8576 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8577 &pending, &seen, &reada, &nodes,
8578 &extent_cache, &chunk_cache, &dev_cache,
8579 &block_group_cache, &dev_extent_cache);
8586 ret = check_chunks(&chunk_cache, &block_group_cache,
8587 &dev_extent_cache, NULL, NULL, NULL, 0);
8594 ret = check_extent_refs(root, &extent_cache);
8601 ret = check_devices(&dev_cache, &dev_extent_cache);
8606 task_stop(ctx.info);
8608 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8609 extent_io_tree_cleanup(&excluded_extents);
8610 root->fs_info->fsck_extent_cache = NULL;
8611 root->fs_info->free_extent_hook = NULL;
8612 root->fs_info->corrupt_blocks = NULL;
8613 root->fs_info->excluded_extents = NULL;
8616 free_chunk_cache_tree(&chunk_cache);
8617 free_device_cache_tree(&dev_cache);
8618 free_block_group_tree(&block_group_cache);
8619 free_device_extent_tree(&dev_extent_cache);
8620 free_extent_cache_tree(&seen);
8621 free_extent_cache_tree(&pending);
8622 free_extent_cache_tree(&reada);
8623 free_extent_cache_tree(&nodes);
8626 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8627 free_extent_cache_tree(&seen);
8628 free_extent_cache_tree(&pending);
8629 free_extent_cache_tree(&reada);
8630 free_extent_cache_tree(&nodes);
8631 free_chunk_cache_tree(&chunk_cache);
8632 free_block_group_tree(&block_group_cache);
8633 free_device_cache_tree(&dev_cache);
8634 free_device_extent_tree(&dev_extent_cache);
8635 free_extent_record_cache(root->fs_info, &extent_cache);
8636 free_root_item_list(&normal_trees);
8637 free_root_item_list(&dropping_trees);
8638 extent_io_tree_cleanup(&excluded_extents);
8643 * Check backrefs of a tree block given by @bytenr or @eb.
8645 * @root: the root containing the @bytenr or @eb
8646 * @eb: tree block extent buffer, can be NULL
8647 * @bytenr: bytenr of the tree block to search
8648 * @level: tree level of the tree block
8649 * @owner: owner of the tree block
8651 * Return >0 for any error found and output error message
8652 * Return 0 for no error found
8654 static int check_tree_block_ref(struct btrfs_root *root,
8655 struct extent_buffer *eb, u64 bytenr,
8656 int level, u64 owner)
8658 struct btrfs_key key;
8659 struct btrfs_root *extent_root = root->fs_info->extent_root;
8660 struct btrfs_path path;
8661 struct btrfs_extent_item *ei;
8662 struct btrfs_extent_inline_ref *iref;
8663 struct extent_buffer *leaf;
8669 u32 nodesize = root->nodesize;
8676 btrfs_init_path(&path);
8677 key.objectid = bytenr;
8678 if (btrfs_fs_incompat(root->fs_info,
8679 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8680 key.type = BTRFS_METADATA_ITEM_KEY;
8682 key.type = BTRFS_EXTENT_ITEM_KEY;
8683 key.offset = (u64)-1;
8685 /* Search for the backref in extent tree */
8686 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8688 err |= BACKREF_MISSING;
8691 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8693 err |= BACKREF_MISSING;
8697 leaf = path.nodes[0];
8698 slot = path.slots[0];
8699 btrfs_item_key_to_cpu(leaf, &key, slot);
8701 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8703 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8704 skinny_level = (int)key.offset;
8705 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8707 struct btrfs_tree_block_info *info;
8709 info = (struct btrfs_tree_block_info *)(ei + 1);
8710 skinny_level = btrfs_tree_block_level(leaf, info);
8711 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8718 if (!(btrfs_extent_flags(leaf, ei) &
8719 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8721 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8722 key.objectid, nodesize,
8723 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8724 err = BACKREF_MISMATCH;
8726 header_gen = btrfs_header_generation(eb);
8727 extent_gen = btrfs_extent_generation(leaf, ei);
8728 if (header_gen != extent_gen) {
8730 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8731 key.objectid, nodesize, header_gen,
8733 err = BACKREF_MISMATCH;
8735 if (level != skinny_level) {
8737 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8738 key.objectid, nodesize, level, skinny_level);
8739 err = BACKREF_MISMATCH;
8741 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8743 "extent[%llu %u] is referred by other roots than %llu",
8744 key.objectid, nodesize, root->objectid);
8745 err = BACKREF_MISMATCH;
8750 * Iterate the extent/metadata item to find the exact backref
8752 item_size = btrfs_item_size_nr(leaf, slot);
8753 ptr = (unsigned long)iref;
8754 end = (unsigned long)ei + item_size;
8756 iref = (struct btrfs_extent_inline_ref *)ptr;
8757 type = btrfs_extent_inline_ref_type(leaf, iref);
8758 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8760 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8761 (offset == root->objectid || offset == owner)) {
8763 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8764 /* Check if the backref points to valid referencer */
8765 found_ref = !check_tree_block_ref(root, NULL, offset,
8771 ptr += btrfs_extent_inline_ref_size(type);
8775 * Inlined extent item doesn't have what we need, check
8776 * TREE_BLOCK_REF_KEY
8779 btrfs_release_path(&path);
8780 key.objectid = bytenr;
8781 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8782 key.offset = root->objectid;
8784 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8789 err |= BACKREF_MISSING;
8791 btrfs_release_path(&path);
8792 if (eb && (err & BACKREF_MISSING))
8793 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8794 bytenr, nodesize, owner, level);
8799 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8801 * Return >0 any error found and output error message
8802 * Return 0 for no error found
8804 static int check_extent_data_item(struct btrfs_root *root,
8805 struct extent_buffer *eb, int slot)
8807 struct btrfs_file_extent_item *fi;
8808 struct btrfs_path path;
8809 struct btrfs_root *extent_root = root->fs_info->extent_root;
8810 struct btrfs_key fi_key;
8811 struct btrfs_key dbref_key;
8812 struct extent_buffer *leaf;
8813 struct btrfs_extent_item *ei;
8814 struct btrfs_extent_inline_ref *iref;
8815 struct btrfs_extent_data_ref *dref;
8817 u64 file_extent_gen;
8820 u64 extent_num_bytes;
8828 int found_dbackref = 0;
8832 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8833 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8834 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8836 /* Nothing to check for hole and inline data extents */
8837 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8838 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8841 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8842 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8843 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8845 /* Check unaligned disk_num_bytes and num_bytes */
8846 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8848 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8849 fi_key.objectid, fi_key.offset, disk_num_bytes,
8851 err |= BYTES_UNALIGNED;
8853 data_bytes_allocated += disk_num_bytes;
8855 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8857 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8858 fi_key.objectid, fi_key.offset, extent_num_bytes,
8860 err |= BYTES_UNALIGNED;
8862 data_bytes_referenced += extent_num_bytes;
8864 owner = btrfs_header_owner(eb);
8866 /* Check the extent item of the file extent in extent tree */
8867 btrfs_init_path(&path);
8868 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8869 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8870 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8872 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8874 err |= BACKREF_MISSING;
8878 leaf = path.nodes[0];
8879 slot = path.slots[0];
8880 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8882 extent_flags = btrfs_extent_flags(leaf, ei);
8883 extent_gen = btrfs_extent_generation(leaf, ei);
8885 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8887 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8888 disk_bytenr, disk_num_bytes,
8889 BTRFS_EXTENT_FLAG_DATA);
8890 err |= BACKREF_MISMATCH;
8893 if (file_extent_gen < extent_gen) {
8895 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8896 disk_bytenr, disk_num_bytes, file_extent_gen,
8898 err |= BACKREF_MISMATCH;
8901 /* Check data backref inside that extent item */
8902 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8903 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8904 ptr = (unsigned long)iref;
8905 end = (unsigned long)ei + item_size;
8907 iref = (struct btrfs_extent_inline_ref *)ptr;
8908 type = btrfs_extent_inline_ref_type(leaf, iref);
8909 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8911 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8912 ref_root = btrfs_extent_data_ref_root(leaf, dref);
8913 if (ref_root == owner || ref_root == root->objectid)
8915 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8916 found_dbackref = !check_tree_block_ref(root, NULL,
8917 btrfs_extent_inline_ref_offset(leaf, iref),
8923 ptr += btrfs_extent_inline_ref_size(type);
8926 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8927 if (!found_dbackref) {
8928 btrfs_release_path(&path);
8930 btrfs_init_path(&path);
8931 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8932 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8933 dbref_key.offset = hash_extent_data_ref(root->objectid,
8934 fi_key.objectid, fi_key.offset);
8936 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8937 &dbref_key, &path, 0, 0);
8942 if (!found_dbackref)
8943 err |= BACKREF_MISSING;
8945 btrfs_release_path(&path);
8946 if (err & BACKREF_MISSING) {
8947 error("data extent[%llu %llu] backref lost",
8948 disk_bytenr, disk_num_bytes);
8954 * Get real tree block level for the case like shared block
8955 * Return >= 0 as tree level
8956 * Return <0 for error
8958 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8960 struct extent_buffer *eb;
8961 struct btrfs_path path;
8962 struct btrfs_key key;
8963 struct btrfs_extent_item *ei;
8966 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8971 /* Search extent tree for extent generation and level */
8972 key.objectid = bytenr;
8973 key.type = BTRFS_METADATA_ITEM_KEY;
8974 key.offset = (u64)-1;
8976 btrfs_init_path(&path);
8977 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8980 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8988 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8989 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8990 struct btrfs_extent_item);
8991 flags = btrfs_extent_flags(path.nodes[0], ei);
8992 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8997 /* Get transid for later read_tree_block() check */
8998 transid = btrfs_extent_generation(path.nodes[0], ei);
9000 /* Get backref level as one source */
9001 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9002 backref_level = key.offset;
9004 struct btrfs_tree_block_info *info;
9006 info = (struct btrfs_tree_block_info *)(ei + 1);
9007 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9009 btrfs_release_path(&path);
9011 /* Get level from tree block as an alternative source */
9012 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9013 if (!extent_buffer_uptodate(eb)) {
9014 free_extent_buffer(eb);
9017 header_level = btrfs_header_level(eb);
9018 free_extent_buffer(eb);
9020 if (header_level != backref_level)
9022 return header_level;
9025 btrfs_release_path(&path);
9030 * Check if a tree block backref is valid (points to a valid tree block)
9031 * if level == -1, level will be resolved
9032 * Return >0 for any error found and print error message
9034 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9035 u64 bytenr, int level)
9037 struct btrfs_root *root;
9038 struct btrfs_key key;
9039 struct btrfs_path path;
9040 struct extent_buffer *eb;
9041 struct extent_buffer *node;
9042 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9046 /* Query level for level == -1 special case */
9048 level = query_tree_block_level(fs_info, bytenr);
9050 err |= REFERENCER_MISSING;
9054 key.objectid = root_id;
9055 key.type = BTRFS_ROOT_ITEM_KEY;
9056 key.offset = (u64)-1;
9058 root = btrfs_read_fs_root(fs_info, &key);
9060 err |= REFERENCER_MISSING;
9064 /* Read out the tree block to get item/node key */
9065 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9066 if (!extent_buffer_uptodate(eb)) {
9067 err |= REFERENCER_MISSING;
9068 free_extent_buffer(eb);
9072 /* Empty tree, no need to check key */
9073 if (!btrfs_header_nritems(eb) && !level) {
9074 free_extent_buffer(eb);
9079 btrfs_node_key_to_cpu(eb, &key, 0);
9081 btrfs_item_key_to_cpu(eb, &key, 0);
9083 free_extent_buffer(eb);
9085 btrfs_init_path(&path);
9086 /* Search with the first key, to ensure we can reach it */
9087 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9089 err |= REFERENCER_MISSING;
9093 node = path.nodes[level];
9094 if (btrfs_header_bytenr(node) != bytenr) {
9096 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9097 bytenr, nodesize, bytenr,
9098 btrfs_header_bytenr(node));
9099 err |= REFERENCER_MISMATCH;
9101 if (btrfs_header_level(node) != level) {
9103 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9104 bytenr, nodesize, level,
9105 btrfs_header_level(node));
9106 err |= REFERENCER_MISMATCH;
9110 btrfs_release_path(&path);
9112 if (err & REFERENCER_MISSING) {
9114 error("extent [%llu %d] lost referencer (owner: %llu)",
9115 bytenr, nodesize, root_id);
9118 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9119 bytenr, nodesize, root_id, level);
9126 * Check referencer for shared block backref
9127 * If level == -1, this function will resolve the level.
9129 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9130 u64 parent, u64 bytenr, int level)
9132 struct extent_buffer *eb;
9133 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9135 int found_parent = 0;
9138 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9139 if (!extent_buffer_uptodate(eb))
9143 level = query_tree_block_level(fs_info, bytenr);
9147 if (level + 1 != btrfs_header_level(eb))
9150 nr = btrfs_header_nritems(eb);
9151 for (i = 0; i < nr; i++) {
9152 if (bytenr == btrfs_node_blockptr(eb, i)) {
9158 free_extent_buffer(eb);
9159 if (!found_parent) {
9161 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9162 bytenr, nodesize, parent, level);
9163 return REFERENCER_MISSING;
9169 * Check referencer for normal (inlined) data ref
9170 * If len == 0, it will be resolved by searching in extent tree
9172 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9173 u64 root_id, u64 objectid, u64 offset,
9174 u64 bytenr, u64 len, u32 count)
9176 struct btrfs_root *root;
9177 struct btrfs_root *extent_root = fs_info->extent_root;
9178 struct btrfs_key key;
9179 struct btrfs_path path;
9180 struct extent_buffer *leaf;
9181 struct btrfs_file_extent_item *fi;
9182 u32 found_count = 0;
9187 key.objectid = bytenr;
9188 key.type = BTRFS_EXTENT_ITEM_KEY;
9189 key.offset = (u64)-1;
9191 btrfs_init_path(&path);
9192 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9195 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9198 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9199 if (key.objectid != bytenr ||
9200 key.type != BTRFS_EXTENT_ITEM_KEY)
9203 btrfs_release_path(&path);
9205 key.objectid = root_id;
9206 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9207 key.offset = (u64)-1;
9208 btrfs_init_path(&path);
9210 root = btrfs_read_fs_root(fs_info, &key);
9214 key.objectid = objectid;
9215 key.type = BTRFS_EXTENT_DATA_KEY;
9217 * It can be nasty as data backref offset is
9218 * file offset - file extent offset, which is smaller or
9219 * equal to original backref offset. The only special case is
9220 * overflow. So we need to special check and do further search.
9222 key.offset = offset & (1ULL << 63) ? 0 : offset;
9224 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9229 * Search afterwards to get correct one
9230 * NOTE: As we must do a comprehensive check on the data backref to
9231 * make sure the dref count also matches, we must iterate all file
9232 * extents for that inode.
9235 leaf = path.nodes[0];
9236 slot = path.slots[0];
9238 btrfs_item_key_to_cpu(leaf, &key, slot);
9239 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9241 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9243 * Except normal disk bytenr and disk num bytes, we still
9244 * need to do extra check on dbackref offset as
9245 * dbackref offset = file_offset - file_extent_offset
9247 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9248 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9249 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9253 ret = btrfs_next_item(root, &path);
9258 btrfs_release_path(&path);
9259 if (found_count != count) {
9261 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9262 bytenr, len, root_id, objectid, offset, count, found_count);
9263 return REFERENCER_MISSING;
9269 * Check if the referencer of a shared data backref exists
9271 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9272 u64 parent, u64 bytenr)
9274 struct extent_buffer *eb;
9275 struct btrfs_key key;
9276 struct btrfs_file_extent_item *fi;
9277 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9279 int found_parent = 0;
9282 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9283 if (!extent_buffer_uptodate(eb))
9286 nr = btrfs_header_nritems(eb);
9287 for (i = 0; i < nr; i++) {
9288 btrfs_item_key_to_cpu(eb, &key, i);
9289 if (key.type != BTRFS_EXTENT_DATA_KEY)
9292 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9293 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9296 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9303 free_extent_buffer(eb);
9304 if (!found_parent) {
9305 error("shared extent %llu referencer lost (parent: %llu)",
9307 return REFERENCER_MISSING;
9313 * This function will check a given extent item, including its backref and
9314 * itself (like crossing stripe boundary and type)
9316 * Since we don't use extent_record anymore, introduce new error bit
9318 static int check_extent_item(struct btrfs_fs_info *fs_info,
9319 struct extent_buffer *eb, int slot)
9321 struct btrfs_extent_item *ei;
9322 struct btrfs_extent_inline_ref *iref;
9323 struct btrfs_extent_data_ref *dref;
9327 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9328 u32 item_size = btrfs_item_size_nr(eb, slot);
9333 struct btrfs_key key;
9337 btrfs_item_key_to_cpu(eb, &key, slot);
9338 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9339 bytes_used += key.offset;
9341 bytes_used += nodesize;
9343 if (item_size < sizeof(*ei)) {
9345 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9346 * old thing when on disk format is still un-determined.
9347 * No need to care about it anymore
9349 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9353 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9354 flags = btrfs_extent_flags(eb, ei);
9356 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9358 if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9359 error("bad metadata [%llu, %llu) crossing stripe boundary",
9360 key.objectid, key.objectid + nodesize);
9361 err |= CROSSING_STRIPE_BOUNDARY;
9364 ptr = (unsigned long)(ei + 1);
9366 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9367 /* Old EXTENT_ITEM metadata */
9368 struct btrfs_tree_block_info *info;
9370 info = (struct btrfs_tree_block_info *)ptr;
9371 level = btrfs_tree_block_level(eb, info);
9372 ptr += sizeof(struct btrfs_tree_block_info);
9374 /* New METADATA_ITEM */
9377 end = (unsigned long)ei + item_size;
9380 err |= ITEM_SIZE_MISMATCH;
9384 /* Now check every backref in this extent item */
9386 iref = (struct btrfs_extent_inline_ref *)ptr;
9387 type = btrfs_extent_inline_ref_type(eb, iref);
9388 offset = btrfs_extent_inline_ref_offset(eb, iref);
9390 case BTRFS_TREE_BLOCK_REF_KEY:
9391 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9395 case BTRFS_SHARED_BLOCK_REF_KEY:
9396 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9400 case BTRFS_EXTENT_DATA_REF_KEY:
9401 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9402 ret = check_extent_data_backref(fs_info,
9403 btrfs_extent_data_ref_root(eb, dref),
9404 btrfs_extent_data_ref_objectid(eb, dref),
9405 btrfs_extent_data_ref_offset(eb, dref),
9406 key.objectid, key.offset,
9407 btrfs_extent_data_ref_count(eb, dref));
9410 case BTRFS_SHARED_DATA_REF_KEY:
9411 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9415 error("extent[%llu %d %llu] has unknown ref type: %d",
9416 key.objectid, key.type, key.offset, type);
9417 err |= UNKNOWN_TYPE;
9421 ptr += btrfs_extent_inline_ref_size(type);
9430 * Check if a dev extent item is referred correctly by its chunk
9432 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9433 struct extent_buffer *eb, int slot)
9435 struct btrfs_root *chunk_root = fs_info->chunk_root;
9436 struct btrfs_dev_extent *ptr;
9437 struct btrfs_path path;
9438 struct btrfs_key chunk_key;
9439 struct btrfs_key devext_key;
9440 struct btrfs_chunk *chunk;
9441 struct extent_buffer *l;
9445 int found_chunk = 0;
9448 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9449 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9450 length = btrfs_dev_extent_length(eb, ptr);
9452 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9453 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9454 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9456 btrfs_init_path(&path);
9457 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9462 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9463 if (btrfs_chunk_length(l, chunk) != length)
9466 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9467 for (i = 0; i < num_stripes; i++) {
9468 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9469 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9471 if (devid == devext_key.objectid &&
9472 offset == devext_key.offset) {
9478 btrfs_release_path(&path);
9481 "device extent[%llu, %llu, %llu] did not find the related chunk",
9482 devext_key.objectid, devext_key.offset, length);
9483 return REFERENCER_MISSING;
9489 * Check if the used space is correct with the dev item
9491 static int check_dev_item(struct btrfs_fs_info *fs_info,
9492 struct extent_buffer *eb, int slot)
9494 struct btrfs_root *dev_root = fs_info->dev_root;
9495 struct btrfs_dev_item *dev_item;
9496 struct btrfs_path path;
9497 struct btrfs_key key;
9498 struct btrfs_dev_extent *ptr;
9504 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9505 dev_id = btrfs_device_id(eb, dev_item);
9506 used = btrfs_device_bytes_used(eb, dev_item);
9508 key.objectid = dev_id;
9509 key.type = BTRFS_DEV_EXTENT_KEY;
9512 btrfs_init_path(&path);
9513 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9515 btrfs_item_key_to_cpu(eb, &key, slot);
9516 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9517 key.objectid, key.type, key.offset);
9518 btrfs_release_path(&path);
9519 return REFERENCER_MISSING;
9522 /* Iterate dev_extents to calculate the used space of a device */
9524 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9526 if (key.objectid > dev_id)
9528 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9531 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9532 struct btrfs_dev_extent);
9533 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9535 ret = btrfs_next_item(dev_root, &path);
9539 btrfs_release_path(&path);
9541 if (used != total) {
9542 btrfs_item_key_to_cpu(eb, &key, slot);
9544 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9545 total, used, BTRFS_ROOT_TREE_OBJECTID,
9546 BTRFS_DEV_EXTENT_KEY, dev_id);
9547 return ACCOUNTING_MISMATCH;
9553 * Check a block group item with its referener (chunk) and its used space
9554 * with extent/metadata item
9556 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9557 struct extent_buffer *eb, int slot)
9559 struct btrfs_root *extent_root = fs_info->extent_root;
9560 struct btrfs_root *chunk_root = fs_info->chunk_root;
9561 struct btrfs_block_group_item *bi;
9562 struct btrfs_block_group_item bg_item;
9563 struct btrfs_path path;
9564 struct btrfs_key bg_key;
9565 struct btrfs_key chunk_key;
9566 struct btrfs_key extent_key;
9567 struct btrfs_chunk *chunk;
9568 struct extent_buffer *leaf;
9569 struct btrfs_extent_item *ei;
9570 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9578 btrfs_item_key_to_cpu(eb, &bg_key, slot);
9579 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9580 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9581 used = btrfs_block_group_used(&bg_item);
9582 bg_flags = btrfs_block_group_flags(&bg_item);
9584 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9585 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9586 chunk_key.offset = bg_key.objectid;
9588 btrfs_init_path(&path);
9589 /* Search for the referencer chunk */
9590 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9593 "block group[%llu %llu] did not find the related chunk item",
9594 bg_key.objectid, bg_key.offset);
9595 err |= REFERENCER_MISSING;
9597 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9598 struct btrfs_chunk);
9599 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9602 "block group[%llu %llu] related chunk item length does not match",
9603 bg_key.objectid, bg_key.offset);
9604 err |= REFERENCER_MISMATCH;
9607 btrfs_release_path(&path);
9609 /* Search from the block group bytenr */
9610 extent_key.objectid = bg_key.objectid;
9611 extent_key.type = 0;
9612 extent_key.offset = 0;
9614 btrfs_init_path(&path);
9615 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9619 /* Iterate extent tree to account used space */
9621 leaf = path.nodes[0];
9622 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9623 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9626 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9627 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9629 if (extent_key.objectid < bg_key.objectid)
9632 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9635 total += extent_key.offset;
9637 ei = btrfs_item_ptr(leaf, path.slots[0],
9638 struct btrfs_extent_item);
9639 flags = btrfs_extent_flags(leaf, ei);
9640 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9641 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9643 "bad extent[%llu, %llu) type mismatch with chunk",
9644 extent_key.objectid,
9645 extent_key.objectid + extent_key.offset);
9646 err |= CHUNK_TYPE_MISMATCH;
9648 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9649 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9650 BTRFS_BLOCK_GROUP_METADATA))) {
9652 "bad extent[%llu, %llu) type mismatch with chunk",
9653 extent_key.objectid,
9654 extent_key.objectid + nodesize);
9655 err |= CHUNK_TYPE_MISMATCH;
9659 ret = btrfs_next_item(extent_root, &path);
9665 btrfs_release_path(&path);
9667 if (total != used) {
9669 "block group[%llu %llu] used %llu but extent items used %llu",
9670 bg_key.objectid, bg_key.offset, used, total);
9671 err |= ACCOUNTING_MISMATCH;
9677 * Check a chunk item.
9678 * Including checking all referred dev_extents and block group
9680 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9681 struct extent_buffer *eb, int slot)
9683 struct btrfs_root *extent_root = fs_info->extent_root;
9684 struct btrfs_root *dev_root = fs_info->dev_root;
9685 struct btrfs_path path;
9686 struct btrfs_key chunk_key;
9687 struct btrfs_key bg_key;
9688 struct btrfs_key devext_key;
9689 struct btrfs_chunk *chunk;
9690 struct extent_buffer *leaf;
9691 struct btrfs_block_group_item *bi;
9692 struct btrfs_block_group_item bg_item;
9693 struct btrfs_dev_extent *ptr;
9694 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9706 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9707 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9708 length = btrfs_chunk_length(eb, chunk);
9709 chunk_end = chunk_key.offset + length;
9710 if (!IS_ALIGNED(length, sectorsize)) {
9711 error("chunk[%llu %llu) not aligned to %u",
9712 chunk_key.offset, chunk_end, sectorsize);
9713 err |= BYTES_UNALIGNED;
9717 type = btrfs_chunk_type(eb, chunk);
9718 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9719 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9720 error("chunk[%llu %llu) has no chunk type",
9721 chunk_key.offset, chunk_end);
9722 err |= UNKNOWN_TYPE;
9724 if (profile && (profile & (profile - 1))) {
9725 error("chunk[%llu %llu) multiple profiles detected: %llx",
9726 chunk_key.offset, chunk_end, profile);
9727 err |= UNKNOWN_TYPE;
9730 bg_key.objectid = chunk_key.offset;
9731 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9732 bg_key.offset = length;
9734 btrfs_init_path(&path);
9735 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9738 "chunk[%llu %llu) did not find the related block group item",
9739 chunk_key.offset, chunk_end);
9740 err |= REFERENCER_MISSING;
9742 leaf = path.nodes[0];
9743 bi = btrfs_item_ptr(leaf, path.slots[0],
9744 struct btrfs_block_group_item);
9745 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9747 if (btrfs_block_group_flags(&bg_item) != type) {
9749 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9750 chunk_key.offset, chunk_end, type,
9751 btrfs_block_group_flags(&bg_item));
9752 err |= REFERENCER_MISSING;
9756 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9757 for (i = 0; i < num_stripes; i++) {
9758 btrfs_release_path(&path);
9759 btrfs_init_path(&path);
9760 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9761 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9762 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9764 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9769 leaf = path.nodes[0];
9770 ptr = btrfs_item_ptr(leaf, path.slots[0],
9771 struct btrfs_dev_extent);
9772 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9773 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9774 if (objectid != chunk_key.objectid ||
9775 offset != chunk_key.offset ||
9776 btrfs_dev_extent_length(leaf, ptr) != length)
9780 err |= BACKREF_MISSING;
9782 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9783 chunk_key.objectid, chunk_end, i);
9786 btrfs_release_path(&path);
9792 * Main entry function to check known items and update related accounting info
9794 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9796 struct btrfs_fs_info *fs_info = root->fs_info;
9797 struct btrfs_key key;
9800 struct btrfs_extent_data_ref *dref;
9805 btrfs_item_key_to_cpu(eb, &key, slot);
9806 type = btrfs_key_type(&key);
9809 case BTRFS_EXTENT_DATA_KEY:
9810 ret = check_extent_data_item(root, eb, slot);
9813 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9814 ret = check_block_group_item(fs_info, eb, slot);
9817 case BTRFS_DEV_ITEM_KEY:
9818 ret = check_dev_item(fs_info, eb, slot);
9821 case BTRFS_CHUNK_ITEM_KEY:
9822 ret = check_chunk_item(fs_info, eb, slot);
9825 case BTRFS_DEV_EXTENT_KEY:
9826 ret = check_dev_extent_item(fs_info, eb, slot);
9829 case BTRFS_EXTENT_ITEM_KEY:
9830 case BTRFS_METADATA_ITEM_KEY:
9831 ret = check_extent_item(fs_info, eb, slot);
9834 case BTRFS_EXTENT_CSUM_KEY:
9835 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9837 case BTRFS_TREE_BLOCK_REF_KEY:
9838 ret = check_tree_block_backref(fs_info, key.offset,
9842 case BTRFS_EXTENT_DATA_REF_KEY:
9843 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9844 ret = check_extent_data_backref(fs_info,
9845 btrfs_extent_data_ref_root(eb, dref),
9846 btrfs_extent_data_ref_objectid(eb, dref),
9847 btrfs_extent_data_ref_offset(eb, dref),
9849 btrfs_extent_data_ref_count(eb, dref));
9852 case BTRFS_SHARED_BLOCK_REF_KEY:
9853 ret = check_shared_block_backref(fs_info, key.offset,
9857 case BTRFS_SHARED_DATA_REF_KEY:
9858 ret = check_shared_data_backref(fs_info, key.offset,
9866 if (++slot < btrfs_header_nritems(eb))
9873 * Helper function for later fs/subvol tree check. To determine if a tree
9874 * block should be checked.
9875 * This function will ensure only the direct referencer with lowest rootid to
9876 * check a fs/subvolume tree block.
9878 * Backref check at extent tree would detect errors like missing subvolume
9879 * tree, so we can do aggressive check to reduce duplicated checks.
9881 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9883 struct btrfs_root *extent_root = root->fs_info->extent_root;
9884 struct btrfs_key key;
9885 struct btrfs_path path;
9886 struct extent_buffer *leaf;
9888 struct btrfs_extent_item *ei;
9894 struct btrfs_extent_inline_ref *iref;
9897 btrfs_init_path(&path);
9898 key.objectid = btrfs_header_bytenr(eb);
9899 key.type = BTRFS_METADATA_ITEM_KEY;
9900 key.offset = (u64)-1;
9903 * Any failure in backref resolving means we can't determine
9904 * whom the tree block belongs to.
9905 * So in that case, we need to check that tree block
9907 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9911 ret = btrfs_previous_extent_item(extent_root, &path,
9912 btrfs_header_bytenr(eb));
9916 leaf = path.nodes[0];
9917 slot = path.slots[0];
9918 btrfs_item_key_to_cpu(leaf, &key, slot);
9919 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9921 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9922 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9924 struct btrfs_tree_block_info *info;
9926 info = (struct btrfs_tree_block_info *)(ei + 1);
9927 iref = (struct btrfs_extent_inline_ref *)(info + 1);
9930 item_size = btrfs_item_size_nr(leaf, slot);
9931 ptr = (unsigned long)iref;
9932 end = (unsigned long)ei + item_size;
9934 iref = (struct btrfs_extent_inline_ref *)ptr;
9935 type = btrfs_extent_inline_ref_type(leaf, iref);
9936 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9939 * We only check the tree block if current root is
9940 * the lowest referencer of it.
9942 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9943 offset < root->objectid) {
9944 btrfs_release_path(&path);
9948 ptr += btrfs_extent_inline_ref_size(type);
9951 * Normally we should also check keyed tree block ref, but that may be
9952 * very time consuming. Inlined ref should already make us skip a lot
9953 * of refs now. So skip search keyed tree block ref.
9957 btrfs_release_path(&path);
9962 * Traversal function for tree block. We will do:
9963 * 1) Skip shared fs/subvolume tree blocks
9964 * 2) Update related bytes accounting
9965 * 3) Pre-order traversal
9967 static int traverse_tree_block(struct btrfs_root *root,
9968 struct extent_buffer *node)
9970 struct extent_buffer *eb;
9978 * Skip shared fs/subvolume tree block, in that case they will
9979 * be checked by referencer with lowest rootid
9981 if (is_fstree(root->objectid) && !should_check(root, node))
9984 /* Update bytes accounting */
9985 total_btree_bytes += node->len;
9986 if (fs_root_objectid(btrfs_header_owner(node)))
9987 total_fs_tree_bytes += node->len;
9988 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
9989 total_extent_tree_bytes += node->len;
9990 if (!found_old_backref &&
9991 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
9992 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
9993 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
9994 found_old_backref = 1;
9996 /* pre-order tranversal, check itself first */
9997 level = btrfs_header_level(node);
9998 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
9999 btrfs_header_level(node),
10000 btrfs_header_owner(node));
10004 "check %s failed root %llu bytenr %llu level %d, force continue check",
10005 level ? "node":"leaf", root->objectid,
10006 btrfs_header_bytenr(node), btrfs_header_level(node));
10009 btree_space_waste += btrfs_leaf_free_space(root, node);
10010 ret = check_leaf_items(root, node);
10015 nr = btrfs_header_nritems(node);
10016 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10017 sizeof(struct btrfs_key_ptr);
10019 /* Then check all its children */
10020 for (i = 0; i < nr; i++) {
10021 u64 blocknr = btrfs_node_blockptr(node, i);
10024 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10025 * to call the function itself.
10027 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10028 if (extent_buffer_uptodate(eb)) {
10029 ret = traverse_tree_block(root, eb);
10032 free_extent_buffer(eb);
10039 * Low memory usage version check_chunks_and_extents.
10041 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10043 struct btrfs_path path;
10044 struct btrfs_key key;
10045 struct btrfs_root *root1;
10046 struct btrfs_root *cur_root;
10050 root1 = root->fs_info->chunk_root;
10051 ret = traverse_tree_block(root1, root1->node);
10054 root1 = root->fs_info->tree_root;
10055 ret = traverse_tree_block(root1, root1->node);
10058 btrfs_init_path(&path);
10059 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10061 key.type = BTRFS_ROOT_ITEM_KEY;
10063 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10065 error("cannot find extent treet in tree_root");
10070 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10071 if (key.type != BTRFS_ROOT_ITEM_KEY)
10073 key.offset = (u64)-1;
10075 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10076 if (IS_ERR(cur_root) || !cur_root) {
10077 error("failed to read tree: %lld", key.objectid);
10081 ret = traverse_tree_block(cur_root, cur_root->node);
10085 ret = btrfs_next_item(root1, &path);
10091 btrfs_release_path(&path);
10095 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10096 struct btrfs_root *root, int overwrite)
10098 struct extent_buffer *c;
10099 struct extent_buffer *old = root->node;
10102 struct btrfs_disk_key disk_key = {0,0,0};
10108 extent_buffer_get(c);
10111 c = btrfs_alloc_free_block(trans, root,
10113 root->root_key.objectid,
10114 &disk_key, level, 0, 0);
10117 extent_buffer_get(c);
10121 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10122 btrfs_set_header_level(c, level);
10123 btrfs_set_header_bytenr(c, c->start);
10124 btrfs_set_header_generation(c, trans->transid);
10125 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10126 btrfs_set_header_owner(c, root->root_key.objectid);
10128 write_extent_buffer(c, root->fs_info->fsid,
10129 btrfs_header_fsid(), BTRFS_FSID_SIZE);
10131 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10132 btrfs_header_chunk_tree_uuid(c),
10135 btrfs_mark_buffer_dirty(c);
10137 * this case can happen in the following case:
10139 * 1.overwrite previous root.
10141 * 2.reinit reloc data root, this is because we skip pin
10142 * down reloc data tree before which means we can allocate
10143 * same block bytenr here.
10145 if (old->start == c->start) {
10146 btrfs_set_root_generation(&root->root_item,
10148 root->root_item.level = btrfs_header_level(root->node);
10149 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10150 &root->root_key, &root->root_item);
10152 free_extent_buffer(c);
10156 free_extent_buffer(old);
10158 add_root_to_dirty_list(root);
10162 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10163 struct extent_buffer *eb, int tree_root)
10165 struct extent_buffer *tmp;
10166 struct btrfs_root_item *ri;
10167 struct btrfs_key key;
10170 int level = btrfs_header_level(eb);
10176 * If we have pinned this block before, don't pin it again.
10177 * This can not only avoid forever loop with broken filesystem
10178 * but also give us some speedups.
10180 if (test_range_bit(&fs_info->pinned_extents, eb->start,
10181 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10184 btrfs_pin_extent(fs_info, eb->start, eb->len);
10186 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10187 nritems = btrfs_header_nritems(eb);
10188 for (i = 0; i < nritems; i++) {
10190 btrfs_item_key_to_cpu(eb, &key, i);
10191 if (key.type != BTRFS_ROOT_ITEM_KEY)
10193 /* Skip the extent root and reloc roots */
10194 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10195 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10196 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10198 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10199 bytenr = btrfs_disk_root_bytenr(eb, ri);
10202 * If at any point we start needing the real root we
10203 * will have to build a stump root for the root we are
10204 * in, but for now this doesn't actually use the root so
10205 * just pass in extent_root.
10207 tmp = read_tree_block(fs_info->extent_root, bytenr,
10209 if (!extent_buffer_uptodate(tmp)) {
10210 fprintf(stderr, "Error reading root block\n");
10213 ret = pin_down_tree_blocks(fs_info, tmp, 0);
10214 free_extent_buffer(tmp);
10218 bytenr = btrfs_node_blockptr(eb, i);
10220 /* If we aren't the tree root don't read the block */
10221 if (level == 1 && !tree_root) {
10222 btrfs_pin_extent(fs_info, bytenr, nodesize);
10226 tmp = read_tree_block(fs_info->extent_root, bytenr,
10228 if (!extent_buffer_uptodate(tmp)) {
10229 fprintf(stderr, "Error reading tree block\n");
10232 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10233 free_extent_buffer(tmp);
10242 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10246 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10250 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
10253 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10255 struct btrfs_block_group_cache *cache;
10256 struct btrfs_path *path;
10257 struct extent_buffer *leaf;
10258 struct btrfs_chunk *chunk;
10259 struct btrfs_key key;
10263 path = btrfs_alloc_path();
10268 key.type = BTRFS_CHUNK_ITEM_KEY;
10271 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10273 btrfs_free_path(path);
10278 * We do this in case the block groups were screwed up and had alloc
10279 * bits that aren't actually set on the chunks. This happens with
10280 * restored images every time and could happen in real life I guess.
10282 fs_info->avail_data_alloc_bits = 0;
10283 fs_info->avail_metadata_alloc_bits = 0;
10284 fs_info->avail_system_alloc_bits = 0;
10286 /* First we need to create the in-memory block groups */
10288 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10289 ret = btrfs_next_leaf(fs_info->chunk_root, path);
10291 btrfs_free_path(path);
10299 leaf = path->nodes[0];
10300 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10301 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10306 chunk = btrfs_item_ptr(leaf, path->slots[0],
10307 struct btrfs_chunk);
10308 btrfs_add_block_group(fs_info, 0,
10309 btrfs_chunk_type(leaf, chunk),
10310 key.objectid, key.offset,
10311 btrfs_chunk_length(leaf, chunk));
10312 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10313 key.offset + btrfs_chunk_length(leaf, chunk),
10319 cache = btrfs_lookup_first_block_group(fs_info, start);
10323 start = cache->key.objectid + cache->key.offset;
10326 btrfs_free_path(path);
10330 static int reset_balance(struct btrfs_trans_handle *trans,
10331 struct btrfs_fs_info *fs_info)
10333 struct btrfs_root *root = fs_info->tree_root;
10334 struct btrfs_path *path;
10335 struct extent_buffer *leaf;
10336 struct btrfs_key key;
10337 int del_slot, del_nr = 0;
10341 path = btrfs_alloc_path();
10345 key.objectid = BTRFS_BALANCE_OBJECTID;
10346 key.type = BTRFS_BALANCE_ITEM_KEY;
10349 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10354 goto reinit_data_reloc;
10359 ret = btrfs_del_item(trans, root, path);
10362 btrfs_release_path(path);
10364 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10365 key.type = BTRFS_ROOT_ITEM_KEY;
10368 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10372 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10377 ret = btrfs_del_items(trans, root, path,
10384 btrfs_release_path(path);
10387 ret = btrfs_search_slot(trans, root, &key, path,
10394 leaf = path->nodes[0];
10395 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10396 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10398 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10403 del_slot = path->slots[0];
10412 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10416 btrfs_release_path(path);
10419 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10420 key.type = BTRFS_ROOT_ITEM_KEY;
10421 key.offset = (u64)-1;
10422 root = btrfs_read_fs_root(fs_info, &key);
10423 if (IS_ERR(root)) {
10424 fprintf(stderr, "Error reading data reloc tree\n");
10425 ret = PTR_ERR(root);
10428 record_root_in_trans(trans, root);
10429 ret = btrfs_fsck_reinit_root(trans, root, 0);
10432 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10434 btrfs_free_path(path);
10438 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10439 struct btrfs_fs_info *fs_info)
10445 * The only reason we don't do this is because right now we're just
10446 * walking the trees we find and pinning down their bytes, we don't look
10447 * at any of the leaves. In order to do mixed groups we'd have to check
10448 * the leaves of any fs roots and pin down the bytes for any file
10449 * extents we find. Not hard but why do it if we don't have to?
10451 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10452 fprintf(stderr, "We don't support re-initing the extent tree "
10453 "for mixed block groups yet, please notify a btrfs "
10454 "developer you want to do this so they can add this "
10455 "functionality.\n");
10460 * first we need to walk all of the trees except the extent tree and pin
10461 * down the bytes that are in use so we don't overwrite any existing
10464 ret = pin_metadata_blocks(fs_info);
10466 fprintf(stderr, "error pinning down used bytes\n");
10471 * Need to drop all the block groups since we're going to recreate all
10474 btrfs_free_block_groups(fs_info);
10475 ret = reset_block_groups(fs_info);
10477 fprintf(stderr, "error resetting the block groups\n");
10481 /* Ok we can allocate now, reinit the extent root */
10482 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10484 fprintf(stderr, "extent root initialization failed\n");
10486 * When the transaction code is updated we should end the
10487 * transaction, but for now progs only knows about commit so
10488 * just return an error.
10494 * Now we have all the in-memory block groups setup so we can make
10495 * allocations properly, and the metadata we care about is safe since we
10496 * pinned all of it above.
10499 struct btrfs_block_group_cache *cache;
10501 cache = btrfs_lookup_first_block_group(fs_info, start);
10504 start = cache->key.objectid + cache->key.offset;
10505 ret = btrfs_insert_item(trans, fs_info->extent_root,
10506 &cache->key, &cache->item,
10507 sizeof(cache->item));
10509 fprintf(stderr, "Error adding block group\n");
10512 btrfs_extent_post_op(trans, fs_info->extent_root);
10515 ret = reset_balance(trans, fs_info);
10517 fprintf(stderr, "error resetting the pending balance\n");
10522 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10524 struct btrfs_path *path;
10525 struct btrfs_trans_handle *trans;
10526 struct btrfs_key key;
10529 printf("Recowing metadata block %llu\n", eb->start);
10530 key.objectid = btrfs_header_owner(eb);
10531 key.type = BTRFS_ROOT_ITEM_KEY;
10532 key.offset = (u64)-1;
10534 root = btrfs_read_fs_root(root->fs_info, &key);
10535 if (IS_ERR(root)) {
10536 fprintf(stderr, "Couldn't find owner root %llu\n",
10538 return PTR_ERR(root);
10541 path = btrfs_alloc_path();
10545 trans = btrfs_start_transaction(root, 1);
10546 if (IS_ERR(trans)) {
10547 btrfs_free_path(path);
10548 return PTR_ERR(trans);
10551 path->lowest_level = btrfs_header_level(eb);
10552 if (path->lowest_level)
10553 btrfs_node_key_to_cpu(eb, &key, 0);
10555 btrfs_item_key_to_cpu(eb, &key, 0);
10557 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10558 btrfs_commit_transaction(trans, root);
10559 btrfs_free_path(path);
10563 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10565 struct btrfs_path *path;
10566 struct btrfs_trans_handle *trans;
10567 struct btrfs_key key;
10570 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10571 bad->key.type, bad->key.offset);
10572 key.objectid = bad->root_id;
10573 key.type = BTRFS_ROOT_ITEM_KEY;
10574 key.offset = (u64)-1;
10576 root = btrfs_read_fs_root(root->fs_info, &key);
10577 if (IS_ERR(root)) {
10578 fprintf(stderr, "Couldn't find owner root %llu\n",
10580 return PTR_ERR(root);
10583 path = btrfs_alloc_path();
10587 trans = btrfs_start_transaction(root, 1);
10588 if (IS_ERR(trans)) {
10589 btrfs_free_path(path);
10590 return PTR_ERR(trans);
10593 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10599 ret = btrfs_del_item(trans, root, path);
10601 btrfs_commit_transaction(trans, root);
10602 btrfs_free_path(path);
10606 static int zero_log_tree(struct btrfs_root *root)
10608 struct btrfs_trans_handle *trans;
10611 trans = btrfs_start_transaction(root, 1);
10612 if (IS_ERR(trans)) {
10613 ret = PTR_ERR(trans);
10616 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10617 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10618 ret = btrfs_commit_transaction(trans, root);
10622 static int populate_csum(struct btrfs_trans_handle *trans,
10623 struct btrfs_root *csum_root, char *buf, u64 start,
10630 while (offset < len) {
10631 sectorsize = csum_root->sectorsize;
10632 ret = read_extent_data(csum_root, buf, start + offset,
10636 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10637 start + offset, buf, sectorsize);
10640 offset += sectorsize;
10645 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10646 struct btrfs_root *csum_root,
10647 struct btrfs_root *cur_root)
10649 struct btrfs_path *path;
10650 struct btrfs_key key;
10651 struct extent_buffer *node;
10652 struct btrfs_file_extent_item *fi;
10659 path = btrfs_alloc_path();
10662 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10672 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10675 /* Iterate all regular file extents and fill its csum */
10677 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10679 if (key.type != BTRFS_EXTENT_DATA_KEY)
10681 node = path->nodes[0];
10682 slot = path->slots[0];
10683 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10684 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10686 start = btrfs_file_extent_disk_bytenr(node, fi);
10687 len = btrfs_file_extent_disk_num_bytes(node, fi);
10689 ret = populate_csum(trans, csum_root, buf, start, len);
10690 if (ret == -EEXIST)
10696 * TODO: if next leaf is corrupted, jump to nearest next valid
10699 ret = btrfs_next_item(cur_root, path);
10709 btrfs_free_path(path);
10714 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10715 struct btrfs_root *csum_root)
10717 struct btrfs_fs_info *fs_info = csum_root->fs_info;
10718 struct btrfs_path *path;
10719 struct btrfs_root *tree_root = fs_info->tree_root;
10720 struct btrfs_root *cur_root;
10721 struct extent_buffer *node;
10722 struct btrfs_key key;
10726 path = btrfs_alloc_path();
10730 key.objectid = BTRFS_FS_TREE_OBJECTID;
10732 key.type = BTRFS_ROOT_ITEM_KEY;
10734 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10743 node = path->nodes[0];
10744 slot = path->slots[0];
10745 btrfs_item_key_to_cpu(node, &key, slot);
10746 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10748 if (key.type != BTRFS_ROOT_ITEM_KEY)
10750 if (!is_fstree(key.objectid))
10752 key.offset = (u64)-1;
10754 cur_root = btrfs_read_fs_root(fs_info, &key);
10755 if (IS_ERR(cur_root) || !cur_root) {
10756 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10760 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10765 ret = btrfs_next_item(tree_root, path);
10775 btrfs_free_path(path);
10779 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10780 struct btrfs_root *csum_root)
10782 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10783 struct btrfs_path *path;
10784 struct btrfs_extent_item *ei;
10785 struct extent_buffer *leaf;
10787 struct btrfs_key key;
10790 path = btrfs_alloc_path();
10795 key.type = BTRFS_EXTENT_ITEM_KEY;
10798 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10800 btrfs_free_path(path);
10804 buf = malloc(csum_root->sectorsize);
10806 btrfs_free_path(path);
10811 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10812 ret = btrfs_next_leaf(extent_root, path);
10820 leaf = path->nodes[0];
10822 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10823 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10828 ei = btrfs_item_ptr(leaf, path->slots[0],
10829 struct btrfs_extent_item);
10830 if (!(btrfs_extent_flags(leaf, ei) &
10831 BTRFS_EXTENT_FLAG_DATA)) {
10836 ret = populate_csum(trans, csum_root, buf, key.objectid,
10843 btrfs_free_path(path);
10849 * Recalculate the csum and put it into the csum tree.
10851 * Extent tree init will wipe out all the extent info, so in that case, we
10852 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
10853 * will use fs/subvol trees to init the csum tree.
10855 static int fill_csum_tree(struct btrfs_trans_handle *trans,
10856 struct btrfs_root *csum_root,
10857 int search_fs_tree)
10859 if (search_fs_tree)
10860 return fill_csum_tree_from_fs(trans, csum_root);
10862 return fill_csum_tree_from_extent(trans, csum_root);
10865 static void free_roots_info_cache(void)
10867 if (!roots_info_cache)
10870 while (!cache_tree_empty(roots_info_cache)) {
10871 struct cache_extent *entry;
10872 struct root_item_info *rii;
10874 entry = first_cache_extent(roots_info_cache);
10877 remove_cache_extent(roots_info_cache, entry);
10878 rii = container_of(entry, struct root_item_info, cache_extent);
10882 free(roots_info_cache);
10883 roots_info_cache = NULL;
10886 static int build_roots_info_cache(struct btrfs_fs_info *info)
10889 struct btrfs_key key;
10890 struct extent_buffer *leaf;
10891 struct btrfs_path *path;
10893 if (!roots_info_cache) {
10894 roots_info_cache = malloc(sizeof(*roots_info_cache));
10895 if (!roots_info_cache)
10897 cache_tree_init(roots_info_cache);
10900 path = btrfs_alloc_path();
10905 key.type = BTRFS_EXTENT_ITEM_KEY;
10908 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10911 leaf = path->nodes[0];
10914 struct btrfs_key found_key;
10915 struct btrfs_extent_item *ei;
10916 struct btrfs_extent_inline_ref *iref;
10917 int slot = path->slots[0];
10922 struct cache_extent *entry;
10923 struct root_item_info *rii;
10925 if (slot >= btrfs_header_nritems(leaf)) {
10926 ret = btrfs_next_leaf(info->extent_root, path);
10933 leaf = path->nodes[0];
10934 slot = path->slots[0];
10937 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10939 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10940 found_key.type != BTRFS_METADATA_ITEM_KEY)
10943 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10944 flags = btrfs_extent_flags(leaf, ei);
10946 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10947 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10950 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10951 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10952 level = found_key.offset;
10954 struct btrfs_tree_block_info *binfo;
10956 binfo = (struct btrfs_tree_block_info *)(ei + 1);
10957 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10958 level = btrfs_tree_block_level(leaf, binfo);
10962 * For a root extent, it must be of the following type and the
10963 * first (and only one) iref in the item.
10965 type = btrfs_extent_inline_ref_type(leaf, iref);
10966 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10969 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10970 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10972 rii = malloc(sizeof(struct root_item_info));
10977 rii->cache_extent.start = root_id;
10978 rii->cache_extent.size = 1;
10979 rii->level = (u8)-1;
10980 entry = &rii->cache_extent;
10981 ret = insert_cache_extent(roots_info_cache, entry);
10984 rii = container_of(entry, struct root_item_info,
10988 ASSERT(rii->cache_extent.start == root_id);
10989 ASSERT(rii->cache_extent.size == 1);
10991 if (level > rii->level || rii->level == (u8)-1) {
10992 rii->level = level;
10993 rii->bytenr = found_key.objectid;
10994 rii->gen = btrfs_extent_generation(leaf, ei);
10995 rii->node_count = 1;
10996 } else if (level == rii->level) {
11004 btrfs_free_path(path);
11009 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11010 struct btrfs_path *path,
11011 const struct btrfs_key *root_key,
11012 const int read_only_mode)
11014 const u64 root_id = root_key->objectid;
11015 struct cache_extent *entry;
11016 struct root_item_info *rii;
11017 struct btrfs_root_item ri;
11018 unsigned long offset;
11020 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11023 "Error: could not find extent items for root %llu\n",
11024 root_key->objectid);
11028 rii = container_of(entry, struct root_item_info, cache_extent);
11029 ASSERT(rii->cache_extent.start == root_id);
11030 ASSERT(rii->cache_extent.size == 1);
11032 if (rii->node_count != 1) {
11034 "Error: could not find btree root extent for root %llu\n",
11039 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11040 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11042 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11043 btrfs_root_level(&ri) != rii->level ||
11044 btrfs_root_generation(&ri) != rii->gen) {
11047 * If we're in repair mode but our caller told us to not update
11048 * the root item, i.e. just check if it needs to be updated, don't
11049 * print this message, since the caller will call us again shortly
11050 * for the same root item without read only mode (the caller will
11051 * open a transaction first).
11053 if (!(read_only_mode && repair))
11055 "%sroot item for root %llu,"
11056 " current bytenr %llu, current gen %llu, current level %u,"
11057 " new bytenr %llu, new gen %llu, new level %u\n",
11058 (read_only_mode ? "" : "fixing "),
11060 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11061 btrfs_root_level(&ri),
11062 rii->bytenr, rii->gen, rii->level);
11064 if (btrfs_root_generation(&ri) > rii->gen) {
11066 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11067 root_id, btrfs_root_generation(&ri), rii->gen);
11071 if (!read_only_mode) {
11072 btrfs_set_root_bytenr(&ri, rii->bytenr);
11073 btrfs_set_root_level(&ri, rii->level);
11074 btrfs_set_root_generation(&ri, rii->gen);
11075 write_extent_buffer(path->nodes[0], &ri,
11076 offset, sizeof(ri));
11086 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11087 * caused read-only snapshots to be corrupted if they were created at a moment
11088 * when the source subvolume/snapshot had orphan items. The issue was that the
11089 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11090 * node instead of the post orphan cleanup root node.
11091 * So this function, and its callees, just detects and fixes those cases. Even
11092 * though the regression was for read-only snapshots, this function applies to
11093 * any snapshot/subvolume root.
11094 * This must be run before any other repair code - not doing it so, makes other
11095 * repair code delete or modify backrefs in the extent tree for example, which
11096 * will result in an inconsistent fs after repairing the root items.
11098 static int repair_root_items(struct btrfs_fs_info *info)
11100 struct btrfs_path *path = NULL;
11101 struct btrfs_key key;
11102 struct extent_buffer *leaf;
11103 struct btrfs_trans_handle *trans = NULL;
11106 int need_trans = 0;
11108 ret = build_roots_info_cache(info);
11112 path = btrfs_alloc_path();
11118 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11119 key.type = BTRFS_ROOT_ITEM_KEY;
11124 * Avoid opening and committing transactions if a leaf doesn't have
11125 * any root items that need to be fixed, so that we avoid rotating
11126 * backup roots unnecessarily.
11129 trans = btrfs_start_transaction(info->tree_root, 1);
11130 if (IS_ERR(trans)) {
11131 ret = PTR_ERR(trans);
11136 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11140 leaf = path->nodes[0];
11143 struct btrfs_key found_key;
11145 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11146 int no_more_keys = find_next_key(path, &key);
11148 btrfs_release_path(path);
11150 ret = btrfs_commit_transaction(trans,
11162 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11164 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11166 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11169 ret = maybe_repair_root_item(info, path, &found_key,
11174 if (!trans && repair) {
11177 btrfs_release_path(path);
11187 free_roots_info_cache();
11188 btrfs_free_path(path);
11190 btrfs_commit_transaction(trans, info->tree_root);
11197 const char * const cmd_check_usage[] = {
11198 "btrfs check [options] <device>",
11199 "Check structural integrity of a filesystem (unmounted).",
11200 "Check structural integrity of an unmounted filesystem. Verify internal",
11201 "trees' consistency and item connectivity. In the repair mode try to",
11202 "fix the problems found. ",
11203 "WARNING: the repair mode is considered dangerous",
11205 "-s|--super <superblock> use this superblock copy",
11206 "-b|--backup use the first valid backup root copy",
11207 "--repair try to repair the filesystem",
11208 "--readonly run in read-only mode (default)",
11209 "--init-csum-tree create a new CRC tree",
11210 "--init-extent-tree create a new extent tree",
11211 "--mode <MODE> select mode, allows to make some memory/IO",
11212 " trade-offs, where MODE is one of:",
11213 " original - read inodes and extents to memory (requires",
11214 " more memory, does less IO)",
11215 " lowmem - try to use less memory but read blocks again",
11217 "--check-data-csum verify checksums of data blocks",
11218 "-Q|--qgroup-report print a report on qgroup consistency",
11219 "-E|--subvol-extents <subvolid>",
11220 " print subvolume extents and sharing state",
11221 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11222 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11223 "-p|--progress indicate progress",
11227 int cmd_check(int argc, char **argv)
11229 struct cache_tree root_cache;
11230 struct btrfs_root *root;
11231 struct btrfs_fs_info *info;
11234 u64 tree_root_bytenr = 0;
11235 u64 chunk_root_bytenr = 0;
11236 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11239 int init_csum_tree = 0;
11241 int qgroup_report = 0;
11242 int qgroups_repaired = 0;
11243 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11247 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11248 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11249 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11251 static const struct option long_options[] = {
11252 { "super", required_argument, NULL, 's' },
11253 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11254 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11255 { "init-csum-tree", no_argument, NULL,
11256 GETOPT_VAL_INIT_CSUM },
11257 { "init-extent-tree", no_argument, NULL,
11258 GETOPT_VAL_INIT_EXTENT },
11259 { "check-data-csum", no_argument, NULL,
11260 GETOPT_VAL_CHECK_CSUM },
11261 { "backup", no_argument, NULL, 'b' },
11262 { "subvol-extents", required_argument, NULL, 'E' },
11263 { "qgroup-report", no_argument, NULL, 'Q' },
11264 { "tree-root", required_argument, NULL, 'r' },
11265 { "chunk-root", required_argument, NULL,
11266 GETOPT_VAL_CHUNK_TREE },
11267 { "progress", no_argument, NULL, 'p' },
11268 { "mode", required_argument, NULL,
11270 { NULL, 0, NULL, 0}
11273 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11277 case 'a': /* ignored */ break;
11279 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11282 num = arg_strtou64(optarg);
11283 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11285 "ERROR: super mirror should be less than: %d\n",
11286 BTRFS_SUPER_MIRROR_MAX);
11289 bytenr = btrfs_sb_offset(((int)num));
11290 printf("using SB copy %llu, bytenr %llu\n", num,
11291 (unsigned long long)bytenr);
11297 subvolid = arg_strtou64(optarg);
11300 tree_root_bytenr = arg_strtou64(optarg);
11302 case GETOPT_VAL_CHUNK_TREE:
11303 chunk_root_bytenr = arg_strtou64(optarg);
11306 ctx.progress_enabled = true;
11310 usage(cmd_check_usage);
11311 case GETOPT_VAL_REPAIR:
11312 printf("enabling repair mode\n");
11314 ctree_flags |= OPEN_CTREE_WRITES;
11316 case GETOPT_VAL_READONLY:
11319 case GETOPT_VAL_INIT_CSUM:
11320 printf("Creating a new CRC tree\n");
11321 init_csum_tree = 1;
11323 ctree_flags |= OPEN_CTREE_WRITES;
11325 case GETOPT_VAL_INIT_EXTENT:
11326 init_extent_tree = 1;
11327 ctree_flags |= (OPEN_CTREE_WRITES |
11328 OPEN_CTREE_NO_BLOCK_GROUPS);
11331 case GETOPT_VAL_CHECK_CSUM:
11332 check_data_csum = 1;
11334 case GETOPT_VAL_MODE:
11335 check_mode = parse_check_mode(optarg);
11336 if (check_mode == CHECK_MODE_UNKNOWN) {
11337 error("unknown mode: %s", optarg);
11344 if (check_argc_exact(argc - optind, 1))
11345 usage(cmd_check_usage);
11347 if (ctx.progress_enabled) {
11348 ctx.tp = TASK_NOTHING;
11349 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11352 /* This check is the only reason for --readonly to exist */
11353 if (readonly && repair) {
11354 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11359 * Not supported yet
11361 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11362 error("Low memory mode doesn't support repair yet");
11367 cache_tree_init(&root_cache);
11369 if((ret = check_mounted(argv[optind])) < 0) {
11370 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11373 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11378 /* only allow partial opening under repair mode */
11380 ctree_flags |= OPEN_CTREE_PARTIAL;
11382 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11383 chunk_root_bytenr, ctree_flags);
11385 fprintf(stderr, "Couldn't open file system\n");
11390 global_info = info;
11391 root = info->fs_root;
11394 * repair mode will force us to commit transaction which
11395 * will make us fail to load log tree when mounting.
11397 if (repair && btrfs_super_log_root(info->super_copy)) {
11398 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11403 ret = zero_log_tree(root);
11405 fprintf(stderr, "fail to zero log tree\n");
11410 uuid_unparse(info->super_copy->fsid, uuidbuf);
11411 if (qgroup_report) {
11412 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11414 ret = qgroup_verify_all(info);
11420 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11421 subvolid, argv[optind], uuidbuf);
11422 ret = print_extent_state(info, subvolid);
11425 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11427 if (!extent_buffer_uptodate(info->tree_root->node) ||
11428 !extent_buffer_uptodate(info->dev_root->node) ||
11429 !extent_buffer_uptodate(info->chunk_root->node)) {
11430 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11435 if (init_extent_tree || init_csum_tree) {
11436 struct btrfs_trans_handle *trans;
11438 trans = btrfs_start_transaction(info->extent_root, 0);
11439 if (IS_ERR(trans)) {
11440 fprintf(stderr, "Error starting transaction\n");
11441 ret = PTR_ERR(trans);
11445 if (init_extent_tree) {
11446 printf("Creating a new extent tree\n");
11447 ret = reinit_extent_tree(trans, info);
11452 if (init_csum_tree) {
11453 fprintf(stderr, "Reinit crc root\n");
11454 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11456 fprintf(stderr, "crc root initialization failed\n");
11461 ret = fill_csum_tree(trans, info->csum_root,
11464 fprintf(stderr, "crc refilling failed\n");
11469 * Ok now we commit and run the normal fsck, which will add
11470 * extent entries for all of the items it finds.
11472 ret = btrfs_commit_transaction(trans, info->extent_root);
11476 if (!extent_buffer_uptodate(info->extent_root->node)) {
11477 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11481 if (!extent_buffer_uptodate(info->csum_root->node)) {
11482 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
11487 if (!ctx.progress_enabled)
11488 fprintf(stderr, "checking extents\n");
11489 if (check_mode == CHECK_MODE_LOWMEM)
11490 ret = check_chunks_and_extents_v2(root);
11492 ret = check_chunks_and_extents(root);
11494 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
11496 ret = repair_root_items(info);
11500 fprintf(stderr, "Fixed %d roots.\n", ret);
11502 } else if (ret > 0) {
11504 "Found %d roots with an outdated root item.\n",
11507 "Please run a filesystem check with the option --repair to fix them.\n");
11512 if (!ctx.progress_enabled) {
11513 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11514 fprintf(stderr, "checking free space tree\n");
11516 fprintf(stderr, "checking free space cache\n");
11518 ret = check_space_cache(root);
11523 * We used to have to have these hole extents in between our real
11524 * extents so if we don't have this flag set we need to make sure there
11525 * are no gaps in the file extents for inodes, otherwise we can just
11526 * ignore it when this happens.
11528 no_holes = btrfs_fs_incompat(root->fs_info,
11529 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11530 if (!ctx.progress_enabled)
11531 fprintf(stderr, "checking fs roots\n");
11532 ret = check_fs_roots(root, &root_cache);
11536 fprintf(stderr, "checking csums\n");
11537 ret = check_csums(root);
11541 fprintf(stderr, "checking root refs\n");
11542 ret = check_root_refs(root, &root_cache);
11546 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11547 struct extent_buffer *eb;
11549 eb = list_first_entry(&root->fs_info->recow_ebs,
11550 struct extent_buffer, recow);
11551 list_del_init(&eb->recow);
11552 ret = recow_extent_buffer(root, eb);
11557 while (!list_empty(&delete_items)) {
11558 struct bad_item *bad;
11560 bad = list_first_entry(&delete_items, struct bad_item, list);
11561 list_del_init(&bad->list);
11563 ret = delete_bad_item(root, bad);
11567 if (info->quota_enabled) {
11569 fprintf(stderr, "checking quota groups\n");
11570 err = qgroup_verify_all(info);
11574 err = repair_qgroups(info, &qgroups_repaired);
11579 if (!list_empty(&root->fs_info->recow_ebs)) {
11580 fprintf(stderr, "Transid errors in file system\n");
11584 /* Don't override original ret */
11585 if (!ret && qgroups_repaired)
11586 ret = qgroups_repaired;
11588 if (found_old_backref) { /*
11589 * there was a disk format change when mixed
11590 * backref was in testing tree. The old format
11591 * existed about one week.
11593 printf("\n * Found old mixed backref format. "
11594 "The old format is not supported! *"
11595 "\n * Please mount the FS in readonly mode, "
11596 "backup data and re-format the FS. *\n\n");
11599 printf("found %llu bytes used err is %d\n",
11600 (unsigned long long)bytes_used, ret);
11601 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11602 printf("total tree bytes: %llu\n",
11603 (unsigned long long)total_btree_bytes);
11604 printf("total fs tree bytes: %llu\n",
11605 (unsigned long long)total_fs_tree_bytes);
11606 printf("total extent tree bytes: %llu\n",
11607 (unsigned long long)total_extent_tree_bytes);
11608 printf("btree space waste bytes: %llu\n",
11609 (unsigned long long)btree_space_waste);
11610 printf("file data blocks allocated: %llu\n referenced %llu\n",
11611 (unsigned long long)data_bytes_allocated,
11612 (unsigned long long)data_bytes_referenced);
11614 free_qgroup_counts();
11615 free_root_recs_tree(&root_cache);
11619 if (ctx.progress_enabled)
11620 task_deinit(ctx.info);