2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
/*
 * NOTE(review): this file is a partial extraction; every line keeps its
 * original line number as a prefix and many in-between lines (braces,
 * members, returns) are missing.  Comments describe only what is visible.
 */
/* Fragments of the progress-task enum/context; TASK_NOTHING must stay last. */
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
/* Global accounting accumulated while scanning the filesystem. */
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
/* Behaviour switches, presumably set from command-line options — TODO confirm. */
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
/* Check-mode selection; the default is the original (non-lowmem) mode. */
77 enum btrfs_check_mode {
81 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
84 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Common header embedded in both data and tree backrefs; the state bits
 * record what has been found for this backref so far.
 */
86 struct extent_backref {
88 unsigned int is_data:1;
89 unsigned int found_extent_tree:1;
90 unsigned int full_backref:1;
91 unsigned int found_ref:1;
92 unsigned int broken:1;
/* rb_node -> containing extent_backref (assumes member is named "node"). */
95 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 return rb_entry(node, struct extent_backref, node);
100 struct data_backref {
101 struct extent_backref node;
/* Downcast: extent_backref -> enclosing data_backref. */
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 return container_of(back, struct data_backref, node);
/*
 * rb-tree comparator for data backrefs: orders by parent/root, then
 * owner, offset, bytes, disk_bytenr (when both refs were found), and
 * finally by found_ref.  NOTE(review): the return statements between
 * the comparisons are missing from this extraction.
 */
120 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
122 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
123 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
124 struct data_backref *back1 = to_data_backref(ext1);
125 struct data_backref *back2 = to_data_backref(ext2);
/* Comparator is only valid for data backrefs. */
127 WARN_ON(!ext1->is_data);
128 WARN_ON(!ext2->is_data);
130 /* parent and root are a union, so this covers both */
131 if (back1->parent > back2->parent)
133 if (back1->parent < back2->parent)
136 /* This is a full backref and the parents match. */
137 if (back1->node.full_backref)
140 if (back1->owner > back2->owner)
142 if (back1->owner < back2->owner)
145 if (back1->offset > back2->offset)
147 if (back1->offset < back2->offset)
150 if (back1->bytes > back2->bytes)
152 if (back1->bytes < back2->bytes)
/* disk_bytenr only participates when both sides actually found a ref. */
155 if (back1->found_ref && back2->found_ref) {
156 if (back1->disk_bytenr > back2->disk_bytenr)
158 if (back1->disk_bytenr < back2->disk_bytenr)
161 if (back1->found_ref > back2->found_ref)
163 if (back1->found_ref < back2->found_ref)
171 * Much like data_backref, just removed the undetermined members
172 * and change it to use list_head.
173 * During extent scan, it is stored in root->orphan_data_extent.
174 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
176 struct orphan_data_extent {
177 struct list_head list;
/* Backref for a metadata (tree block) extent. */
185 struct tree_backref {
186 struct extent_backref node;
/* Downcast: extent_backref -> enclosing tree_backref. */
193 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
195 return container_of(back, struct tree_backref, node);
/*
 * rb-tree comparator for tree backrefs: orders by parent/root only.
 * NOTE(review): return statements are missing from this extraction.
 */
198 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
200 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
201 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
202 struct tree_backref *back1 = to_tree_backref(ext1);
203 struct tree_backref *back2 = to_tree_backref(ext2);
/* Comparator is only valid for metadata (non-data) backrefs. */
205 WARN_ON(ext1->is_data);
206 WARN_ON(ext2->is_data);
208 /* parent and root are a union, so this covers both */
209 if (back1->parent > back2->parent)
211 if (back1->parent < back2->parent)
/*
 * Top-level comparator: splits by is_data and full_backref first, then
 * dispatches to the data or tree comparator.
 */
217 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
219 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
220 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
222 if (ext1->is_data > ext2->is_data)
225 if (ext1->is_data < ext2->is_data)
228 if (ext1->full_backref > ext2->full_backref)
230 if (ext1->full_backref < ext2->full_backref)
234 return compare_data_backref(node1, node2);
236 return compare_tree_backref(node1, node2);
239 /* Explicit initialization for extent_record::flag_block_full_backref */
240 enum { FLAG_UNSET = 2 };
/*
 * Per-extent bookkeeping record built while walking the extent tree;
 * flag_block_full_backref is 2 bits wide so it can hold FLAG_UNSET (2).
 */
242 struct extent_record {
243 struct list_head backrefs;
244 struct list_head dups;
245 struct rb_root backref_tree;
246 struct list_head list;
247 struct cache_extent cache;
248 struct btrfs_disk_key parent_key;
253 u64 extent_item_refs;
255 u64 parent_generation;
259 unsigned int flag_block_full_backref:2;
260 unsigned int found_rec:1;
261 unsigned int content_checked:1;
262 unsigned int owner_ref_checked:1;
263 unsigned int is_root:1;
264 unsigned int metadata:1;
265 unsigned int bad_full_backref:1;
266 unsigned int crossing_stripes:1;
267 unsigned int wrong_chunk_type:1;
/* list_head -> containing extent_record (via the "list" member). */
270 static inline struct extent_record* to_extent_record(struct list_head *entry)
272 return container_of(entry, struct extent_record, list);
/* Directory-entry/inode-ref evidence for one name referencing an inode. */
275 struct inode_backref {
276 struct list_head list;
277 unsigned int found_dir_item:1;
278 unsigned int found_dir_index:1;
279 unsigned int found_inode_ref:1;
280 unsigned int filetype:8;
282 unsigned int ref_type;
/* list_head -> containing inode_backref. */
289 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
291 return list_entry(entry, struct inode_backref, list);
294 struct root_item_record {
295 struct list_head list;
302 struct btrfs_key drop_key;
/* Per-backref error bits; hex notes in trailing comments (1<<8 == 0x100). */
305 #define REF_ERR_NO_DIR_ITEM (1 << 0)
306 #define REF_ERR_NO_DIR_INDEX (1 << 1)
307 #define REF_ERR_NO_INODE_REF (1 << 2)
308 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
309 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
310 #define REF_ERR_DUP_INODE_REF (1 << 5)
311 #define REF_ERR_INDEX_UNMATCH (1 << 6)
312 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
313 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
314 #define REF_ERR_NO_ROOT_REF (1 << 9)
315 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
316 #define REF_ERR_DUP_ROOT_REF (1 << 11)
317 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/* One gap in a file's extent coverage, kept in an rb-tree per inode. */
319 struct file_extent_hole {
/*
 * Aggregated per-inode state collected while scanning an fs tree; the
 * found_* bits record which item types were seen for this inode.
 */
325 struct inode_record {
326 struct list_head backrefs;
327 unsigned int checked:1;
328 unsigned int merging:1;
329 unsigned int found_inode_item:1;
330 unsigned int found_dir_item:1;
331 unsigned int found_file_extent:1;
332 unsigned int found_csum_item:1;
333 unsigned int some_csum_missing:1;
334 unsigned int nodatasum:1;
347 struct rb_root holes;
348 struct list_head orphan_extents;
/* Per-inode error bits (hex notes: 1<<8 == 0x100, 1<<10 == 0x400). */
353 #define I_ERR_NO_INODE_ITEM (1 << 0)
354 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
355 #define I_ERR_DUP_INODE_ITEM (1 << 2)
356 #define I_ERR_DUP_DIR_INDEX (1 << 3)
357 #define I_ERR_ODD_DIR_ITEM (1 << 4)
358 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
359 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
360 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
361 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
362 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
363 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
364 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
365 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
366 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
367 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/* Evidence collected for one subvolume/root reference. */
369 struct root_backref {
370 struct list_head list;
371 unsigned int found_dir_item:1;
372 unsigned int found_dir_index:1;
373 unsigned int found_back_ref:1;
374 unsigned int found_forward_ref:1;
375 unsigned int reachable:1;
/* list_head -> containing root_backref. */
384 static inline struct root_backref* to_root_backref(struct list_head *entry)
386 return list_entry(entry, struct root_backref, list);
/*
 * NOTE(review): the struct headers for the next few fragments (likely a
 * root record, ptr_node and shared_node) are missing from this
 * extraction; only some members are visible.
 */
390 struct list_head backrefs;
391 struct cache_extent cache;
392 unsigned int found_root_item:1;
398 struct cache_extent cache;
403 struct cache_extent cache;
404 struct cache_tree root_cache;
405 struct cache_tree inode_cache;
406 struct inode_record *current;
/* State for walking a tree: per-level shared-node tracking. */
415 struct walk_control {
416 struct cache_tree shared;
417 struct shared_node *nodes[BTRFS_MAX_LEVEL];
423 struct btrfs_key key;
425 struct list_head list;
428 struct extent_entry {
433 struct list_head list;
436 struct root_item_info {
437 /* level of the root */
439 /* number of nodes at this level, must be 1 for a root */
443 struct cache_extent cache_extent;
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about them yet; they are internal-only
 * error classification, so the numeric values may be renumbered freely.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
/*
 * BUGFIX: CROSSING_STRIPE_BOUNDARY was also (1 << 4), colliding with
 * REFERENCER_MISMATCH and making the two conditions indistinguishable
 * in any combined error mask.  Renumbered so every bit is unique.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 5) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH	(1 << 6) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 7) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 8) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 9)
/*
 * Progress-printer thread body: once per period, prints the current task
 * name plus a spinner character and carriage-returns over it.
 */
463 static void *print_status_check(void *p)
465 struct task_ctx *priv = p;
466 const char work_indicator[] = { '.', 'o', 'O', 'o' };
467 static char *task_position_string[] = {
468 static char *task_position_string[] = {
470 "checking free space cache",
474 task_period_start(priv->info, 1000 /* 1s */);
/* Nothing to report when no task is active. */
476 if (priv->tp == TASK_NOTHING)
480 printf("%s [%c]\r", task_position_string[priv->tp],
481 work_indicator[count % 4]);
484 task_period_wait(priv->info);
489 static int print_status_return(void *p)
/*
 * Map a --mode argument string to a check mode; "orig" and "original"
 * are synonyms, anything else is CHECK_MODE_UNKNOWN.
 */
497 static enum btrfs_check_mode parse_check_mode(const char *str)
499 if (strcmp(str, "lowmem") == 0)
500 return CHECK_MODE_LOWMEM;
501 if (strcmp(str, "orig") == 0)
502 return CHECK_MODE_ORIGINAL;
503 if (strcmp(str, "original") == 0)
504 return CHECK_MODE_ORIGINAL;
506 return CHECK_MODE_UNKNOWN;
509 /* Compatible function to allow reuse of old codes */
/* Returns the start of the first (lowest-offset) hole in the tree. */
510 static u64 first_extent_gap(struct rb_root *holes)
512 struct file_extent_hole *hole;
514 if (RB_EMPTY_ROOT(holes))
517 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
/*
 * rb-tree comparator for holes: orders by start offset; never returns 0
 * (equal starts are ordered by len) so inserts cannot hit -EEXIST.
 */
521 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
523 struct file_extent_hole *hole1;
524 struct file_extent_hole *hole2;
526 hole1 = rb_entry(node1, struct file_extent_hole, node);
527 hole2 = rb_entry(node2, struct file_extent_hole, node);
529 if (hole1->start > hole2->start)
531 if (hole1->start < hole2->start)
533 /* Now hole1->start == hole2->start */
534 if (hole1->len >= hole2->len)
536 * Hole 1 will be merge center
537 * Same hole will be merged later
540 /* Hole 2 will be merge center */
545 * Add a hole to the record
547 * This will do hole merge for copy_file_extent_holes(),
548 * which will ensure there won't be continuous holes.
/*
 * Insert [start,len) as a hole, then merge with the previous hole and
 * iteratively with any following holes it now touches.
 */
550 static int add_file_extent_hole(struct rb_root *holes,
553 struct file_extent_hole *hole;
554 struct file_extent_hole *prev = NULL;
555 struct file_extent_hole *next = NULL;
557 hole = malloc(sizeof(*hole));
562 /* Since compare will not return 0, no -EEXIST will happen */
563 rb_insert(holes, &hole->node, compare_hole);
565 /* simple merge with previous hole */
566 if (rb_prev(&hole->node))
567 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
569 if (prev && prev->start + prev->len >= hole->start) {
/* Absorb prev into the new hole, then drop prev from the tree. */
570 hole->len = hole->start + hole->len - prev->start;
571 hole->start = prev->start;
572 rb_erase(&prev->node, holes);
577 /* iterate merge with next holes */
579 if (!rb_next(&hole->node))
581 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
583 if (hole->start + hole->len >= next->start) {
584 if (hole->start + hole->len <= next->start + next->len)
585 hole->len = next->start + next->len -
587 rb_erase(&next->node, holes);
/*
 * rb_search() comparator: "data" carries a probe hole whose start is the
 * query offset; matches when start falls inside the node's hole.
 */
596 static int compare_hole_range(struct rb_node *node, void *data)
598 struct file_extent_hole *hole;
601 hole = (struct file_extent_hole *)data;
604 hole = rb_entry(node, struct file_extent_hole, node);
605 if (start < hole->start)
607 if (start >= hole->start && start < hole->start + hole->len)
613 * Delete a hole in the record
615 * This will do the hole split and is much restrict than add.
/*
 * Remove [start,len) from the hole tree: the containing hole is erased
 * and up to two remainder holes (before and after the range) re-added.
 */
617 static int del_file_extent_hole(struct rb_root *holes,
620 struct file_extent_hole *hole;
621 struct file_extent_hole tmp;
626 struct rb_node *node;
633 node = rb_search(holes, &tmp, compare_hole_range, NULL);
636 hole = rb_entry(node, struct file_extent_hole, node);
/* The deleted range must fit entirely inside the found hole. */
637 if (start + len > hole->start + hole->len)
641 * Now there will be no overlap, delete the hole and re-add the
642 * split(s) if they exists.
644 if (start > hole->start) {
645 prev_start = hole->start;
646 prev_len = start - hole->start;
649 if (hole->start + hole->len > start + len) {
650 next_start = start + len;
651 next_len = hole->start + hole->len - start - len;
654 rb_erase(node, holes);
657 ret = add_file_extent_hole(holes, prev_start, prev_len);
662 ret = add_file_extent_hole(holes, next_start, next_len);
/* Copy every hole from src into dst (add_file_extent_hole merges). */
669 static int copy_file_extent_holes(struct rb_root *dst,
672 struct file_extent_hole *hole;
673 struct rb_node *node;
676 node = rb_first(src);
678 hole = rb_entry(node, struct file_extent_hole, node);
679 ret = add_file_extent_hole(dst, hole->start, hole->len);
682 node = rb_next(node);
/* Erase and free every hole in the tree. */
687 static void free_file_extent_holes(struct rb_root *holes)
689 struct rb_node *node;
690 struct file_extent_hole *hole;
692 node = rb_first(holes);
694 hole = rb_entry(node, struct file_extent_hole, node);
695 rb_erase(node, holes);
697 node = rb_first(holes);
701 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
/*
 * First touch of a root in this transaction: mark it dirty-tracked and
 * pin the current node as the commit root (taking an extra reference).
 */
703 static void record_root_in_trans(struct btrfs_trans_handle *trans,
704 struct btrfs_root *root)
706 if (root->last_trans != trans->transid) {
707 root->track_dirty = 1;
708 root->last_trans = trans->transid;
709 root->commit_root = root->node;
710 extent_buffer_get(root->node);
/* Map an inode mode's S_IFMT bits to the BTRFS_FT_* dir-entry type. */
714 static u8 imode_to_type(u32 imode)
717 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
718 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
719 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
720 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
721 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
722 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
723 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
724 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
727 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/* rb-tree comparator for device records, ordered by devid. */
731 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
733 struct device_record *rec1;
734 struct device_record *rec2;
736 rec1 = rb_entry(node1, struct device_record, node);
737 rec2 = rb_entry(node2, struct device_record, node);
738 if (rec1->devid > rec2->devid)
740 else if (rec1->devid < rec2->devid)
/*
 * Deep-copy an inode record: the struct itself, each backref (including
 * its inline name), each orphan data extent, and the hole tree.
 * Returns ERR_PTR(-ENOMEM) on allocation failure; on partial failure
 * the already-copied lists are torn down before returning.
 */
746 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
748 struct inode_record *rec;
749 struct inode_backref *backref;
750 struct inode_backref *orig;
751 struct inode_backref *tmp;
752 struct orphan_data_extent *src_orphan;
753 struct orphan_data_extent *dst_orphan;
757 rec = malloc(sizeof(*rec));
759 return ERR_PTR(-ENOMEM);
760 memcpy(rec, orig_rec, sizeof(*rec));
/* The copied list heads/tree point into orig_rec; reset them first. */
762 INIT_LIST_HEAD(&rec->backrefs);
763 INIT_LIST_HEAD(&rec->orphan_extents);
764 rec->holes = RB_ROOT;
766 list_for_each_entry(orig, &orig_rec->backrefs, list) {
/* +1 for the NUL after the inline name. */
767 size = sizeof(*orig) + orig->namelen + 1;
768 backref = malloc(size);
773 memcpy(backref, orig, size);
774 list_add_tail(&backref->list, &rec->backrefs);
776 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
777 dst_orphan = malloc(sizeof(*dst_orphan));
782 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
783 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
785 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
/* Error path: free everything copied so far. */
791 if (!list_empty(&rec->backrefs))
792 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
793 list_del(&orig->list);
797 if (!list_empty(&rec->orphan_extents))
798 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
799 list_del(&orig->list);
/* Print every lost (orphan) data extent recorded for a tree. */
808 static void print_orphan_data_extents(struct list_head *orphan_extents,
811 struct orphan_data_extent *orphan;
813 if (list_empty(orphan_extents))
815 printf("The following data extent is lost in tree %llu:\n",
817 list_for_each_entry(orphan, orphan_extents, list) {
818 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
819 orphan->objectid, orphan->offset, orphan->disk_bytenr,
/*
 * Human-readable report of every I_ERR_* bit set on an inode record,
 * followed by the orphan extents and/or extent holes when relevant.
 */
824 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
826 u64 root_objectid = root->root_key.objectid;
827 int errors = rec->errors;
831 /* reloc root errors, we print its corresponding fs root objectid*/
832 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
833 root_objectid = root->root_key.offset;
834 fprintf(stderr, "reloc");
836 fprintf(stderr, "root %llu inode %llu errors %x",
837 (unsigned long long) root_objectid,
838 (unsigned long long) rec->ino, rec->errors);
/* One clause per error bit, in bit order. */
840 if (errors & I_ERR_NO_INODE_ITEM)
841 fprintf(stderr, ", no inode item");
842 if (errors & I_ERR_NO_ORPHAN_ITEM)
843 fprintf(stderr, ", no orphan item");
844 if (errors & I_ERR_DUP_INODE_ITEM)
845 fprintf(stderr, ", dup inode item");
846 if (errors & I_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & I_ERR_ODD_DIR_ITEM)
849 fprintf(stderr, ", odd dir item");
850 if (errors & I_ERR_ODD_FILE_EXTENT)
851 fprintf(stderr, ", odd file extent");
852 if (errors & I_ERR_BAD_FILE_EXTENT)
853 fprintf(stderr, ", bad file extent");
854 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
855 fprintf(stderr, ", file extent overlap");
856 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
857 fprintf(stderr, ", file extent discount");
858 if (errors & I_ERR_DIR_ISIZE_WRONG)
859 fprintf(stderr, ", dir isize wrong");
860 if (errors & I_ERR_FILE_NBYTES_WRONG)
861 fprintf(stderr, ", nbytes wrong");
862 if (errors & I_ERR_ODD_CSUM_ITEM)
863 fprintf(stderr, ", odd csum item");
864 if (errors & I_ERR_SOME_CSUM_MISSING)
865 fprintf(stderr, ", some csum missing");
866 if (errors & I_ERR_LINK_COUNT_WRONG)
867 fprintf(stderr, ", link count wrong");
868 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
869 fprintf(stderr, ", orphan file extent");
870 fprintf(stderr, "\n");
871 /* Print the orphan extents if needed */
872 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
873 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
875 /* Print the holes if needed */
876 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
877 struct file_extent_hole *hole;
878 struct rb_node *node;
881 node = rb_first(&rec->holes);
882 fprintf(stderr, "Found file extent holes:\n");
885 hole = rb_entry(node, struct file_extent_hole, node);
886 fprintf(stderr, "\tstart: %llu, len: %llu\n",
887 hole->start, hole->len);
888 node = rb_next(node);
/* No recorded holes: the whole rounded-up isize is treated as one hole. */
891 fprintf(stderr, "\tstart: 0, len: %llu\n",
892 round_up(rec->isize, root->sectorsize));
/* Append a clause to stderr for each REF_ERR_* bit set, then a newline. */
896 static void print_ref_error(int errors)
898 if (errors & REF_ERR_NO_DIR_ITEM)
899 fprintf(stderr, ", no dir item");
900 if (errors & REF_ERR_NO_DIR_INDEX)
901 fprintf(stderr, ", no dir index");
902 if (errors & REF_ERR_NO_INODE_REF)
903 fprintf(stderr, ", no inode ref");
904 if (errors & REF_ERR_DUP_DIR_ITEM)
905 fprintf(stderr, ", dup dir item");
906 if (errors & REF_ERR_DUP_DIR_INDEX)
907 fprintf(stderr, ", dup dir index");
908 if (errors & REF_ERR_DUP_INODE_REF)
909 fprintf(stderr, ", dup inode ref");
910 if (errors & REF_ERR_INDEX_UNMATCH)
911 fprintf(stderr, ", index mismatch");
912 if (errors & REF_ERR_FILETYPE_UNMATCH)
913 fprintf(stderr, ", filetype mismatch");
914 if (errors & REF_ERR_NAME_TOO_LONG)
915 fprintf(stderr, ", name too long");
916 if (errors & REF_ERR_NO_ROOT_REF)
917 fprintf(stderr, ", no root ref");
918 if (errors & REF_ERR_NO_ROOT_BACKREF)
919 fprintf(stderr, ", no root backref");
920 if (errors & REF_ERR_DUP_ROOT_REF)
921 fprintf(stderr, ", dup root ref");
922 if (errors & REF_ERR_DUP_ROOT_BACKREF)
923 fprintf(stderr, ", dup root backref");
924 fprintf(stderr, "\n");
/*
 * Look up (or lazily create) the inode record for "ino" in the cache.
 * With mod set, a record shared by multiple trees (refs > 1) is cloned
 * first so the caller can modify it.  Returns ERR_PTR on failure.
 */
927 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
930 struct ptr_node *node;
931 struct cache_extent *cache;
932 struct inode_record *rec = NULL;
935 cache = lookup_cache_extent(inode_cache, ino, 1);
937 node = container_of(cache, struct ptr_node, cache);
939 if (mod && rec->refs > 1) {
940 node->data = clone_inode_rec(rec);
941 if (IS_ERR(node->data))
/* Cache miss: build a fresh record and insert it. */
947 rec = calloc(1, sizeof(*rec));
949 return ERR_PTR(-ENOMEM);
/* (u64)-1 == "no extent seen yet". */
951 rec->extent_start = (u64)-1;
953 INIT_LIST_HEAD(&rec->backrefs);
954 INIT_LIST_HEAD(&rec->orphan_extents);
955 rec->holes = RB_ROOT;
957 node = malloc(sizeof(*node));
960 return ERR_PTR(-ENOMEM);
962 node->cache.start = ino;
963 node->cache.size = 1;
/* The free-ino objectid is special-cased — see missing line; TODO confirm. */
966 if (ino == BTRFS_FREE_INO_OBJECTID)
969 ret = insert_cache_extent(inode_cache, &node->cache);
971 return ERR_PTR(-EEXIST);
/* Pop and free every orphan_data_extent on the list. */
976 static void free_orphan_data_extents(struct list_head *orphan_extents)
978 struct orphan_data_extent *orphan;
980 while (!list_empty(orphan_extents)) {
981 orphan = list_entry(orphan_extents->next,
982 struct orphan_data_extent, list);
983 list_del(&orphan->list);
/* Release a record's backrefs, orphan extents and hole tree. */
988 static void free_inode_rec(struct inode_record *rec)
990 struct inode_backref *backref;
995 while (!list_empty(&rec->backrefs)) {
996 backref = to_inode_backref(rec->backrefs.next);
997 list_del(&backref->list);
1000 free_orphan_data_extents(&rec->orphan_extents);
1001 free_file_extent_holes(&rec->holes);
/* A record is disposable once fully checked, error-free and unreferenced. */
1005 static int can_free_inode_rec(struct inode_record *rec)
1007 if (!rec->errors && rec->checked && rec->found_inode_item &&
1008 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
/*
 * Finalize an inode record: validate each backref's filetype, derive the
 * mode-dependent error bits (dir size, nbytes, holes, csums), and free
 * the record from the cache once can_free_inode_rec() allows it.
 */
1013 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1014 struct inode_record *rec)
1016 struct cache_extent *cache;
1017 struct inode_backref *tmp, *backref;
1018 struct ptr_node *node;
1019 unsigned char filetype;
1021 if (!rec->found_inode_item)
1024 filetype = imode_to_type(rec->imode);
/* Drop backrefs that are fully confirmed and consistent. */
1025 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1026 if (backref->found_dir_item && backref->found_dir_index) {
1027 if (backref->filetype != filetype)
1028 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1029 if (!backref->errors && backref->found_inode_ref &&
1030 rec->nlink == rec->found_link) {
1031 list_del(&backref->list);
1037 if (!rec->checked || rec->merging)
/* Mode-specific consistency checks. */
1040 if (S_ISDIR(rec->imode)) {
1041 if (rec->found_size != rec->isize)
1042 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1043 if (rec->found_file_extent)
1044 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1045 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1046 if (rec->found_dir_item)
1047 rec->errors |= I_ERR_ODD_DIR_ITEM;
1048 if (rec->found_size != rec->nbytes)
1049 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
/* Hole check is skipped entirely when the fs has NO_HOLES semantics. */
1050 if (rec->nlink > 0 && !no_holes &&
1051 (rec->extent_end < rec->isize ||
1052 first_extent_gap(&rec->holes) < rec->isize))
1053 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1056 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1057 if (rec->found_csum_item && rec->nodatasum)
1058 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1059 if (rec->some_csum_missing && !rec->nodatasum)
1060 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1063 BUG_ON(rec->refs != 1);
1064 if (can_free_inode_rec(rec)) {
1065 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1066 node = container_of(cache, struct ptr_node, cache);
1067 BUG_ON(node->data != rec);
1068 remove_cache_extent(inode_cache, &node->cache);
1070 free_inode_rec(rec);
/* Look up the ORPHAN_ITEM key for "ino" (read-only search, path released). */
1074 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1076 struct btrfs_path path;
1077 struct btrfs_key key;
1080 key.objectid = BTRFS_ORPHAN_OBJECTID;
1081 key.type = BTRFS_ORPHAN_ITEM_KEY;
1084 btrfs_init_path(&path);
1085 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1086 btrfs_release_path(&path);
/*
 * Record an INODE_ITEM into the active node's current inode record;
 * a second inode item for the same inode is flagged as a duplicate.
 */
1092 static int process_inode_item(struct extent_buffer *eb,
1093 int slot, struct btrfs_key *key,
1094 struct shared_node *active_node)
1096 struct inode_record *rec;
1097 struct btrfs_inode_item *item;
1099 rec = active_node->current;
1100 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1101 if (rec->found_inode_item) {
1102 rec->errors |= I_ERR_DUP_INODE_ITEM;
1105 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1106 rec->nlink = btrfs_inode_nlink(eb, item);
1107 rec->isize = btrfs_inode_size(eb, item);
1108 rec->nbytes = btrfs_inode_nbytes(eb, item);
1109 rec->imode = btrfs_inode_mode(eb, item);
1110 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1112 rec->found_inode_item = 1;
/* nlink == 0 means the inode should carry an orphan item. */
1113 if (rec->nlink == 0)
1114 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1115 maybe_free_inode_rec(&active_node->inode_cache, rec);
/*
 * Find the backref matching (name, dir) on a record, or allocate a new
 * zeroed one with the name copied inline (NUL-terminated).
 */
1119 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1121 int namelen, u64 dir)
1123 struct inode_backref *backref;
1125 list_for_each_entry(backref, &rec->backrefs, list) {
1126 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1128 if (backref->dir != dir || backref->namelen != namelen)
1130 if (memcmp(name, backref->name, namelen))
1135 backref = malloc(sizeof(*backref) + namelen + 1);
1138 memset(backref, 0, sizeof(*backref));
1140 backref->namelen = namelen;
1141 memcpy(backref->name, name, namelen);
1142 backref->name[namelen] = '\0';
1143 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Record one piece of backref evidence (DIR_INDEX, DIR_ITEM, or
 * INODE_REF/EXTREF item) for (ino, dir, name), cross-checking index and
 * filetype against evidence already collected and OR-ing in error bits.
 */
1147 static int add_inode_backref(struct cache_tree *inode_cache,
1148 u64 ino, u64 dir, u64 index,
1149 const char *name, int namelen,
1150 int filetype, int itemtype, int errors)
1152 struct inode_record *rec;
1153 struct inode_backref *backref;
1155 rec = get_inode_rec(inode_cache, ino, 1);
1156 BUG_ON(IS_ERR(rec));
1157 backref = get_inode_backref(rec, name, namelen, dir);
1160 backref->errors |= errors;
1161 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1162 if (backref->found_dir_index)
1163 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1164 if (backref->found_inode_ref && backref->index != index)
1165 backref->errors |= REF_ERR_INDEX_UNMATCH;
1166 if (backref->found_dir_item && backref->filetype != filetype)
1167 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1169 backref->index = index;
1170 backref->filetype = filetype;
1171 backref->found_dir_index = 1;
1172 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1174 if (backref->found_dir_item)
1175 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1176 if (backref->found_dir_index && backref->filetype != filetype)
1177 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1179 backref->filetype = filetype;
1180 backref->found_dir_item = 1;
1181 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1182 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1183 if (backref->found_inode_ref)
1184 backref->errors |= REF_ERR_DUP_INODE_REF;
1185 if (backref->found_dir_index && backref->index != index)
1186 backref->errors |= REF_ERR_INDEX_UNMATCH;
1188 backref->index = index;
1190 backref->ref_type = itemtype;
1191 backref->found_inode_ref = 1;
1196 maybe_free_inode_rec(inode_cache, rec);
/*
 * Fold the evidence from "src" into "dst": replay src's backrefs into
 * dst_cache, merge flags, link/size counters, extent ranges (recording
 * overlaps/holes), and take src's inode item if dst has none yet.
 */
1200 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1201 struct cache_tree *dst_cache)
1203 struct inode_backref *backref;
/* Replay each kind of evidence via add_inode_backref. */
1208 list_for_each_entry(backref, &src->backrefs, list) {
1209 if (backref->found_dir_index) {
1210 add_inode_backref(dst_cache, dst->ino, backref->dir,
1211 backref->index, backref->name,
1212 backref->namelen, backref->filetype,
1213 BTRFS_DIR_INDEX_KEY, backref->errors);
1215 if (backref->found_dir_item) {
1217 add_inode_backref(dst_cache, dst->ino,
1218 backref->dir, 0, backref->name,
1219 backref->namelen, backref->filetype,
1220 BTRFS_DIR_ITEM_KEY, backref->errors);
1222 if (backref->found_inode_ref) {
1223 add_inode_backref(dst_cache, dst->ino,
1224 backref->dir, backref->index,
1225 backref->name, backref->namelen, 0,
1226 backref->ref_type, backref->errors);
1230 if (src->found_dir_item)
1231 dst->found_dir_item = 1;
1232 if (src->found_file_extent)
1233 dst->found_file_extent = 1;
1234 if (src->found_csum_item)
1235 dst->found_csum_item = 1;
1236 if (src->some_csum_missing)
1237 dst->some_csum_missing = 1;
/* Only copy holes when src has an earlier first gap than dst. */
1238 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1239 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1244 BUG_ON(src->found_link < dir_count);
1245 dst->found_link += src->found_link - dir_count;
1246 dst->found_size += src->found_size;
1247 if (src->extent_start != (u64)-1) {
1248 if (dst->extent_start == (u64)-1) {
1249 dst->extent_start = src->extent_start;
1250 dst->extent_end = src->extent_end;
/* Ranges touch or overlap: flag overlap, or record the gap as a hole. */
1252 if (dst->extent_end > src->extent_start)
1253 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1254 else if (dst->extent_end < src->extent_start) {
1255 ret = add_file_extent_hole(&dst->holes,
1257 src->extent_start - dst->extent_end);
1259 if (dst->extent_end < src->extent_end)
1260 dst->extent_end = src->extent_end;
1264 dst->errors |= src->errors;
1265 if (src->found_inode_item) {
1266 if (!dst->found_inode_item) {
1267 dst->nlink = src->nlink;
1268 dst->isize = src->isize;
1269 dst->nbytes = src->nbytes;
1270 dst->imode = src->imode;
1271 dst->nodatasum = src->nodatasum;
1272 dst->found_inode_item = 1;
1274 dst->errors |= I_ERR_DUP_INODE_ITEM;
/*
 * Move every inode record from src_node's root and inode caches into
 * dst_node, merging (via merge_inode_recs) on collision, and carry over
 * the "current" inode pointer when src was further along.
 */
1282 static int splice_shared_node(struct shared_node *src_node,
1283 struct shared_node *dst_node)
1285 struct cache_extent *cache;
1286 struct ptr_node *node, *ins;
1287 struct cache_tree *src, *dst;
1288 struct inode_record *rec, *conflict;
1289 u64 current_ino = 0;
1293 if (--src_node->refs == 0)
1295 if (src_node->current)
1296 current_ino = src_node->current->ino;
/* First pass over the root cache; second pass switches to inode cache. */
1298 src = &src_node->root_cache;
1299 dst = &dst_node->root_cache;
1301 cache = search_cache_extent(src, 0);
1303 node = container_of(cache, struct ptr_node, cache);
1305 cache = next_cache_extent(cache);
1308 remove_cache_extent(src, &node->cache);
1311 ins = malloc(sizeof(*ins));
1313 ins->cache.start = node->cache.start;
1314 ins->cache.size = node->cache.size;
1318 ret = insert_cache_extent(dst, &ins->cache);
/* Same ino already present in dst: merge and drop the source record. */
1319 if (ret == -EEXIST) {
1320 conflict = get_inode_rec(dst, rec->ino, 1);
1321 BUG_ON(IS_ERR(conflict));
1322 merge_inode_recs(rec, conflict, dst);
1324 conflict->checked = 1;
1325 if (dst_node->current == conflict)
1326 dst_node->current = NULL;
1328 maybe_free_inode_rec(dst, conflict);
1329 free_inode_rec(rec);
1336 if (src == &src_node->root_cache) {
1337 src = &src_node->inode_cache;
1338 dst = &dst_node->inode_cache;
/* Advance dst's "current" inode if src had progressed further. */
1342 if (current_ino > 0 && (!dst_node->current ||
1343 current_ino > dst_node->current->ino)) {
1344 if (dst_node->current) {
1345 dst_node->current->checked = 1;
1346 maybe_free_inode_rec(dst, dst_node->current);
1348 dst_node->current = get_inode_rec(dst, current_ino, 1);
1349 BUG_ON(IS_ERR(dst_node->current));
/* Free one cache entry's ptr_node and its inode record. */
1354 static void free_inode_ptr(struct cache_extent *cache)
1356 struct ptr_node *node;
1357 struct inode_record *rec;
1359 node = container_of(cache, struct ptr_node, cache);
1361 free_inode_rec(rec);
/* Generates free_inode_recs_tree() over a cache_tree of ptr_nodes. */
1365 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1367 static struct shared_node *find_shared_node(struct cache_tree *shared,
1370 struct cache_extent *cache;
1371 struct shared_node *node;
1373 cache = lookup_cache_extent(shared, bytenr, 1);
1375 node = container_of(cache, struct shared_node, cache);
1381 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1384 struct shared_node *node;
1386 node = calloc(1, sizeof(*node));
1389 node->cache.start = bytenr;
1390 node->cache.size = 1;
1391 cache_tree_init(&node->root_cache);
1392 cache_tree_init(&node->inode_cache);
1395 ret = insert_cache_extent(shared, &node->cache);
/*
 * Record that the tree walk has entered a shared tree block @bytenr at
 * @level. On first visit, a shared_node is created and becomes the active
 * node; on a later visit, the records accumulated under the shared node
 * are spliced into the currently active node (or discarded when the
 * subvolume is dead, i.e. root_refs == 0).
 */
1400 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1401 struct walk_control *wc, int level)
1403 struct shared_node *node;
1404 struct shared_node *dest;
1407 if (level == wc->active_node)
1410 BUG_ON(wc->active_node <= level);
1411 node = find_shared_node(&wc->shared, bytenr);
/* First visit: create the tracking node and make it the active one. */
1413 ret = add_shared_node(&wc->shared, bytenr, refs);
1415 node = find_shared_node(&wc->shared, bytenr);
1416 wc->nodes[level] = node;
1417 wc->active_node = level;
/* Dead subvolume (no root refs): drop the accumulated records instead of
 * splicing them anywhere. */
1421 if (wc->root_level == wc->active_node &&
1422 btrfs_root_refs(&root->root_item) == 0) {
1423 if (--node->refs == 0) {
1424 free_inode_recs_tree(&node->root_cache);
1425 free_inode_recs_tree(&node->inode_cache);
1426 remove_cache_extent(&wc->shared, &node->cache);
/* Normal revisit: merge this shared node's records into the active node. */
1432 dest = wc->nodes[wc->active_node];
1433 splice_shared_node(node, dest);
1434 if (node->refs == 0) {
1435 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): when the walk pops back above the
 * shared block at @level, find the next tracked node further up and splice
 * this node's accumulated records into it (unless this was the root-level
 * node of a dead subvolume).
 */
1441 static int leave_shared_node(struct btrfs_root *root,
1442 struct walk_control *wc, int level)
1444 struct shared_node *node;
1445 struct shared_node *dest;
1448 if (level == wc->root_level)
/* Find the nearest tracked node above @level to become the new active one. */
1451 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1455 BUG_ON(i >= BTRFS_MAX_LEVEL);
1457 node = wc->nodes[wc->active_node];
1458 wc->nodes[wc->active_node] = NULL;
1459 wc->active_node = i;
1461 dest = wc->nodes[wc->active_node];
1462 if (wc->active_node < wc->root_level ||
1463 btrfs_root_refs(&root->root_item) > 0) {
/* Still referenced: records must be handed up, not dropped. */
1464 BUG_ON(node->refs <= 1);
1465 splice_shared_node(node, dest);
1467 BUG_ON(node->refs < 2);
1476 * 1 - if the root with id child_root_id is a child of root parent_root_id
1477 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1478 * has other root(s) as parent(s)
1479 * 2 - if the root child_root_id doesn't have any parent roots
1481 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1484 struct btrfs_path path;
1485 struct btrfs_key key;
1486 struct extent_buffer *leaf;
1490 btrfs_init_path(&path);
/* Fast path: look for a direct ROOT_REF (parent -> child) item. */
1492 key.objectid = parent_root_id;
1493 key.type = BTRFS_ROOT_REF_KEY;
1494 key.offset = child_root_id;
1495 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1499 btrfs_release_path(&path);
/* No direct ref: scan all ROOT_BACKREF items of the child to see whether
 * it has any parent at all, and whether one of them is @parent_root_id. */
1503 key.objectid = child_root_id;
1504 key.type = BTRFS_ROOT_BACKREF_KEY;
1506 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1512 leaf = path.nodes[0];
1513 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1514 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1517 leaf = path.nodes[0];
1520 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Stop once we leave the child's backref key range. */
1521 if (key.objectid != child_root_id ||
1522 key.type != BTRFS_ROOT_BACKREF_KEY)
1527 if (key.offset == parent_root_id) {
1528 btrfs_release_path(&path);
1535 btrfs_release_path(&path);
/* See the comment above: 0 = has other parents, 2 = orphan root. */
1538 return has_parent ? 0 : 2;
/*
 * Record every directory entry packed into the DIR_ITEM/DIR_INDEX item at
 * @slot of leaf @eb. Each name becomes an inode backref in the active
 * node's inode cache (or root cache for subvolume entries), and the
 * current inode record's found_size/found_dir_item state is updated.
 */
1541 static int process_dir_item(struct btrfs_root *root,
1542 struct extent_buffer *eb,
1543 int slot, struct btrfs_key *key,
1544 struct shared_node *active_node)
1554 struct btrfs_dir_item *di;
1555 struct inode_record *rec;
1556 struct cache_tree *root_cache;
1557 struct cache_tree *inode_cache;
1558 struct btrfs_key location;
1559 char namebuf[BTRFS_NAME_LEN];
1561 root_cache = &active_node->root_cache;
1562 inode_cache = &active_node->inode_cache;
1563 rec = active_node->current;
1564 rec->found_dir_item = 1;
1566 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1567 total = btrfs_item_size_nr(eb, slot);
/* A single item may hold several packed dir entries; walk them all. */
1568 while (cur < total) {
1570 btrfs_dir_item_key_to_cpu(eb, di, &location);
1571 name_len = btrfs_dir_name_len(eb, di);
1572 data_len = btrfs_dir_data_len(eb, di);
1573 filetype = btrfs_dir_type(eb, di);
/* Directory isize accounting is done in name bytes. */
1575 rec->found_size += name_len;
1576 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp the copy and flag the backref as broken. */
1580 len = BTRFS_NAME_LEN;
1581 error = REF_ERR_NAME_TOO_LONG;
1583 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1585 if (location.type == BTRFS_INODE_ITEM_KEY) {
1586 add_inode_backref(inode_cache, location.objectid,
1587 key->objectid, key->offset, namebuf,
1588 len, filetype, key->type, error);
1589 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
/* Entry points at a subvolume root rather than a plain inode. */
1590 add_inode_backref(root_cache, location.objectid,
1591 key->objectid, key->offset,
1592 namebuf, len, filetype,
1595 fprintf(stderr, "invalid location in dir item %u\n",
/* Unknown location type: record it under the multi-objectid bucket. */
1597 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1598 key->objectid, key->offset, namebuf,
1599 len, filetype, key->type, error);
/* Advance to the next packed entry (header + name + payload). */
1602 len = sizeof(*di) + name_len + data_len;
1603 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item must contain exactly one entry. */
1606 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1607 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every name packed into the INODE_REF item at @slot: each becomes
 * an inode backref (child = key->objectid, parent dir = key->offset) in
 * the active node's inode cache.
 */
1612 static int process_inode_ref(struct extent_buffer *eb,
1613 int slot, struct btrfs_key *key,
1614 struct shared_node *active_node)
1622 struct cache_tree *inode_cache;
1623 struct btrfs_inode_ref *ref;
1624 char namebuf[BTRFS_NAME_LEN];
1626 inode_cache = &active_node->inode_cache;
1628 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1629 total = btrfs_item_size_nr(eb, slot);
/* An INODE_REF item can pack several refs back to back. */
1630 while (cur < total) {
1631 name_len = btrfs_inode_ref_name_len(eb, ref);
1632 index = btrfs_inode_ref_index(eb, ref);
1633 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp and flag. */
1637 len = BTRFS_NAME_LEN;
1638 error = REF_ERR_NAME_TOO_LONG;
1640 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1641 add_inode_backref(inode_cache, key->objectid, key->offset,
1642 index, namebuf, len, 0, key->type, error);
/* Step over this ref's header + inline name. */
1644 len = sizeof(*ref) + name_len;
1645 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Same as process_inode_ref() but for extended refs (INODE_EXTREF), which
 * carry the parent directory inside the item itself instead of in the key
 * offset (the key offset is a name hash for extrefs).
 */
1651 static int process_inode_extref(struct extent_buffer *eb,
1652 int slot, struct btrfs_key *key,
1653 struct shared_node *active_node)
1662 struct cache_tree *inode_cache;
1663 struct btrfs_inode_extref *extref;
1664 char namebuf[BTRFS_NAME_LEN];
1666 inode_cache = &active_node->inode_cache;
1668 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1669 total = btrfs_item_size_nr(eb, slot);
1670 while (cur < total) {
1671 name_len = btrfs_inode_extref_name_len(eb, extref);
1672 index = btrfs_inode_extref_index(eb, extref);
/* Parent dir comes from the extref payload, not the key. */
1673 parent = btrfs_inode_extref_parent(eb, extref);
1674 if (name_len <= BTRFS_NAME_LEN) {
1678 len = BTRFS_NAME_LEN;
1679 error = REF_ERR_NAME_TOO_LONG;
1681 read_extent_buffer(eb, namebuf,
1682 (unsigned long)(extref + 1), len);
1683 add_inode_backref(inode_cache, key->objectid, parent,
1684 index, namebuf, len, 0, key->type, error);
1686 len = sizeof(*extref) + name_len;
1687 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the data range [@start, @start + @len) are
 * covered by checksum items in the csum tree; the covered byte count is
 * returned through @found.
 */
1694 static int count_csum_range(struct btrfs_root *root, u64 start,
1695 u64 len, u64 *found)
1697 struct btrfs_key key;
1698 struct btrfs_path path;
1699 struct extent_buffer *leaf;
1704 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1706 btrfs_init_path(&path);
1708 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1710 key.type = BTRFS_EXTENT_CSUM_KEY;
1712 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* The previous item may be a csum item whose coverage extends into our
 * range; back up one slot if so. */
1716 if (ret > 0 && path.slots[0] > 0) {
1717 leaf = path.nodes[0];
1718 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1719 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1720 key.type == BTRFS_EXTENT_CSUM_KEY)
1725 leaf = path.nodes[0];
1726 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1727 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1732 leaf = path.nodes[0];
1735 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1736 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1737 key.type != BTRFS_EXTENT_CSUM_KEY)
1740 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* Item starts past the end of the queried range: done. */
1741 if (key.offset >= start + len)
1744 if (key.offset > start)
/* Each csum_size bytes of item data covers one sector of data. */
1747 size = btrfs_item_size_nr(leaf, path.slots[0]);
1748 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1749 if (csum_end > start) {
1750 size = min(csum_end - start, len);
1759 btrfs_release_path(&path);
/*
 * Validate the EXTENT_DATA item at @slot for the current inode record:
 * track extent coverage/holes, accumulate found_size, sanity-check the
 * extent fields, and verify csum coverage for regular extents.
 */
1765 static int process_file_extent(struct btrfs_root *root,
1766 struct extent_buffer *eb,
1767 int slot, struct btrfs_key *key,
1768 struct shared_node *active_node)
1770 struct inode_record *rec;
1771 struct btrfs_file_extent_item *fi;
1773 u64 disk_bytenr = 0;
1774 u64 extent_offset = 0;
/* Mask for rounding lengths up to sector granularity. */
1775 u64 mask = root->sectorsize - 1;
1779 rec = active_node->current;
1780 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1781 rec->found_file_extent = 1;
1783 if (rec->extent_start == (u64)-1) {
/* First extent seen for this inode: start coverage tracking here. */
1784 rec->extent_start = key->offset;
1785 rec->extent_end = key->offset;
/* Extents must be contiguous and in order: overlap is an error, a gap is
 * recorded as a hole (which is only legal with the no_holes feature). */
1788 if (rec->extent_end > key->offset)
1789 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1790 else if (rec->extent_end < key->offset) {
1791 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1792 key->offset - rec->extent_end);
1797 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1798 extent_type = btrfs_file_extent_type(eb, fi);
1800 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1801 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1803 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1804 rec->found_size += num_bytes;
/* Round inline length up to a sector for coverage accounting. */
1805 num_bytes = (num_bytes + mask) & ~mask;
1806 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1807 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1808 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1809 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1810 extent_offset = btrfs_file_extent_offset(eb, fi);
/* Regular extents must be non-empty and sector aligned. */
1811 if (num_bytes == 0 || (num_bytes & mask))
1812 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1813 if (num_bytes + extent_offset >
1814 btrfs_file_extent_ram_bytes(eb, fi))
1815 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents can never be compressed/encrypted/encoded. */
1816 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1817 (btrfs_file_extent_compression(eb, fi) ||
1818 btrfs_file_extent_encryption(eb, fi) ||
1819 btrfs_file_extent_other_encoding(eb, fi)))
1820 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 means a hole extent: contributes no bytes. */
1821 if (disk_bytenr > 0)
1822 rec->found_size += num_bytes;
1824 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1826 rec->extent_end = key->offset + num_bytes;
1829 * The data reloc tree will copy full extents into its inode and then
1830 * copy the corresponding csums. Because the extent it copied could be
1831 * a preallocated extent that hasn't been written to yet there may be no
1832 * csums to copy, ergo we won't have csums for our file extent. This is
1833 * ok so just don't bother checking csums if the inode belongs to the
1836 if (disk_bytenr > 0 &&
1837 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Csums cover on-disk (possibly compressed) bytes, not logical bytes. */
1839 if (btrfs_file_extent_compression(eb, fi))
1840 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1842 disk_bytenr += extent_offset;
1844 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1847 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1849 rec->found_csum_item = 1;
1850 if (found < num_bytes)
1851 rec->some_csum_missing = 1;
1852 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
/* Preallocated (never written) extents must have no csums at all. */
1854 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item in leaf @eb for the fs-tree scan: keep the active
 * node's "current" inode record in sync with the item keys (items for one
 * inode are contiguous) and dispatch each item to its type handler.
 */
1860 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1861 struct walk_control *wc)
1863 struct btrfs_key key;
1867 struct cache_tree *inode_cache;
1868 struct shared_node *active_node;
/* Dead subvolume at root level: nothing to collect. */
1870 if (wc->root_level == wc->active_node &&
1871 btrfs_root_refs(&root->root_item) == 0)
1874 active_node = wc->nodes[wc->active_node];
1875 inode_cache = &active_node->inode_cache;
1876 nritems = btrfs_header_nritems(eb);
1877 for (i = 0; i < nritems; i++) {
1878 btrfs_item_key_to_cpu(eb, &key, i);
/* Free-space cache and orphan items are not inode metadata; skip. */
1880 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1882 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Moved on to a new inode: finalize the previous record and fetch
 * (or create) the record for this objectid. */
1885 if (active_node->current == NULL ||
1886 active_node->current->ino < key.objectid) {
1887 if (active_node->current) {
1888 active_node->current->checked = 1;
1889 maybe_free_inode_rec(inode_cache,
1890 active_node->current);
1892 active_node->current = get_inode_rec(inode_cache,
1894 BUG_ON(IS_ERR(active_node->current));
1897 case BTRFS_DIR_ITEM_KEY:
1898 case BTRFS_DIR_INDEX_KEY:
1899 ret = process_dir_item(root, eb, i, &key, active_node);
1901 case BTRFS_INODE_REF_KEY:
1902 ret = process_inode_ref(eb, i, &key, active_node);
1904 case BTRFS_INODE_EXTREF_KEY:
1905 ret = process_inode_extref(eb, i, &key, active_node);
1907 case BTRFS_INODE_ITEM_KEY:
1908 ret = process_inode_item(eb, i, &key, active_node);
1910 case BTRFS_EXTENT_DATA_KEY:
1911 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for all child blocks of @node starting at @slot so the
 * subsequent walk finds them already in cache.
 */
1921 static void reada_walk_down(struct btrfs_root *root,
1922 struct extent_buffer *node, int slot)
1931 level = btrfs_header_level(node);
1935 nritems = btrfs_header_nritems(node);
1936 blocksize = root->nodesize;
1937 for (i = slot; i < nritems; i++) {
1938 bytenr = btrfs_node_blockptr(node, i);
1939 ptr_gen = btrfs_node_ptr_generation(node, i);
1940 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1945 * Check the child node/leaf by the following condition:
1946 * 1. the first item key of the node/leaf should be the same with the one
1948 * 2. block in parent node should match the child node/leaf.
1949 * 3. generation of parent node and child's header should be consistent.
1951 * Or the child node/leaf pointed by the key in parent is not valid.
1953 * We hope to check leaf owner too, but since subvol may share leaves,
1954 * which makes leaf owner check not so strong, key check should be
1955 * sufficient for that case.
1957 static int check_child_node(struct btrfs_root *root,
1958 struct extent_buffer *parent, int slot,
1959 struct extent_buffer *child)
1961 struct btrfs_key parent_key;
1962 struct btrfs_key child_key;
/* Condition 1: first key of the child must equal the parent's key for
 * this slot. Leaves use item keys, internal nodes use node keys. */
1965 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1966 if (btrfs_header_level(child) == 0)
1967 btrfs_item_key_to_cpu(child, &child_key, 0);
1969 btrfs_node_key_to_cpu(child, &child_key, 0);
1971 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1974 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1975 parent_key.objectid, parent_key.type, parent_key.offset,
1976 child_key.objectid, child_key.type, child_key.offset);
/* Condition 2: the child's header bytenr must match the parent's block
 * pointer. */
1978 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1980 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1981 btrfs_node_blockptr(parent, slot),
1982 btrfs_header_bytenr(child));
/* Condition 3: pointer generation in the parent must match the child's
 * header generation. */
1984 if (btrfs_node_ptr_generation(parent, slot) !=
1985 btrfs_header_generation(child)) {
1987 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1988 btrfs_header_generation(child),
1989 btrfs_node_ptr_generation(parent, slot));
1995 u64 bytenr[BTRFS_MAX_LEVEL];
1996 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend one step of the fs-tree walk: validate each child block before
 * entering it (key/bytenr/generation and block sanity), track per-level
 * extent refcounts in @nrefs as a one-entry cache to avoid repeated
 * extent-tree lookups, and process leaves via process_one_leaf().
 */
1999 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2000 struct walk_control *wc, int *level,
2001 struct node_refs *nrefs)
2003 enum btrfs_tree_block_status status;
2006 struct extent_buffer *next;
2007 struct extent_buffer *cur;
2012 WARN_ON(*level < 0);
2013 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Refcount cache hit for the starting block; otherwise ask the extent
 * tree and remember the answer. */
2015 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2016 refs = nrefs->refs[*level];
2019 ret = btrfs_lookup_extent_info(NULL, root,
2020 path->nodes[*level]->start,
2021 *level, 1, &refs, NULL);
2026 nrefs->bytenr[*level] = path->nodes[*level]->start;
2027 nrefs->refs[*level] = refs;
/* Shared block (refs > 1): register it with the shared-node machinery. */
2031 ret = enter_shared_node(root, path->nodes[*level]->start,
2039 while (*level >= 0) {
2040 WARN_ON(*level < 0);
2041 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2042 cur = path->nodes[*level];
2044 if (btrfs_header_level(cur) != *level)
/* Exhausted this node's slots: let walk_up_tree() pop it. */
2047 if (path->slots[*level] >= btrfs_header_nritems(cur))
2050 ret = process_one_leaf(root, cur, wc);
2055 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2056 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2057 blocksize = root->nodesize;
/* Same one-entry refcount cache, for the child we are about to enter. */
2059 if (bytenr == nrefs->bytenr[*level - 1]) {
2060 refs = nrefs->refs[*level - 1];
2062 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2063 *level - 1, 1, &refs, NULL);
2067 nrefs->bytenr[*level - 1] = bytenr;
2068 nrefs->refs[*level - 1] = refs;
2073 ret = enter_shared_node(root, bytenr, refs,
/* Shared child already accounted: skip descending into it. */
2076 path->slots[*level]++;
2081 next = btrfs_find_tree_block(root, bytenr, blocksize);
2082 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2083 free_extent_buffer(next);
/* Prefetch the remaining siblings before the synchronous read. */
2084 reada_walk_down(root, cur, path->slots[*level]);
2085 next = read_tree_block(root, bytenr, blocksize,
2087 if (!extent_buffer_uptodate(next)) {
2088 struct btrfs_key node_key;
2090 btrfs_node_key_to_cpu(path->nodes[*level],
2092 path->slots[*level]);
/* Unreadable child: record the parent as corrupt and move on. */
2093 btrfs_add_corrupt_extent_record(root->fs_info,
2095 path->nodes[*level]->start,
2096 root->nodesize, *level);
2102 ret = check_child_node(root, cur, path->slots[*level], next);
2108 if (btrfs_is_leaf(next))
2109 status = btrfs_check_leaf(root, NULL, next);
2111 status = btrfs_check_node(root, NULL, next);
2112 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2113 free_extent_buffer(next);
/* Step down into the validated child. */
2118 *level = *level - 1;
2119 free_extent_buffer(path->nodes[*level]);
2120 path->nodes[*level] = next;
2121 path->slots[*level] = 0;
2124 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Pop fully-consumed levels of the walk: advance to the next slot in the
 * lowest ancestor that still has items, releasing each exhausted node and
 * leaving its shared-node tracking on the way up.
 */
2128 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2129 struct walk_control *wc, int *level)
2132 struct extent_buffer *leaf;
2134 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2135 leaf = path->nodes[i];
/* Found an ancestor with remaining slots: resume there. */
2136 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2141 free_extent_buffer(path->nodes[*level]);
2142 path->nodes[*level] = NULL;
2143 BUG_ON(*level > wc->active_node);
2144 if (*level == wc->active_node)
2145 leave_shared_node(root, wc, *level);
/*
 * Verify that @rec looks like a valid root directory inode: a clean inode
 * item with nlink 1, no child links, and exactly one ".." inode-ref
 * backref that carries no dir item/index.
 */
2152 static int check_root_dir(struct inode_record *rec)
2154 struct inode_backref *backref;
2157 if (!rec->found_inode_item || rec->errors)
2159 if (rec->nlink != 1 || rec->found_link != 0)
2161 if (list_empty(&rec->backrefs))
2163 backref = to_inode_backref(rec->backrefs.next)
2164 if (!backref->found_inode_ref)
2166 if (backref->index != 0 || backref->namelen != 2 ||
2167 memcmp(backref->name, "..", 2))
2169 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair helper: rewrite the on-disk directory isize of @rec's inode with
 * the size accumulated during the scan (rec->found_size) and clear the
 * I_ERR_DIR_ISIZE_WRONG flag.
 */
2176 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2177 struct btrfs_root *root, struct btrfs_path *path,
2178 struct inode_record *rec)
2180 struct btrfs_inode_item *ei;
2181 struct btrfs_key key;
/* offset (u64)-1 + search positions us just past the inode's items; the
 * INODE_ITEM itself is at the previous slot. */
2184 key.objectid = rec->ino;
2185 key.type = BTRFS_INODE_ITEM_KEY;
2186 key.offset = (u64)-1;
2188 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2192 if (!path->slots[0]) {
2199 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2200 if (key.objectid != rec->ino) {
2205 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2206 struct btrfs_inode_item);
2207 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2208 btrfs_mark_buffer_dirty(path->nodes[0]);
2209 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2210 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2211 root->root_key.objectid);
2213 btrfs_release_path(path);
/*
 * Repair helper: insert the missing orphan item for @rec's inode and
 * clear I_ERR_NO_ORPHAN_ITEM on success.
 */
2217 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2218 struct btrfs_root *root,
2219 struct btrfs_path *path,
2220 struct inode_record *rec)
2224 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2225 btrfs_release_path(path);
2227 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair helper: rewrite the inode's nbytes field with the byte count
 * accumulated from its file extents (rec->found_size) and clear
 * I_ERR_FILE_NBYTES_WRONG.
 */
2231 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2232 struct btrfs_root *root,
2233 struct btrfs_path *path,
2234 struct inode_record *rec)
2236 struct btrfs_inode_item *ei;
2237 struct btrfs_key key;
2240 key.objectid = rec->ino;
2241 key.type = BTRFS_INODE_ITEM_KEY;
2244 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2251 /* Since ret == 0, no need to check anything */
2252 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2253 struct btrfs_inode_item);
2254 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2255 btrfs_mark_buffer_dirty(path->nodes[0]);
2256 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2257 printf("reset nbytes for ino %llu root %llu\n",
2258 rec->ino, root->root_key.objectid);
2260 btrfs_release_path(path);
/*
 * Repair helper: recreate a missing DIR_INDEX item for @backref of inode
 * @rec (the dir item/inode ref exist but the index does not). Also
 * updates the parent directory's record so a newly consistent isize
 * clears (or sets) I_ERR_DIR_ISIZE_WRONG.
 */
2264 static int add_missing_dir_index(struct btrfs_root *root,
2265 struct cache_tree *inode_cache,
2266 struct inode_record *rec,
2267 struct inode_backref *backref)
2269 struct btrfs_path *path;
2270 struct btrfs_trans_handle *trans;
2271 struct btrfs_dir_item *dir_item;
2272 struct extent_buffer *leaf;
2273 struct btrfs_key key;
2274 struct btrfs_disk_key disk_key;
2275 struct inode_record *dir_rec;
2276 unsigned long name_ptr;
/* One packed dir entry: header plus the inline name. */
2277 u32 data_size = sizeof(*dir_item) + backref->namelen;
2280 path = btrfs_alloc_path();
2284 trans = btrfs_start_transaction(root, 1);
2285 if (IS_ERR(trans)) {
2286 btrfs_free_path(path);
2287 return PTR_ERR(trans);
2290 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2291 (unsigned long long)rec->ino);
2292 key.objectid = backref->dir;
2293 key.type = BTRFS_DIR_INDEX_KEY;
2294 key.offset = backref->index;
2296 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2299 leaf = path->nodes[0];
2300 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* The entry's location key points at the child inode. */
2302 disk_key.objectid = cpu_to_le64(rec->ino);
2303 disk_key.type = BTRFS_INODE_ITEM_KEY;
2304 disk_key.offset = 0;
2306 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2307 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2308 btrfs_set_dir_data_len(leaf, dir_item, 0);
2309 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2310 name_ptr = (unsigned long)(dir_item + 1);
2311 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2312 btrfs_mark_buffer_dirty(leaf);
2313 btrfs_free_path(path);
2314 btrfs_commit_transaction(trans, root);
2316 backref->found_dir_index = 1;
2317 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2318 BUG_ON(IS_ERR(dir_rec));
/* The parent dir's isize grows by the entry name length; re-evaluate its
 * isize error flag accordingly. */
2321 dir_rec->found_size += backref->namelen;
2322 if (dir_rec->found_size == dir_rec->isize &&
2323 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2324 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2325 if (dir_rec->found_size != dir_rec->isize)
2326 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Repair helper: remove the bad DIR_INDEX entry described by @backref.
 * If the index item holds only this one name the whole item is deleted,
 * otherwise just this name is removed from it.
 */
2331 static int delete_dir_index(struct btrfs_root *root,
2332 struct cache_tree *inode_cache,
2333 struct inode_record *rec,
2334 struct inode_backref *backref)
2336 struct btrfs_trans_handle *trans;
2337 struct btrfs_dir_item *di;
2338 struct btrfs_path *path;
2341 path = btrfs_alloc_path();
2345 trans = btrfs_start_transaction(root, 1);
2346 if (IS_ERR(trans)) {
2347 btrfs_free_path(path);
2348 return PTR_ERR(trans);
2352 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2353 (unsigned long long)backref->dir,
2354 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2355 (unsigned long long)root->objectid);
2357 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2358 backref->name, backref->namelen,
2359 backref->index, -1);
/* Lookup failed / nothing to delete: just commit and return. */
2362 btrfs_free_path(path);
2363 btrfs_commit_transaction(trans, root);
/* di == NULL here apparently means "delete whole item"; otherwise remove
 * only this name from the packed item — confirm against the hidden
 * branch condition. */
2370 ret = btrfs_del_item(trans, root, path);
2372 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2374 btrfs_free_path(path);
2375 btrfs_commit_transaction(trans, root);
/*
 * Repair helper: synthesize a fresh INODE_ITEM for @rec from whatever the
 * scan discovered (links, sizes, dir items vs file extents). The result
 * is best-effort; the user is warned to verify mode and contents.
 */
2379 static int create_inode_item(struct btrfs_root *root,
2380 struct inode_record *rec,
2381 struct inode_backref *backref, int root_dir)
2383 struct btrfs_trans_handle *trans;
2384 struct btrfs_inode_item inode_item;
2385 time_t now = time(NULL);
2388 trans = btrfs_start_transaction(root, 1);
2389 if (IS_ERR(trans)) {
2390 ret = PTR_ERR(trans);
2394 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2395 "be incomplete, please check permissions and content after "
2396 "the fsck completes.\n", (unsigned long long)root->objectid,
2397 (unsigned long long)rec->ino);
2399 memset(&inode_item, 0, sizeof(inode_item));
2400 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* Hidden condition selects between these two; presumably root_dir gets a
 * fixed nlink of 1 while normal inodes use the discovered link count. */
2402 btrfs_set_stack_inode_nlink(&inode_item, 1);
2404 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2405 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
/* Dir items present -> treat as directory; otherwise regular file. */
2406 if (rec->found_dir_item) {
2407 if (rec->found_file_extent)
2408 fprintf(stderr, "root %llu inode %llu has both a dir "
2409 "item and extents, unsure if it is a dir or a "
2410 "regular file so setting it as a directory\n",
2411 (unsigned long long)root->objectid,
2412 (unsigned long long)rec->ino);
2413 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2414 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): `else if (!rec->found_dir_item)` is redundant — the else
 * branch already implies it; a plain `else` would be equivalent. */
2415 } else if (!rec->found_dir_item) {
2416 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2417 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2419 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2420 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2421 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2422 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2423 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2424 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2425 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2426 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2428 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2430 btrfs_commit_transaction(trans, root);
/*
 * Walk @rec's backref list and repair each inconsistency: delete stray
 * dir indexes, add missing indexes, add missing dir index/item pairs for
 * orphaned inode refs, and recreate the inode item when only complete
 * backrefs remain. Returns the repair count (or a negative error).
 */
2434 static int repair_inode_backrefs(struct btrfs_root *root,
2435 struct inode_record *rec,
2436 struct cache_tree *inode_cache,
2439 struct inode_backref *tmp, *backref;
2440 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2444 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* The root directory gets its inode item recreated, nothing else. */
2445 if (!delete && rec->ino == root_dirid) {
2446 if (!rec->found_inode_item) {
2447 ret = create_inode_item(root, rec, backref, 1);
2454 /* Index 0 for root dir's are special, don't mess with it */
2455 if (rec->ino == root_dirid && backref->index == 0)
/* A dir index without an inode ref (or one that disagrees with it) is
 * bogus: remove it. */
2459 ((backref->found_dir_index && !backref->found_inode_ref) ||
2460 (backref->found_dir_index && backref->found_inode_ref &&
2461 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2462 ret = delete_dir_index(root, inode_cache, rec, backref);
2466 list_del(&backref->list);
/* Dir item + inode ref but no index: recreate the index. */
2470 if (!delete && !backref->found_dir_index &&
2471 backref->found_dir_item && backref->found_inode_ref) {
2472 ret = add_missing_dir_index(root, inode_cache, rec,
/* NOTE(review): found_dir_index is tested twice below; upstream intent
 * was presumably item && index — harmless but worth confirming. */
2477 if (backref->found_dir_item &&
2478 backref->found_dir_index &&
2479 backref->found_dir_index) {
2480 if (!backref->errors &&
2481 backref->found_inode_ref) {
/* Fully consistent backref: drop it from the to-repair list. */
2482 list_del(&backref->list);
/* Inode ref exists but the dir entries are gone entirely: re-add the
 * dir index/item pair unless a name conflict makes nlink repair the
 * better tool. */
2488 if (!delete && (!backref->found_dir_index &&
2489 !backref->found_dir_item &&
2490 backref->found_inode_ref)) {
2491 struct btrfs_trans_handle *trans;
2492 struct btrfs_key location;
2494 ret = check_dir_conflict(root, backref->name,
2500 * let nlink fixing routine to handle it,
2501 * which can do it better.
2506 location.objectid = rec->ino;
2507 location.type = BTRFS_INODE_ITEM_KEY;
2508 location.offset = 0;
2510 trans = btrfs_start_transaction(root, 1);
2511 if (IS_ERR(trans)) {
2512 ret = PTR_ERR(trans);
2515 fprintf(stderr, "adding missing dir index/item pair "
2517 (unsigned long long)rec->ino);
2518 ret = btrfs_insert_dir_item(trans, root, backref->name,
2520 backref->dir, &location,
2521 imode_to_type(rec->imode),
2524 btrfs_commit_transaction(trans, root);
/* All three backref pieces present and clean but no inode item: the
 * inode item itself is what's missing — recreate it. */
2528 if (!delete && (backref->found_inode_ref &&
2529 backref->found_dir_index &&
2530 backref->found_dir_item &&
2531 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2532 !rec->found_inode_item)) {
2533 ret = create_inode_item(root, rec, backref, 0);
2540 return ret ? ret : repaired;
2544 * To determine the file type for nlink/inode_item repair
2546 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2547 * Return -ENOENT if file type is not found.
2549 static int find_file_type(struct inode_record *rec, u8 *type)
2551 struct inode_backref *backref;
2553 /* For inode item recovered case */
2554 if (rec->found_inode_item) {
2555 *type = imode_to_type(rec->imode);
/* Otherwise fall back to the filetype stored in any dir entry. */
2559 list_for_each_entry(backref, &rec->backrefs, list) {
2560 if (backref->found_dir_index || backref->found_dir_item) {
2561 *type = backref->filetype;
2569 * To determine the file name for nlink repair
2571 * Return 0 if file name is found, set name and namelen.
2572 * Return -ENOENT if file name is not found.
2574 static int find_file_name(struct inode_record *rec,
2575 char *name, int *namelen)
2577 struct inode_backref *backref;
/* Any backref that carries a name (dir item/index or inode ref) will do. */
2579 list_for_each_entry(backref, &rec->backrefs, list) {
2580 if (backref->found_dir_index || backref->found_dir_item ||
2581 backref->found_inode_ref) {
2582 memcpy(name, backref->name, backref->namelen);
2583 *namelen = backref->namelen;
2590 /* Reset the nlink of the inode to the correct one */
2591 static int reset_nlink(struct btrfs_trans_handle *trans,
2592 struct btrfs_root *root,
2593 struct btrfs_path *path,
2594 struct inode_record *rec)
2596 struct inode_backref *backref;
2597 struct inode_backref *tmp;
2598 struct btrfs_key key;
2599 struct btrfs_inode_item *inode_item;
2602 /* We don't believe this either, reset it and iterate backref */
2603 rec->found_link = 0;
2605 /* Remove all backref including the valid ones */
2606 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2607 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2608 backref->index, backref->name,
2609 backref->namelen, 0);
2613 /* remove invalid backref, so it won't be added back */
2614 if (!(backref->found_dir_index &&
2615 backref->found_dir_item &&
2616 backref->found_inode_ref)) {
2617 list_del(&backref->list);
2624 /* Set nlink to 0 */
2625 key.objectid = rec->ino;
2626 key.type = BTRFS_INODE_ITEM_KEY;
2628 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2635 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2636 struct btrfs_inode_item);
2637 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2638 btrfs_mark_buffer_dirty(path->nodes[0]);
2639 btrfs_release_path(path);
2642 * Add back valid inode_ref/dir_item/dir_index,
2643 * add_link() will handle the nlink inc, so new nlink must be correct
2645 list_for_each_entry(backref, &rec->backrefs, list) {
2646 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2647 backref->name, backref->namelen,
2648 backref->filetype, &backref->index, 1);
2653 btrfs_release_path(path);
/*
 * Repair a wrong link count: rebuild nlink from the valid backrefs via
 * reset_nlink(), and if no link survives, reattach the inode under a
 * "lost+found" directory (creating it if needed), retrying with ".INO"
 * suffixes on name collisions. Clears I_ERR_LINK_COUNT_WRONG.
 */
2657 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2658 struct btrfs_root *root,
2659 struct btrfs_path *path,
2660 struct inode_record *rec)
2662 char *dir_name = "lost+found";
2663 char namebuf[BTRFS_NAME_LEN] = {0};
2668 int name_recovered = 0;
2669 int type_recovered = 0;
2673 * Get file name and type first before these invalid inode ref
2674 * are deleted by remove_all_invalid_backref()
2676 name_recovered = !find_file_name(rec, namebuf, &namelen);
2677 type_recovered = !find_file_type(rec, &type);
2679 if (!name_recovered) {
2680 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2681 rec->ino, rec->ino);
/* Fall back to the decimal inode number as the file name. */
2682 namelen = count_digits(rec->ino);
2683 sprintf(namebuf, "%llu", rec->ino);
2686 if (!type_recovered) {
2687 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2689 type = BTRFS_FT_REG_FILE;
2693 ret = reset_nlink(trans, root, path, rec);
2696 "Failed to reset nlink for inode %llu: %s\n",
2697 rec->ino, strerror(-ret));
/* No valid link survived: move the inode into lost+found. */
2701 if (rec->found_link == 0) {
2702 lost_found_ino = root->highest_inode;
2703 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2708 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2709 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2712 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2713 dir_name, strerror(-ret));
2716 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2717 namebuf, namelen, type, NULL, 1);
2719 * Add ".INO" suffix several times to handle case where
2720 * "FILENAME.INO" is already taken by another file.
2722 while (ret == -EEXIST) {
2724 * Conflicting file name: retry with ".<ino>" appended
2726 if (namelen + count_digits(rec->ino) + 1 >
2731 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
/* +1 accounts for the '.' separator. */
2733 namelen += count_digits(rec->ino) + 1;
2734 ret = btrfs_add_link(trans, root, rec->ino,
2735 lost_found_ino, namebuf,
2736 namelen, type, NULL, 1);
2740 "Failed to link the inode %llu to %s dir: %s\n",
2741 rec->ino, dir_name, strerror(-ret));
2745 * Just increase the found_link, don't actually add the
2746 * backref. This will make things easier and this inode
2747 * record will be freed after the repair is done.
2748 * So fsck will not report problem about this inode.
2751 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2752 namelen, namebuf, dir_name);
2754 printf("Fixed the nlink of inode %llu\n", rec->ino);
2757 * Clear the flag anyway, or we will loop forever for the same inode
2758 * as it will not be removed from the bad inode list and the dead loop
2761 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2762 btrfs_release_path(path);
/*
 * find_normal_file_extent - probe the fs tree for a non-inline file extent.
 *
 * Searches from (ino, EXTENT_DATA, 0) and walks forward while the key still
 * belongs to @ino; returns non-zero as soon as a REG/PREALLOC (i.e. not
 * INLINE) file extent item is found, 0 otherwise.
 *
 * NOTE(review): several statements are elided in this view (key setup for
 * objectid/offset, the loop construct, and the final return) — the comments
 * below describe only what the visible lines establish.
 */
2767 * Check if there is any normal(reg or prealloc) file extent for given
2769 * This is used to determine the file type when neither its dir_index/item or
2770 * inode_item exists.
2772 * This will *NOT* report error, if any error happens, just consider it does
2773 * not have any normal file extent.
2775 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2777 struct btrfs_path *path;
2778 struct btrfs_key key;
2779 struct btrfs_key found_key;
2780 struct btrfs_file_extent_item *fi;
2784 path = btrfs_alloc_path();
2788 key.type = BTRFS_EXTENT_DATA_KEY;
/* Read-only search (no trans, cow=0), so this cannot modify the tree. */
2791 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* Ran off the current leaf: advance to the next one before inspecting. */
2796 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2797 ret = btrfs_next_leaf(root, path);
2804 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
/* Stop once we leave this inode's EXTENT_DATA key range. */
2806 if (found_key.objectid != ino ||
2807 found_key.type != BTRFS_EXTENT_DATA_KEY)
2809 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2810 struct btrfs_file_extent_item);
2811 type = btrfs_file_extent_type(path->nodes[0], fi);
/* Any non-inline extent (regular or prealloc) counts as "normal". */
2812 if (type != BTRFS_FILE_EXTENT_INLINE) {
2818 btrfs_free_path(path);
2822 static u32 btrfs_type_to_imode(u8 type)
2824 static u32 imode_by_btrfs_type[] = {
2825 [BTRFS_FT_REG_FILE] = S_IFREG,
2826 [BTRFS_FT_DIR] = S_IFDIR,
2827 [BTRFS_FT_CHRDEV] = S_IFCHR,
2828 [BTRFS_FT_BLKDEV] = S_IFBLK,
2829 [BTRFS_FT_FIFO] = S_IFIFO,
2830 [BTRFS_FT_SOCK] = S_IFSOCK,
2831 [BTRFS_FT_SYMLINK] = S_IFLNK,
2834 return imode_by_btrfs_type[(type)];
/*
 * repair_inode_no_item - recreate a missing inode item for @rec.
 *
 * Determines the file type (from backrefs, real file extents, dir items,
 * or orphan extents, falling back to regular file), inserts a fresh inode
 * item via btrfs_new_inode(), then updates the in-memory record: clears
 * I_ERR_NO_INODE_ITEM and sets I_ERR_LINK_COUNT_WRONG so the nlink repair
 * pass will run afterwards.
 *
 * NOTE(review): the declarations of ret/filetype/mode and several error
 * checks are elided in this view; `mode` presumably carries permission
 * bits (e.g. 0755/0700) — confirm against the full source.
 */
2837 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2838 struct btrfs_root *root,
2839 struct btrfs_path *path,
2840 struct inode_record *rec)
2844 int type_recovered = 0;
2847 printf("Trying to rebuild inode:%llu\n", rec->ino);
2849 type_recovered = !find_file_type(rec, &filetype);
2852 * Try to determine inode type if type not found.
2854 * For found regular file extent, it must be FILE.
2855 * For found dir_item/index, it must be DIR.
2857 * For undetermined one, use FILE as fallback.
2860 * 1. If found backref(inode_index/item is already handled) to it,
2862 * Need new inode-inode ref structure to allow search for that.
2864 if (!type_recovered) {
/* A real (non-inline) file extent in the tree proves this is a file. */
2865 if (rec->found_file_extent &&
2866 find_normal_file_extent(root, rec->ino)) {
2868 filetype = BTRFS_FT_REG_FILE;
2869 } else if (rec->found_dir_item) {
2871 filetype = BTRFS_FT_DIR;
/* Orphan data extents imply file data existed -> treat as a file. */
2872 } else if (!list_empty(&rec->orphan_extents)) {
2874 filetype = BTRFS_FT_REG_FILE;
2876 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2879 filetype = BTRFS_FT_REG_FILE;
2883 ret = btrfs_new_inode(trans, root, rec->ino,
2884 mode | btrfs_type_to_imode(filetype));
2889 * Here inode rebuild is done, we only rebuild the inode item,
2890 * don't repair the nlink(like move to lost+found).
2891 * That is the job of nlink repair.
2893 * We just fill the record and return
2895 rec->found_dir_item = 1;
2896 rec->imode = mode | btrfs_type_to_imode(filetype);
2898 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2899 /* Ensure the inode_nlinks repair function will be called */
2900 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * repair_inode_orphan_extent - re-link orphan data extents back into the
 * inode's file extent items.
 *
 * For each recorded orphan extent: if it conflicts with an existing file
 * extent, free it instead of inserting; otherwise insert a new file extent
 * item (assuming uncompressed data and no data offset, using disk_len as
 * the extent length), then update found_size, the hole tree, and the
 * corresponding I_ERR_* bits on @rec.
 *
 * NOTE(review): the conflict-detection branch and error-exit paths are
 * partially elided in this view.
 */
2905 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2906 struct btrfs_root *root,
2907 struct btrfs_path *path,
2908 struct inode_record *rec)
2910 struct orphan_data_extent *orphan;
2911 struct orphan_data_extent *tmp;
/* _safe iteration: entries are deleted from the list as they are handled. */
2914 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2916 * Check for conflicting file extents
2918 * Here we don't know whether the extents is compressed or not,
2919 * so we can only assume it not compressed nor data offset,
2920 * and use its disk_len as extent length.
2922 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2923 orphan->offset, orphan->disk_len, 0);
2924 btrfs_release_path(path);
2929 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2930 orphan->disk_bytenr, orphan->disk_len);
/* Conflicting range already covered: drop the orphan's extent backref. */
2931 ret = btrfs_free_extent(trans,
2932 root->fs_info->extent_root,
2933 orphan->disk_bytenr, orphan->disk_len,
2934 0, root->objectid, orphan->objectid,
2939 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2940 orphan->offset, orphan->disk_bytenr,
2941 orphan->disk_len, orphan->disk_len);
2945 /* Update file size info */
2946 rec->found_size += orphan->disk_len;
2947 if (rec->found_size == rec->nbytes)
2948 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2950 /* Update the file extent hole info too */
2951 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2955 if (RB_EMPTY_ROOT(&rec->holes))
2956 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2958 list_del(&orphan->list);
/* All orphan extents handled: the orphan-extent error no longer applies. */
2961 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * repair_inode_discount_extent - fill file extent holes for a NO_HOLES-less
 * inode by punching explicit hole extents.
 *
 * Walks the record's hole rb-tree, punches a hole extent for each entry and
 * removes it from the tree; clears I_ERR_FILE_EXTENT_DISCOUNT once the tree
 * is empty. Also handles the special case of a file that lost every file
 * extent by punching a single hole covering [0, round_up(isize)).
 *
 * NOTE(review): variable declarations and error-return paths are elided in
 * this view; the "found" condition guarding the special case is presumably
 * "no holes were recorded" — confirm against the full source.
 */
2966 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2967 struct btrfs_root *root,
2968 struct btrfs_path *path,
2969 struct inode_record *rec)
2971 struct rb_node *node;
2972 struct file_extent_hole *hole;
2976 node = rb_first(&rec->holes);
2980 hole = rb_entry(node, struct file_extent_hole, node);
2981 ret = btrfs_punch_hole(trans, root, rec->ino,
2982 hole->start, hole->len);
2985 ret = del_file_extent_hole(&rec->holes, hole->start,
2989 if (RB_EMPTY_ROOT(&rec->holes))
2990 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Re-read the first node: del_file_extent_hole() mutated the tree. */
2991 node = rb_first(&rec->holes);
2993 /* special case for a file losing all its file extent */
2995 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2996 round_up(rec->isize, root->sectorsize));
3000 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3001 rec->ino, root->objectid);
/*
 * try_repair_inode - dispatch all applicable inode repairs for @rec.
 *
 * Returns early when none of the repairable I_ERR_* bits are set. Otherwise
 * starts a transaction sized for the worst case (nlink repair creating
 * lost+found, see the item count comment below) and runs each repair
 * routine in dependency order, stopping at the first failure. The no-item
 * repair must run first since the other repairs need an inode item.
 *
 * NOTE(review): the early-return value and the final `return ret` are
 * elided in this view.
 */
3006 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3008 struct btrfs_trans_handle *trans;
3009 struct btrfs_path *path;
/* Only these error bits have a repair routine below; anything else bails. */
3012 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3013 I_ERR_NO_ORPHAN_ITEM |
3014 I_ERR_LINK_COUNT_WRONG |
3015 I_ERR_NO_INODE_ITEM |
3016 I_ERR_FILE_EXTENT_ORPHAN |
3017 I_ERR_FILE_EXTENT_DISCOUNT|
3018 I_ERR_FILE_NBYTES_WRONG)))
3021 path = btrfs_alloc_path();
3026 * For nlink repair, it may create a dir and add link, so
3027 * 2 for parent(256)'s dir_index and dir_item
3028 * 2 for lost+found dir's inode_item and inode_ref
3029 * 1 for the new inode_ref of the file
3030 * 2 for lost+found dir's dir_index and dir_item for the file
3032 trans = btrfs_start_transaction(root, 7);
3033 if (IS_ERR(trans)) {
3034 btrfs_free_path(path);
3035 return PTR_ERR(trans);
/* Each subsequent repair only runs while ret == 0 (first failure stops). */
3038 if (rec->errors & I_ERR_NO_INODE_ITEM)
3039 ret = repair_inode_no_item(trans, root, path, rec);
3040 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3041 ret = repair_inode_orphan_extent(trans, root, path, rec);
3042 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3043 ret = repair_inode_discount_extent(trans, root, path, rec);
3044 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3045 ret = repair_inode_isize(trans, root, path, rec);
3046 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3047 ret = repair_inode_orphan_item(trans, root, path, rec);
3048 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3049 ret = repair_inode_nlinks(trans, root, path, rec);
3050 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3051 ret = repair_inode_nbytes(trans, root, path, rec);
/* Commit unconditionally; successful partial repairs are kept. */
3052 btrfs_commit_transaction(trans, root);
3053 btrfs_free_path(path);
/*
 * check_inode_recs - validate (and optionally repair) every inode record
 * collected for @root's inode_cache.
 *
 * Phases visible here:
 *  1. Skip dead roots (refs == 0), only warning if the cache is non-empty.
 *  2. Record the highest inode number so a later lost+found creation can
 *     pick an unused objectid.
 *  3. In repair mode, iterate backref repairs in stages until stable
 *     (deleting invalid backrefs before re-adding correct ones, since
 *     adding first could hit EEXIST).
 *  4. Verify the root directory inode, recreating it in repair mode if it
 *     is missing.
 *  5. Drain the cache: for each record, re-derive error bits (missing
 *     inode item, wrong link count, stale orphan item), attempt
 *     try_repair_inode() in repair mode, and print unresolved records and
 *     their backref errors.
 *
 * Returns -1 if any unresolved error remains, 0 otherwise.
 *
 * NOTE(review): many loop/brace lines, the `stage` loop header, and several
 * error-path statements are elided in this view.
 */
3057 static int check_inode_recs(struct btrfs_root *root,
3058 struct cache_tree *inode_cache)
3060 struct cache_extent *cache;
3061 struct ptr_node *node;
3062 struct inode_record *rec;
3063 struct inode_backref *backref;
3068 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A root with zero refs is being deleted; its inodes are not checked. */
3070 if (btrfs_root_refs(&root->root_item) == 0) {
3071 if (!cache_tree_empty(inode_cache))
3072 fprintf(stderr, "warning line %d\n", __LINE__);
3077 * We need to record the highest inode number for later 'lost+found'
3079 * We must select an ino not used/referred by any existing inode, or
3080 * 'lost+found' ino may be a missing ino in a corrupted leaf,
3081 * this may cause 'lost+found' dir has wrong nlinks.
3083 cache = last_cache_extent(inode_cache);
3085 node = container_of(cache, struct ptr_node, cache);
3087 if (rec->ino > root->highest_inode)
3088 root->highest_inode = rec->ino;
3092 * We need to repair backrefs first because we could change some of the
3093 * errors in the inode recs.
3095 * We also need to go through and delete invalid backrefs first and then
3096 * add the correct ones second. We do this because we may get EEXIST
3097 * when adding back the correct index because we hadn't yet deleted the
3100 * For example, if we were missing a dir index then the directories
3101 * isize would be wrong, so if we fixed the isize to what we thought it
3102 * would be and then fixed the backref we'd still have a invalid fs, so
3103 * we need to add back the dir index and then check to see if the isize
3108 if (stage == 3 && !err)
3111 cache = search_cache_extent(inode_cache, 0);
3112 while (repair && cache) {
3113 node = container_of(cache, struct ptr_node, cache);
3115 cache = next_cache_extent(cache);
3117 /* Need to free everything up and rescan */
3119 remove_cache_extent(inode_cache, &node->cache);
3121 free_inode_rec(rec);
3125 if (list_empty(&rec->backrefs))
3128 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Verify the subvolume's root directory inode itself. */
3142 rec = get_inode_rec(inode_cache, root_dirid, 0);
3143 BUG_ON(IS_ERR(rec));
3145 ret = check_root_dir(rec);
3147 fprintf(stderr, "root %llu root dir %llu error\n",
3148 (unsigned long long)root->root_key.objectid,
3149 (unsigned long long)root_dirid);
3150 print_inode_error(root, rec);
3155 struct btrfs_trans_handle *trans;
3157 trans = btrfs_start_transaction(root, 1);
3158 if (IS_ERR(trans)) {
3159 err = PTR_ERR(trans);
3164 "root %llu missing its root dir, recreating\n",
3165 (unsigned long long)root->objectid);
3167 ret = btrfs_make_root_dir(trans, root, root_dirid);
3170 btrfs_commit_transaction(trans, root);
3174 fprintf(stderr, "root %llu root dir %llu not found\n",
3175 (unsigned long long)root->root_key.objectid,
3176 (unsigned long long)root_dirid);
/* Drain the remaining records, checking each one. */
3180 cache = search_cache_extent(inode_cache, 0);
3183 node = container_of(cache, struct ptr_node, cache);
3185 remove_cache_extent(inode_cache, &node->cache);
/* Root dir and orphan objectid were handled above / are special. */
3187 if (rec->ino == root_dirid ||
3188 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3189 free_inode_rec(rec);
/* A present orphan item excuses the "no orphan item" error bit. */
3193 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3194 ret = check_orphan_item(root, rec->ino);
3196 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3197 if (can_free_inode_rec(rec)) {
3198 free_inode_rec(rec);
3203 if (!rec->found_inode_item)
3204 rec->errors |= I_ERR_NO_INODE_ITEM;
3205 if (rec->found_link != rec->nlink)
3206 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3208 ret = try_repair_inode(root, rec);
3209 if (ret == 0 && can_free_inode_rec(rec)) {
3210 free_inode_rec(rec);
3216 if (!(repair && ret == 0))
3218 print_inode_error(root, rec);
/* Derive per-backref error bits before printing unresolved refs. */
3219 list_for_each_entry(backref, &rec->backrefs, list) {
3220 if (!backref->found_dir_item)
3221 backref->errors |= REF_ERR_NO_DIR_ITEM;
3222 if (!backref->found_dir_index)
3223 backref->errors |= REF_ERR_NO_DIR_INDEX;
3224 if (!backref->found_inode_ref)
3225 backref->errors |= REF_ERR_NO_INODE_REF;
3226 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3227 " namelen %u name %s filetype %d errors %x",
3228 (unsigned long long)backref->dir,
3229 (unsigned long long)backref->index,
3230 backref->namelen, backref->name,
3231 backref->filetype, backref->errors);
3232 print_ref_error(backref->errors);
3234 free_inode_rec(rec);
3236 return (error > 0) ? -1 : 0;
/*
 * get_root_rec - look up (or lazily create) the root_record for @objectid
 * in @root_cache.
 *
 * The record is keyed by a 1-byte cache extent at offset @objectid. On
 * allocation failure returns ERR_PTR(-ENOMEM); if insertion races/collides
 * returns ERR_PTR(-EEXIST).
 *
 * NOTE(review): the found-record early path and the final `return rec` are
 * elided in this view; the -EEXIST return presumably leaks the freshly
 * calloc'd record — confirm against the full source.
 */
3239 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3242 struct cache_extent *cache;
3243 struct root_record *rec = NULL;
3246 cache = lookup_cache_extent(root_cache, objectid, 1);
3248 rec = container_of(cache, struct root_record, cache);
/* Not cached yet: build a new record and insert it. */
3250 rec = calloc(1, sizeof(*rec));
3252 return ERR_PTR(-ENOMEM);
3253 rec->objectid = objectid;
3254 INIT_LIST_HEAD(&rec->backrefs);
3255 rec->cache.start = objectid;
3256 rec->cache.size = 1;
3258 ret = insert_cache_extent(root_cache, &rec->cache);
3260 return ERR_PTR(-EEXIST);
/*
 * get_root_backref - find or create the backref entry on @rec matching
 * (ref_root, dir, name[namelen]).
 *
 * An existing entry matches on ref_root, dir, namelen and the name bytes
 * (note: @index is NOT part of the match; add_root_backref reconciles it).
 * When no match exists, allocates a new zeroed entry with the name stored
 * inline (NUL-terminated) and appends it to rec->backrefs.
 *
 * NOTE(review): the early `return backref` for a match, the NULL check
 * after calloc, and the final return are elided in this view.
 */
3265 static struct root_backref *get_root_backref(struct root_record *rec,
3266 u64 ref_root, u64 dir, u64 index,
3267 const char *name, int namelen)
3269 struct root_backref *backref;
3271 list_for_each_entry(backref, &rec->backrefs, list) {
3272 if (backref->ref_root != ref_root || backref->dir != dir ||
3273 backref->namelen != namelen)
3275 if (memcmp(name, backref->name, namelen))
/* No existing entry matched: allocate one with room for the name + NUL. */
3280 backref = calloc(1, sizeof(*backref) + namelen + 1);
3283 backref->ref_root = ref_root;
3285 backref->index = index;
3286 backref->namelen = namelen;
3287 memcpy(backref->name, name, namelen);
3288 backref->name[namelen] = '\0';
3289 list_add_tail(&backref->list, &rec->backrefs);
/*
 * free_root_record - destructor for a root_record stored in a cache tree.
 *
 * Detaches and frees every backref hanging off the record. Used below via
 * FREE_EXTENT_CACHE_BASED_TREE to generate free_root_recs_tree().
 *
 * NOTE(review): the free() calls for the backref and the record itself are
 * elided in this view.
 */
3293 static void free_root_record(struct cache_extent *cache)
3295 struct root_record *rec;
3296 struct root_backref *backref;
3298 rec = container_of(cache, struct root_record, cache);
3299 while (!list_empty(&rec->backrefs)) {
3300 backref = to_root_backref(rec->backrefs.next);
3301 list_del(&backref->list);
/* Generates free_root_recs_tree(tree) from the destructor above. */
3308 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * add_root_backref - record one piece of evidence (dir item, dir index,
 * root ref, or root backref) about the link from @ref_root's directory
 * @dir to subvolume @root_id.
 *
 * Fetches/creates the root_record and its matching backref, accumulates
 * @errors, reconciles the directory index (flagging REF_ERR_INDEX_UNMATCH
 * on disagreement), sets the found_* bit for @item_type with duplicate
 * detection for ROOT_REF/ROOT_BACKREF, and marks the backref reachable
 * once both a forward root ref and a dir item have been seen.
 *
 * NOTE(review): braces and the final return are elided in this view.
 */
3310 static int add_root_backref(struct cache_tree *root_cache,
3311 u64 root_id, u64 ref_root, u64 dir, u64 index,
3312 const char *name, int namelen,
3313 int item_type, int errors)
3315 struct root_record *rec;
3316 struct root_backref *backref;
3318 rec = get_root_rec(root_cache, root_id);
3319 BUG_ON(IS_ERR(rec));
3320 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3323 backref->errors |= errors;
/* DIR_ITEM carries no index; every other item type supplies one. */
3325 if (item_type != BTRFS_DIR_ITEM_KEY) {
3326 if (backref->found_dir_index || backref->found_back_ref ||
3327 backref->found_forward_ref) {
3328 if (backref->index != index)
3329 backref->errors |= REF_ERR_INDEX_UNMATCH;
3331 backref->index = index;
3335 if (item_type == BTRFS_DIR_ITEM_KEY) {
3336 if (backref->found_forward_ref)
3338 backref->found_dir_item = 1;
3339 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3340 backref->found_dir_index = 1;
3341 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3342 if (backref->found_forward_ref)
3343 backref->errors |= REF_ERR_DUP_ROOT_REF;
3344 else if (backref->found_dir_item)
3346 backref->found_forward_ref = 1;
3347 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3348 if (backref->found_back_ref)
3349 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3350 backref->found_back_ref = 1;
/* Reachable = both the forward root ref and its dir item were found. */
3355 if (backref->found_forward_ref && backref->found_dir_item)
3356 backref->reachable = 1;
/*
 * merge_root_recs - fold per-subvolume "inode" records that actually
 * describe child roots from @src_cache into the global @dst_cache as
 * root backrefs.
 *
 * Reloc-tree records are simply discarded. For each record that
 * is_child_root() confirms refers to a child subvolume, its dir_item /
 * dir_index backrefs are re-registered against the root cache, then the
 * inode record is freed.
 *
 * NOTE(review): the non-child handling path and the loop/return tail are
 * elided in this view.
 */
3360 static int merge_root_recs(struct btrfs_root *root,
3361 struct cache_tree *src_cache,
3362 struct cache_tree *dst_cache)
3364 struct cache_extent *cache;
3365 struct ptr_node *node;
3366 struct inode_record *rec;
3367 struct inode_backref *backref;
/* Reloc trees never contribute root backrefs: drop their records wholesale. */
3370 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3371 free_inode_recs_tree(src_cache);
3376 cache = search_cache_extent(src_cache, 0);
3379 node = container_of(cache, struct ptr_node, cache);
3381 remove_cache_extent(src_cache, &node->cache);
3384 ret = is_child_root(root, root->objectid, rec->ino);
3390 list_for_each_entry(backref, &rec->backrefs, list) {
/* A child-root record must not carry an inode ref; that would be a bug. */
3391 BUG_ON(backref->found_inode_ref);
3392 if (backref->found_dir_item)
3393 add_root_backref(dst_cache, rec->ino,
3394 root->root_key.objectid, backref->dir,
3395 backref->index, backref->name,
3396 backref->namelen, BTRFS_DIR_ITEM_KEY,
3398 if (backref->found_dir_index)
3399 add_root_backref(dst_cache, rec->ino,
3400 root->root_key.objectid, backref->dir,
3401 backref->index, backref->name,
3402 backref->namelen, BTRFS_DIR_INDEX_KEY,
3406 free_inode_rec(rec);
/*
 * check_root_refs - verify that every fs tree is reachable from the top
 * fs tree and that its forward/backward root refs agree.
 *
 * Pass 1 (fixed point): repeatedly clear `reachable` on backrefs whose
 * referencing root itself has no refs, propagating unreachability (cannot
 * detect circular references, as noted below). Pass 2: report fs trees
 * with no remaining refs (ignoring ones covered by an orphan item or that
 * never had a root item), refs without a root item, and per-backref
 * missing dir item/index/root-ref/backref errors.
 *
 * Returns 1 if any errors were found, 0 otherwise.
 *
 * NOTE(review): the fixed-point loop header/termination and several
 * continue/brace lines are elided in this view.
 */
3413 static int check_root_refs(struct btrfs_root *root,
3414 struct cache_tree *root_cache)
3416 struct root_record *rec;
3417 struct root_record *ref_root;
3418 struct root_backref *backref;
3419 struct cache_extent *cache;
/* The top-level fs tree is reachable by definition; seed its ref count. */
3425 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3426 BUG_ON(IS_ERR(rec));
3429 /* fixme: this can not detect circular references */
3432 cache = search_cache_extent(root_cache, 0);
3436 rec = container_of(cache, struct root_record, cache);
3437 cache = next_cache_extent(cache);
3439 if (rec->found_ref == 0)
3442 list_for_each_entry(backref, &rec->backrefs, list) {
3443 if (!backref->reachable)
3446 ref_root = get_root_rec(root_cache,
3448 BUG_ON(IS_ERR(ref_root));
3449 if (ref_root->found_ref > 0)
/* Referencing root is itself unreachable: invalidate this backref too. */
3452 backref->reachable = 0;
3454 if (rec->found_ref == 0)
/* Second pass: report the errors that remain after propagation. */
3460 cache = search_cache_extent(root_cache, 0);
3464 rec = container_of(cache, struct root_record, cache);
3465 cache = next_cache_extent(cache);
3467 if (rec->found_ref == 0 &&
3468 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3469 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
/* An orphan item in the tree root legitimizes an unreferenced subvol. */
3470 ret = check_orphan_item(root->fs_info->tree_root,
3476 * If we don't have a root item then we likely just have
3477 * a dir item in a snapshot for this root but no actual
3478 * ref key or anything so it's meaningless.
3480 if (!rec->found_root_item)
3483 fprintf(stderr, "fs tree %llu not referenced\n",
3484 (unsigned long long)rec->objectid);
3488 if (rec->found_ref > 0 && !rec->found_root_item)
3490 list_for_each_entry(backref, &rec->backrefs, list) {
3491 if (!backref->found_dir_item)
3492 backref->errors |= REF_ERR_NO_DIR_ITEM;
3493 if (!backref->found_dir_index)
3494 backref->errors |= REF_ERR_NO_DIR_INDEX;
3495 if (!backref->found_back_ref)
3496 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3497 if (!backref->found_forward_ref)
3498 backref->errors |= REF_ERR_NO_ROOT_REF;
3499 if (backref->reachable && backref->errors)
3506 fprintf(stderr, "fs tree %llu refs %u %s\n",
3507 (unsigned long long)rec->objectid, rec->found_ref,
3508 rec->found_root_item ? "" : "not found");
3510 list_for_each_entry(backref, &rec->backrefs, list) {
3511 if (!backref->reachable)
3513 if (!backref->errors && rec->found_root_item)
3515 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3516 " index %llu namelen %u name %s errors %x\n",
3517 (unsigned long long)backref->ref_root,
3518 (unsigned long long)backref->dir,
3519 (unsigned long long)backref->index,
3520 backref->namelen, backref->name,
3522 print_ref_error(backref->errors);
3525 return errors > 0 ? 1 : 0;
/*
 * process_root_ref - parse a ROOT_REF / ROOT_BACKREF item at (@eb, @slot)
 * and feed it into the root cache.
 *
 * Extracts dirid, sequence (index) and the name (clamped to BTRFS_NAME_LEN
 * with REF_ERR_NAME_TOO_LONG recorded on overflow). For ROOT_REF the key
 * is (parent=objectid, child=offset); ROOT_BACKREF is the reverse, hence
 * the swapped argument order in the two add_root_backref() calls.
 *
 * NOTE(review): the in-range `len = name_len` assignment and the return
 * statement are elided in this view.
 */
3528 static int process_root_ref(struct extent_buffer *eb, int slot,
3529 struct btrfs_key *key,
3530 struct cache_tree *root_cache)
3536 struct btrfs_root_ref *ref;
3537 char namebuf[BTRFS_NAME_LEN];
3540 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3542 dirid = btrfs_root_ref_dirid(eb, ref);
3543 index = btrfs_root_ref_sequence(eb, ref);
3544 name_len = btrfs_root_ref_name_len(eb, ref);
3546 if (name_len <= BTRFS_NAME_LEN) {
/* Name longer than the on-disk maximum: clamp and flag the backref. */
3550 len = BTRFS_NAME_LEN;
3551 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes immediately follow the btrfs_root_ref structure. */
3553 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3555 if (key->type == BTRFS_ROOT_REF_KEY) {
3556 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3557 index, namebuf, len, key->type, error);
3559 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3560 index, namebuf, len, key->type, error);
/*
 * free_corrupt_block - destructor for a btrfs_corrupt_block cache entry;
 * used via FREE_EXTENT_CACHE_BASED_TREE to generate
 * free_corrupt_blocks_tree().
 *
 * NOTE(review): the free(corrupt) call is elided in this view.
 */
3565 static void free_corrupt_block(struct cache_extent *cache)
3567 struct btrfs_corrupt_block *corrupt;
3569 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3573 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
 * repair_btree - drop every tree block recorded in @corrupt_blocks from
 * @root's btree, then rebalance.
 *
 * Phase 1: for each corrupt block, search down to its parent level (with
 * ins_len 0 so no balancing touches the still-corrupt siblings), delete
 * the node pointer and free the corresponding extent. Phase 2: re-run
 * btrfs_search_slot with ins_len -1 on each key purely for its balancing
 * side effect (the item itself is gone, so the search "fails" by design).
 *
 * NOTE(review): the early return for an empty tree, path-alloc failure
 * handling, and the error-label tail are elided in this view.
 */
3576 * Repair the btree of the given root.
3578 * The fix is to remove the node key in corrupt_blocks cache_tree.
3579 * and rebalance the tree.
3580 * After the fix, the btree should be writeable.
3582 static int repair_btree(struct btrfs_root *root,
3583 struct cache_tree *corrupt_blocks)
3585 struct btrfs_trans_handle *trans;
3586 struct btrfs_path *path;
3587 struct btrfs_corrupt_block *corrupt;
3588 struct cache_extent *cache;
3589 struct btrfs_key key;
3594 if (cache_tree_empty(corrupt_blocks))
3597 path = btrfs_alloc_path();
3601 trans = btrfs_start_transaction(root, 1);
3602 if (IS_ERR(trans)) {
3603 ret = PTR_ERR(trans);
3604 fprintf(stderr, "Error starting transaction: %s\n",
3608 cache = first_cache_extent(corrupt_blocks);
3610 corrupt = container_of(cache, struct btrfs_corrupt_block,
/* Search only down to the corrupt block's parent level. */
3612 level = corrupt->level;
3613 path->lowest_level = level;
3614 key.objectid = corrupt->key.objectid;
3615 key.type = corrupt->key.type;
3616 key.offset = corrupt->key.offset;
3619 * Here we don't want to do any tree balance, since it may
3620 * cause a balance with corrupted brother leaf/node,
3621 * so ins_len set to 0 here.
3622 * Balance will be done after all corrupt node/leaf is deleted.
3624 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3627 offset = btrfs_node_blockptr(path->nodes[level],
3628 path->slots[level]);
3630 /* Remove the ptr */
3631 ret = btrfs_del_ptr(trans, root, path, level,
3632 path->slots[level]);
3636 * Remove the corresponding extent
3637 * return value is not concerned.
3639 btrfs_release_path(path);
3640 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3641 0, root->root_key.objectid,
3643 cache = next_cache_extent(cache);
3646 /* Balance the btree using btrfs_search_slot() */
3647 cache = first_cache_extent(corrupt_blocks);
3649 corrupt = container_of(cache, struct btrfs_corrupt_block,
3651 memcpy(&key, &corrupt->key, sizeof(key));
/* ins_len -1 triggers deletion-style balancing as a side effect. */
3652 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3655 /* return will always >0 since it won't find the item */
3657 btrfs_release_path(path);
3658 cache = next_cache_extent(cache);
3661 btrfs_commit_transaction(trans, root);
3663 btrfs_free_path(path);
/*
 * check_fs_root - walk one fs/subvolume tree and validate its contents.
 *
 * Sets up a per-root corrupt-block cache, registers the root record (for
 * non-reloc roots), moves pre-collected orphan data extents onto their
 * inode records, checks the root node/leaf itself, then walks the whole
 * tree via walk_down_tree()/walk_up_tree() — honoring drop_progress for
 * partially-dropped snapshots. Afterwards it prints any corrupted blocks
 * (attempting repair_btree() in repair mode), merges child-root records
 * into @root_cache, and checks all inode records.
 *
 * NOTE(review): declarations of ret/err/wret and several error branches
 * are elided in this view; the returned status presumably combines err
 * and the walk results — confirm against the full source.
 */
3667 static int check_fs_root(struct btrfs_root *root,
3668 struct cache_tree *root_cache,
3669 struct walk_control *wc)
3675 struct btrfs_path path;
3676 struct shared_node root_node;
3677 struct root_record *rec;
3678 struct btrfs_root_item *root_item = &root->root_item;
3679 struct cache_tree corrupt_blocks;
3680 struct orphan_data_extent *orphan;
3681 struct orphan_data_extent *tmp;
3682 enum btrfs_tree_block_status status;
3683 struct node_refs nrefs;
3686 * Reuse the corrupt_block cache tree to record corrupted tree block
3688 * Unlike the usage in extent tree check, here we do it in a per
3689 * fs/subvol tree base.
3691 cache_tree_init(&corrupt_blocks);
3692 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* Reloc trees are not real subvolumes; don't create root records for them. */
3694 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3695 rec = get_root_rec(root_cache, root->root_key.objectid);
3696 BUG_ON(IS_ERR(rec));
3697 if (btrfs_root_refs(root_item) > 0)
3698 rec->found_root_item = 1;
3701 btrfs_init_path(&path);
3702 memset(&root_node, 0, sizeof(root_node));
3703 cache_tree_init(&root_node.root_cache);
3704 cache_tree_init(&root_node.inode_cache);
3705 memset(&nrefs, 0, sizeof(nrefs));
3707 /* Move the orphan extent record to corresponding inode_record */
3708 list_for_each_entry_safe(orphan, tmp,
3709 &root->orphan_data_extents, list) {
3710 struct inode_record *inode;
3712 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3714 BUG_ON(IS_ERR(inode));
3715 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3716 list_move(&orphan->list, &inode->orphan_extents);
3719 level = btrfs_header_level(root->node);
3720 memset(wc->nodes, 0, sizeof(wc->nodes));
3721 wc->nodes[level] = &root_node;
3722 wc->active_node = level;
3723 wc->root_level = level;
3725 /* We may not have checked the root block, lets do that now */
3726 if (btrfs_is_leaf(root->node))
3727 status = btrfs_check_leaf(root, NULL, root->node);
3729 status = btrfs_check_node(root, NULL, root->node);
3730 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Live root: start at the tree top. Dropped root: resume at drop_progress. */
3733 if (btrfs_root_refs(root_item) > 0 ||
3734 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3735 path.nodes[level] = root->node;
3736 extent_buffer_get(root->node);
3737 path.slots[level] = 0;
3739 struct btrfs_key key;
3740 struct btrfs_disk_key found_key;
3742 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3743 level = root_item->drop_level;
3744 path.lowest_level = level;
3745 if (level > btrfs_header_level(root->node) ||
3746 level >= BTRFS_MAX_LEVEL) {
3747 error("ignoring invalid drop level: %u", level);
3750 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/* Sanity: the node key at the resume point must equal drop_progress. */
3753 btrfs_node_key(path.nodes[level], &found_key,
3755 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3756 sizeof(found_key)));
3760 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3766 wret = walk_up_tree(root, &path, wc, &level);
3773 btrfs_release_path(&path);
3775 if (!cache_tree_empty(&corrupt_blocks)) {
3776 struct cache_extent *cache;
3777 struct btrfs_corrupt_block *corrupt;
3779 printf("The following tree block(s) is corrupted in tree %llu:\n",
3780 root->root_key.objectid);
3781 cache = first_cache_extent(&corrupt_blocks);
3783 corrupt = container_of(cache,
3784 struct btrfs_corrupt_block,
3786 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3787 cache->start, corrupt->level,
3788 corrupt->key.objectid, corrupt->key.type,
3789 corrupt->key.offset);
3790 cache = next_cache_extent(cache);
3793 printf("Try to repair the btree for root %llu\n",
3794 root->root_key.objectid);
3795 ret = repair_btree(root, &corrupt_blocks);
3797 fprintf(stderr, "Failed to repair btree: %s\n",
3800 printf("Btree for root %llu is fixed\n",
3801 root->root_key.objectid);
3805 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3809 if (root_node.current) {
3810 root_node.current->checked = 1;
3811 maybe_free_inode_rec(&root_node.inode_cache,
3815 err = check_inode_recs(root, &root_node.inode_cache);
/* Per-root corrupt-block bookkeeping is torn down before returning. */
3819 free_corrupt_blocks_tree(&corrupt_blocks);
3820 root->fs_info->corrupt_blocks = NULL;
3821 free_orphan_data_extents(&root->orphan_data_extents);
3825 static int fs_root_objectid(u64 objectid)
3827 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3828 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3830 return is_fstree(objectid);
/*
 * check_fs_roots - iterate every ROOT_ITEM in the tree root and run
 * check_fs_root() on each fs/subvolume tree; also feeds ROOT_REF /
 * ROOT_BACKREF items into @root_cache via process_root_ref().
 *
 * Detects concurrent modification of the tree root (tree_node changing
 * under the walk) and -EAGAIN from check_fs_root(): both cases free the
 * collected root records, drop the path and restart the scan from the top.
 * Reloc roots are read uncached and freed immediately after checking.
 *
 * NOTE(review): the `again:` label, key.objectid initialization and several
 * error branches are elided in this view.
 */
3833 static int check_fs_roots(struct btrfs_root *root,
3834 struct cache_tree *root_cache)
3836 struct btrfs_path path;
3837 struct btrfs_key key;
3838 struct walk_control wc;
3839 struct extent_buffer *leaf, *tree_node;
3840 struct btrfs_root *tmp_root;
3841 struct btrfs_root *tree_root = root->fs_info->tree_root;
3845 if (ctx.progress_enabled) {
3846 ctx.tp = TASK_FS_ROOTS;
3847 task_start(ctx.info);
3851 * Just in case we made any changes to the extent tree that weren't
3852 * reflected into the free space cache yet.
3855 reset_cached_block_groups(root->fs_info);
3856 memset(&wc, 0, sizeof(wc));
3857 cache_tree_init(&wc.shared);
3858 btrfs_init_path(&path);
3863 key.type = BTRFS_ROOT_ITEM_KEY;
3864 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the root node so concurrent changes can be detected below. */
3869 tree_node = tree_root->node;
3871 if (tree_node != tree_root->node) {
/* Tree root changed under us: throw away partial results and restart. */
3872 free_root_recs_tree(root_cache);
3873 btrfs_release_path(&path);
3876 leaf = path.nodes[0];
3877 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3878 ret = btrfs_next_leaf(tree_root, &path);
3884 leaf = path.nodes[0];
3886 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3887 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3888 fs_root_objectid(key.objectid)) {
/* Reloc roots: read uncached (and freed below after checking). */
3889 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3890 tmp_root = btrfs_read_fs_root_no_cache(
3891 root->fs_info, &key);
3893 key.offset = (u64)-1;
3894 tmp_root = btrfs_read_fs_root(
3895 root->fs_info, &key);
3897 if (IS_ERR(tmp_root)) {
3901 ret = check_fs_root(tmp_root, root_cache, &wc);
3902 if (ret == -EAGAIN) {
3903 free_root_recs_tree(root_cache);
3904 btrfs_release_path(&path);
3909 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3910 btrfs_free_fs_root(tmp_root);
3911 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3912 key.type == BTRFS_ROOT_BACKREF_KEY) {
3913 process_root_ref(leaf, path.slots[0], &key,
3920 btrfs_release_path(&path);
/* wc.shared should already be empty here; a leftover entry is a bug. */
3922 free_extent_cache_tree(&wc.shared);
3923 if (!cache_tree_empty(&wc.shared))
3924 fprintf(stderr, "warning line %d\n", __LINE__);
3926 task_stop(ctx.info);
/*
 * all_backpointers_checked - verify every backref recorded for extent @rec
 * against what was found in the extent tree.
 *
 * Walks the backref rb-tree and flags: backrefs never seen in the extent
 * tree, tree backrefs never referenced back, data backrefs whose local
 * found_ref count, disk_bytenr or byte length disagree with the record,
 * and finally a global refcount mismatch (found vs rec->refs). When
 * @print_errs is set each problem is reported on stderr.
 *
 * NOTE(review): the error-accumulator variable, the !print_errs early-out
 * (`goto out` style) lines, and the final return are elided in this view;
 * the return value presumably is non-zero when any mismatch was found.
 */
3931 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3934 struct extent_backref *back;
3935 struct tree_backref *tback;
3936 struct data_backref *dback;
3940 for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3941 back = rb_node_to_extent_backref(n);
3942 if (!back->found_extent_tree) {
3946 if (back->is_data) {
3947 dback = to_data_backref(back);
3948 fprintf(stderr, "Backref %llu %s %llu"
3949 " owner %llu offset %llu num_refs %lu"
3950 " not found in extent tree\n",
3951 (unsigned long long)rec->start,
3952 back->full_backref ?
3954 back->full_backref ?
3955 (unsigned long long)dback->parent:
3956 (unsigned long long)dback->root,
3957 (unsigned long long)dback->owner,
3958 (unsigned long long)dback->offset,
3959 (unsigned long)dback->num_refs);
3961 tback = to_tree_backref(back);
3962 fprintf(stderr, "Backref %llu parent %llu"
3963 " root %llu not found in extent tree\n",
3964 (unsigned long long)rec->start,
3965 (unsigned long long)tback->parent,
3966 (unsigned long long)tback->root);
/* Tree backrefs must also have been referenced from the owning tree. */
3969 if (!back->is_data && !back->found_ref) {
3973 tback = to_tree_backref(back);
3974 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3975 (unsigned long long)rec->start,
3976 back->full_backref ? "parent" : "root",
3977 back->full_backref ?
3978 (unsigned long long)tback->parent :
3979 (unsigned long long)tback->root, back);
3981 if (back->is_data) {
3982 dback = to_data_backref(back);
/* Per-backref count must match the refs claimed by the extent item. */
3983 if (dback->found_ref != dback->num_refs) {
3987 fprintf(stderr, "Incorrect local backref count"
3988 " on %llu %s %llu owner %llu"
3989 " offset %llu found %u wanted %u back %p\n",
3990 (unsigned long long)rec->start,
3991 back->full_backref ?
3993 back->full_backref ?
3994 (unsigned long long)dback->parent:
3995 (unsigned long long)dback->root,
3996 (unsigned long long)dback->owner,
3997 (unsigned long long)dback->offset,
3998 dback->found_ref, dback->num_refs, back);
4000 if (dback->disk_bytenr != rec->start) {
4004 fprintf(stderr, "Backref disk bytenr does not"
4005 " match extent record, bytenr=%llu, "
4006 "ref bytenr=%llu\n",
4007 (unsigned long long)rec->start,
4008 (unsigned long long)dback->disk_bytenr);
4011 if (dback->bytes != rec->nr) {
4015 fprintf(stderr, "Backref bytes do not match "
4016 "extent backref, bytenr=%llu, ref "
4017 "bytes=%llu, backref bytes=%llu\n",
4018 (unsigned long long)rec->start,
4019 (unsigned long long)rec->nr,
4020 (unsigned long long)dback->bytes);
4023 if (!back->is_data) {
4026 dback = to_data_backref(back);
4027 found += dback->found_ref;
/* Global check: total found refs must equal the extent's refcount. */
4030 if (found != rec->refs) {
4034 fprintf(stderr, "Incorrect global backref count "
4035 "on %llu found %llu wanted %llu\n",
4036 (unsigned long long)rec->start,
4037 (unsigned long long)found,
4038 (unsigned long long)rec->refs);
/*
 * __free_one_backref - rb-tree node destructor for extent backrefs;
 * free_all_extent_backrefs - drop every backref hanging off @rec.
 *
 * NOTE(review): the free(back) call inside __free_one_backref is elided
 * in this view.
 */
4044 static void __free_one_backref(struct rb_node *node)
4046 struct extent_backref *back = rb_node_to_extent_backref(node);
4051 static void free_all_extent_backrefs(struct extent_record *rec)
4053 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * free_extent_record_cache - drain @extent_cache, freeing each extent
 * record together with all of its backrefs.
 *
 * NOTE(review): the loop construct, the free(rec) call and the use (if
 * any) of @fs_info are elided in this view.
 */
4056 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4057 struct cache_tree *extent_cache)
4059 struct cache_extent *cache;
4060 struct extent_record *rec;
4063 cache = first_cache_extent(extent_cache);
4066 rec = container_of(cache, struct extent_record, cache);
4067 remove_cache_extent(extent_cache, cache);
4068 free_all_extent_backrefs(rec);
/*
 * maybe_free_extent_rec - release @rec from @extent_cache once it is fully
 * resolved: content and owner-ref checked, refcounts consistent, all
 * backpointers verified, no duplicates, and none of the bad-full-backref /
 * crossing-stripes / wrong-chunk-type conditions set.
 *
 * NOTE(review): the free(rec) call and the function's return value(s) are
 * elided in this view.
 */
4073 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4074 struct extent_record *rec)
4076 if (rec->content_checked && rec->owner_ref_checked &&
4077 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4078 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4079 !rec->bad_full_backref && !rec->crossing_stripes &&
4080 !rec->wrong_chunk_type) {
4081 remove_cache_extent(extent_cache, &rec->cache);
4082 free_all_extent_backrefs(rec);
/* list_del_init so a later list_del on the record stays safe. */
4083 list_del_init(&rec->list);
/*
 * check_owner_ref - verify that the owner recorded in @buf's header really
 * references this block.
 *
 * Fast path: scan @rec's backrefs for a non-full tree backref whose root
 * matches the header owner. Otherwise read the owner's fs tree and search
 * (at lowest_level = level + 1) for the block's first key; the owner is
 * confirmed if the parent node's blockptr at that slot equals buf->start.
 * Returns 0 when the owner ref checks out, 1 when it does not.
 *
 * NOTE(review): a "found = 1; break;" style assignment in the fast path
 * and the early return it feeds are elided in this view.
 */
4089 static int check_owner_ref(struct btrfs_root *root,
4090 struct extent_record *rec,
4091 struct extent_buffer *buf)
4093 struct extent_backref *node, *tmp;
4094 struct tree_backref *back;
4095 struct btrfs_root *ref_root;
4096 struct btrfs_key key;
4097 struct btrfs_path path;
4098 struct extent_buffer *parent;
4103 rbtree_postorder_for_each_entry_safe(node, tmp,
4104 &rec->backref_tree, node) {
4107 if (!node->found_ref)
/* Full backrefs carry a parent bytenr, not a root id: skip them here. */
4109 if (node->full_backref)
4111 back = to_tree_backref(node);
4112 if (btrfs_header_owner(buf) == back->root)
/* The tree root itself is never checked through this path. */
4115 BUG_ON(rec->is_root);
4117 /* try to find the block by search corresponding fs tree */
4118 key.objectid = btrfs_header_owner(buf);
4119 key.type = BTRFS_ROOT_ITEM_KEY;
4120 key.offset = (u64)-1;
4122 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4123 if (IS_ERR(ref_root))
4126 level = btrfs_header_level(buf);
/* Use the block's first key as the search anchor. */
4128 btrfs_item_key_to_cpu(buf, &key, 0);
4130 btrfs_node_key_to_cpu(buf, &key, 0);
4132 btrfs_init_path(&path);
4133 path.lowest_level = level + 1;
4134 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4138 parent = path.nodes[level + 1];
4139 if (parent && buf->start == btrfs_node_blockptr(parent,
4140 path.slots[level + 1]))
4143 btrfs_release_path(&path);
4144 return found ? 0 : 1;
/*
 * Return 1 if @rec is a block belonging to the extent tree itself (only
 * non-full tree backrefs rooted at BTRFS_EXTENT_TREE_OBJECTID), 0 otherwise.
 * Any data backref or full backref disqualifies it.
 */
static int is_extent_tree_record(struct extent_record *rec)
{
	struct extent_backref *ref, *tmp;
	struct tree_backref *back;
	int is_extent = 0;

	rbtree_postorder_for_each_entry_safe(ref, tmp,
					     &rec->backref_tree, node) {
		if (ref->is_data)
			return 0;
		back = to_tree_backref(ref);
		if (ref->full_backref)
			return 0;
		if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
			is_extent = 1;
	}

	return is_extent;
}
/*
 * An I/O error occurred on [start, len).  If the block belongs to the
 * extent tree, record it as a corrupt extent so the rebuild pass can deal
 * with it; otherwise nothing is recorded.
 */
static int record_bad_block_io(struct btrfs_fs_info *info,
			       struct cache_tree *extent_cache,
			       u64 start, u64 len)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	struct btrfs_key key;

	cache = lookup_cache_extent(extent_cache, start, len);
	if (!cache)
		return 0;

	rec = container_of(cache, struct extent_record, cache);
	if (!is_extent_tree_record(rec))
		return 0;

	btrfs_disk_key_to_cpu(&key, &rec->parent_key);
	return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
}
/*
 * Swap the entries at @slot and @slot + 1 in @buf to repair out-of-order
 * keys.  For internal nodes the whole key pointers are exchanged; for
 * leaves the item headers' offset/size are exchanged along with the item
 * data, then the keys are swapped via btrfs_set_item_key_unsafe().
 *
 * Returns 0 on success, -ENOMEM if the temporary item buffers cannot be
 * allocated.
 */
static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
		       struct extent_buffer *buf, int slot)
{
	if (btrfs_header_level(buf)) {
		struct btrfs_key_ptr ptr1, ptr2;

		read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
				   sizeof(struct btrfs_key_ptr));
		read_extent_buffer(buf, &ptr2,
				   btrfs_node_key_ptr_offset(slot + 1),
				   sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr1,
				    btrfs_node_key_ptr_offset(slot + 1),
				    sizeof(struct btrfs_key_ptr));
		write_extent_buffer(buf, &ptr2,
				    btrfs_node_key_ptr_offset(slot),
				    sizeof(struct btrfs_key_ptr));
		if (slot == 0) {
			/* First key changed: propagate it up the path. */
			struct btrfs_disk_key key;
			btrfs_node_key(buf, &key, 0);
			btrfs_fixup_low_keys(root, path, &key,
					     btrfs_header_level(buf) + 1);
		}
	} else {
		struct btrfs_item *item1, *item2;
		struct btrfs_key k1, k2;
		char *item1_data, *item2_data;
		u32 item1_offset, item2_offset, item1_size, item2_size;

		item1 = btrfs_item_nr(slot);
		item2 = btrfs_item_nr(slot + 1);
		btrfs_item_key_to_cpu(buf, &k1, slot);
		btrfs_item_key_to_cpu(buf, &k2, slot + 1);
		item1_offset = btrfs_item_offset(buf, item1);
		item2_offset = btrfs_item_offset(buf, item2);
		item1_size = btrfs_item_size(buf, item1);
		item2_size = btrfs_item_size(buf, item2);

		item1_data = malloc(item1_size);
		if (!item1_data)
			return -ENOMEM;
		item2_data = malloc(item2_size);
		if (!item2_data) {
			free(item1_data);
			return -ENOMEM;
		}

		read_extent_buffer(buf, item1_data, item1_offset, item1_size);
		read_extent_buffer(buf, item2_data, item2_offset, item2_size);

		/*
		 * NOTE(review): item1_data holds item1_size bytes but is
		 * written with item2_size (and vice versa).  This is only
		 * safe when the two item sizes are equal — confirm against
		 * upstream, looks like a latent over-read for unequal sizes.
		 */
		write_extent_buffer(buf, item1_data, item2_offset, item2_size);
		write_extent_buffer(buf, item2_data, item1_offset, item1_size);
		free(item1_data);
		free(item2_data);

		/* Swap the header bookkeeping to match the moved data. */
		btrfs_set_item_offset(buf, item1, item2_offset);
		btrfs_set_item_offset(buf, item2, item1_offset);
		btrfs_set_item_size(buf, item1, item2_size);
		btrfs_set_item_size(buf, item2, item1_size);

		path->slots[0] = slot;
		btrfs_set_item_key_unsafe(root, path, &k2);
		path->slots[0] = slot + 1;
		btrfs_set_item_key_unsafe(root, path, &k1);
	}
	return 0;
}
/*
 * Bubble-sort style repair of a block whose keys are out of order: scan
 * adjacent pairs, swap any inversion via swap_values(), and restart the
 * scan from the beginning after each swap until the block is ordered.
 *
 * Returns 0 on success, the swap_values() error on failure, or -EIO if no
 * inversion was ever found (nothing to fix).
 */
static int fix_key_order(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 struct btrfs_path *path)
{
	struct extent_buffer *buf;
	struct btrfs_key k1, k2;
	int i;
	int level = path->lowest_level;
	int ret = -EIO;

	buf = path->nodes[level];
	for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
		if (level) {
			btrfs_node_key_to_cpu(buf, &k1, i);
			btrfs_node_key_to_cpu(buf, &k2, i + 1);
		} else {
			btrfs_item_key_to_cpu(buf, &k1, i);
			btrfs_item_key_to_cpu(buf, &k2, i + 1);
		}
		if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
			continue;
		ret = swap_values(root, path, buf, i);
		if (ret)
			break;
		btrfs_mark_buffer_dirty(buf);
		/* Restart from the front after a swap. */
		i = 0;
	}
	return ret;
}
/*
 * Remove the item at @slot from leaf @buf, but only for key types whose
 * absence the rest of fsck knows how to repair (dir index, extent items,
 * extent backrefs).  Item headers after @slot are shifted down by one and
 * the header item count decremented; the first key is propagated upward
 * when slot 0 is removed.
 *
 * Returns 0 on success, -1 if the key type is not safe to drop.
 */
static int delete_bogus_item(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_path *path,
			     struct extent_buffer *buf, int slot)
{
	struct btrfs_key key;
	int nritems = btrfs_header_nritems(buf);

	btrfs_item_key_to_cpu(buf, &key, slot);

	/* These are all the keys we can deal with missing. */
	if (key.type != BTRFS_DIR_INDEX_KEY &&
	    key.type != BTRFS_EXTENT_ITEM_KEY &&
	    key.type != BTRFS_METADATA_ITEM_KEY &&
	    key.type != BTRFS_TREE_BLOCK_REF_KEY &&
	    key.type != BTRFS_EXTENT_DATA_REF_KEY)
		return -1;

	printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
	       (unsigned long long)key.objectid, key.type,
	       (unsigned long long)key.offset, slot, buf->start);
	/* Close the gap in the item header array. */
	memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + 1),
			      sizeof(struct btrfs_item) *
			      (nritems - slot - 1));
	btrfs_set_header_nritems(buf, nritems - 1);
	if (slot == 0) {
		struct btrfs_disk_key disk_key;

		btrfs_item_key(buf, &disk_key, 0);
		btrfs_fixup_low_keys(root, path, &disk_key, 1);
	}
	btrfs_mark_buffer_dirty(buf);
	return 0;
}
/*
 * Repair a leaf whose item data offsets are inconsistent.  Item 0's data
 * must end at BTRFS_LEAF_DATA_SIZE and every subsequent item must end
 * exactly where the previous item's data starts.  Items hanging off the
 * end of the leaf or overlapping a neighbor are deleted (when the key type
 * allows) via delete_bogus_item(); gaps are closed by shifting item data.
 *
 * BUG()s out on unfixable layouts because progs has no transaction abort.
 */
static int fix_item_offset(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_path *path)
{
	struct extent_buffer *buf;
	int i;
	int ret = 0;

	/* We should only get this for leaves */
	BUG_ON(path->lowest_level);
	buf = path->nodes[0];
again:
	for (i = 0; i < btrfs_header_nritems(buf); i++) {
		unsigned int shift = 0, offset;

		if (i == 0 && btrfs_item_end_nr(buf, i) !=
		    BTRFS_LEAF_DATA_SIZE(root)) {
			if (btrfs_item_end_nr(buf, i) >
			    BTRFS_LEAF_DATA_SIZE(root)) {
				ret = delete_bogus_item(trans, root, path,
							buf, i);
				if (!ret)
					goto again;
				fprintf(stderr, "item is off the end of the "
					"leaf, can't fix\n");
				ret = -EIO;
				break;
			}
			shift = BTRFS_LEAF_DATA_SIZE(root) -
				btrfs_item_end_nr(buf, i);
		} else if (i > 0 && btrfs_item_end_nr(buf, i) !=
			   btrfs_item_offset_nr(buf, i - 1)) {
			if (btrfs_item_end_nr(buf, i) >
			    btrfs_item_offset_nr(buf, i - 1)) {
				ret = delete_bogus_item(trans, root, path,
							buf, i);
				if (!ret)
					goto again;
				fprintf(stderr, "items overlap, can't fix\n");
				ret = -EIO;
				break;
			}
			shift = btrfs_item_offset_nr(buf, i - 1) -
				btrfs_item_end_nr(buf, i);
		}
		if (!shift)
			continue;
		printf("Shifting item nr %d by %u bytes in block %llu\n",
		       i, shift, (unsigned long long)buf->start);
		offset = btrfs_item_offset_nr(buf, i);
		/* Slide the item's data up to close the gap. */
		memmove_extent_buffer(buf,
				      btrfs_leaf_data(buf) + offset + shift,
				      btrfs_leaf_data(buf) + offset,
				      btrfs_item_size_nr(buf, i));
		btrfs_set_item_offset(buf, btrfs_item_nr(i),
				      offset + shift);
		btrfs_mark_buffer_dirty(buf);
	}

	/*
	 * We may have moved things, in which case we want to exit so we don't
	 * write those changes out. Once we have proper abort functionality in
	 * progs this can be changed to something nicer.
	 */
	BUG_ON(ret);
	return ret;
}
/*
 * Attempt to fix basic block failures. If we can't fix it for whatever reason
 * then just return -EIO.
 *
 * Only key-order and item-offset corruptions are repairable here.  For each
 * root that references @buf (found via btrfs_find_all_roots) we start a
 * transaction, COW our way down to the block, and run the matching fixer.
 */
static int try_to_fix_bad_block(struct btrfs_root *root,
				struct extent_buffer *buf,
				enum btrfs_tree_block_status status)
{
	struct btrfs_trans_handle *trans;
	struct ulist *roots;
	struct ulist_node *node;
	struct btrfs_root *search_root;
	struct btrfs_path *path;
	struct ulist_iterator iter;
	struct btrfs_key root_key, key;
	int ret;

	if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
	    status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
		return -EIO;

	path = btrfs_alloc_path();
	if (!path)
		return -EIO;

	ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
				   0, &roots);
	if (ret) {
		btrfs_free_path(path);
		return -EIO;
	}

	ULIST_ITER_INIT(&iter);
	while ((node = ulist_next(roots, &iter))) {
		root_key.objectid = node->val;
		root_key.type = BTRFS_ROOT_ITEM_KEY;
		root_key.offset = (u64)-1;

		search_root = btrfs_read_fs_root(root->fs_info, &root_key);
		/* NOTE(review): reconstructed error check — confirm upstream. */
		if (IS_ERR(search_root)) {
			ret = PTR_ERR(search_root);
			break;
		}

		trans = btrfs_start_transaction(search_root, 0);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			break;
		}

		/* Search with COW down to @buf's level, skipping checks. */
		path->lowest_level = btrfs_header_level(buf);
		path->skip_check_block = 1;
		if (path->lowest_level)
			btrfs_node_key_to_cpu(buf, &key, 0);
		else
			btrfs_item_key_to_cpu(buf, &key, 0);
		ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
		if (ret) {
			ret = -EIO;
			btrfs_commit_transaction(trans, search_root);
			break;
		}
		if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
			ret = fix_key_order(trans, search_root, path);
		else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
			ret = fix_item_offset(trans, search_root, path);
		if (ret) {
			btrfs_commit_transaction(trans, search_root);
			break;
		}
		btrfs_release_path(path);
		btrfs_commit_transaction(trans, search_root);
	}
	ulist_free(roots);
	btrfs_free_path(path);
	return ret;
}
/*
 * Validate one tree block against its cached extent record: record its
 * generation and first key, run the leaf/node structural checks, optionally
 * attempt an in-place repair, and verify the owner backref unless the block
 * uses full backrefs.
 *
 * Returns 0 when clean, -EIO on an unfixable block, -EAGAIN when a repair
 * COWed blocks and the caller must restart the scan, 1 if the block has no
 * extent record.
 */
static int check_block(struct btrfs_root *root,
		       struct cache_tree *extent_cache,
		       struct extent_buffer *buf, u64 flags)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	struct btrfs_key key;
	enum btrfs_tree_block_status status;
	int ret = 0;
	int level;

	cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
	if (!cache)
		return 1;
	rec = container_of(cache, struct extent_record, cache);
	rec->generation = btrfs_header_generation(buf);

	level = btrfs_header_level(buf);
	if (btrfs_header_nritems(buf) > 0) {
		if (level == 0)
			btrfs_item_key_to_cpu(buf, &key, 0);
		else
			btrfs_node_key_to_cpu(buf, &key, 0);

		rec->info_objectid = key.objectid;
	}
	rec->info_level = level;

	if (btrfs_is_leaf(buf))
		status = btrfs_check_leaf(root, &rec->parent_key, buf);
	else
		status = btrfs_check_node(root, &rec->parent_key, buf);

	if (status != BTRFS_TREE_BLOCK_CLEAN) {
		if (repair)
			status = try_to_fix_bad_block(root, buf, status);
		if (status != BTRFS_TREE_BLOCK_CLEAN) {
			ret = -EIO;
			fprintf(stderr, "bad block %llu\n",
				(unsigned long long)buf->start);
		} else {
			/*
			 * Signal to callers we need to start the scan over
			 * again since we'll have cowed blocks.
			 */
			ret = -EAGAIN;
		}
	} else {
		rec->content_checked = 1;
		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
			rec->owner_ref_checked = 1;
		else {
			ret = check_owner_ref(root, rec, buf);
			if (!ret)
				rec->owner_ref_checked = 1;
		}
	}
	if (!ret)
		maybe_free_extent_rec(extent_cache, rec);
	return ret;
}
/*
 * Look up the tree backref of @rec keyed either by @parent (full backref)
 * or by @root (normal backref).  Returns NULL when no matching backref is
 * in the record's backref tree.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
					      u64 parent, u64 root)
{
	struct rb_node *node;
	struct tree_backref *back = NULL;
	struct tree_backref match = {
		.node = {
			.is_data = 0,
		},
	};

	if (parent) {
		/* A nonzero parent selects the full-backref keying. */
		match.parent = parent;
		match.node.full_backref = 1;
	} else {
		match.root = root;
	}

	node = rb_search(&rec->backref_tree, &match.node.node,
			 (rb_compare_keys)compare_extent_backref, NULL);
	if (node)
		back = to_tree_backref(rb_node_to_extent_backref(node));

	return back;
}
/*
 * Allocate a new tree backref for @rec, keyed by @parent (full backref)
 * or @root, and insert it into the record's backref tree.  Returns NULL
 * on allocation failure; the caller owns error handling.
 */
static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
					       u64 parent, u64 root)
{
	struct tree_backref *ref = malloc(sizeof(*ref));

	if (!ref)
		return NULL;
	memset(&ref->node, 0, sizeof(ref->node));
	if (parent > 0) {
		ref->parent = parent;
		ref->node.full_backref = 1;
	} else {
		ref->root = root;
		ref->node.full_backref = 0;
	}
	rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);

	return ref;
}
/*
 * Look up a data backref of @rec.  Keyed by @parent (full backref) or
 * @root/@owner/@offset, and additionally discriminated by @found_ref,
 * @disk_bytenr and @bytes so that mismatched file-extent references are
 * stored as distinct backrefs.  Returns NULL when not found.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
					      u64 parent, u64 root,
					      u64 owner, u64 offset,
					      int found_ref,
					      u64 disk_bytenr, u64 bytes)
{
	struct rb_node *node;
	struct data_backref *back = NULL;
	struct data_backref match = {
		.node = {
			.is_data = 1,
		},
		.owner = owner,
		.offset = offset,
		.bytes = bytes,
		.found_ref = found_ref,
		.disk_bytenr = disk_bytenr,
	};

	if (parent) {
		match.parent = parent;
		match.node.full_backref = 1;
	} else {
		match.root = root;
	}

	node = rb_search(&rec->backref_tree, &match.node.node,
			 (rb_compare_keys)compare_extent_backref, NULL);
	if (node)
		back = to_data_backref(rb_node_to_extent_backref(node));

	return back;
}
/*
 * Allocate a data backref for @rec, keyed by @parent (full backref) or
 * @root/@owner/@offset, insert it into the backref tree, and bump the
 * record's max_size when @max_size exceeds it.  Returns NULL on allocation
 * failure.
 */
static struct data_backref *alloc_data_backref(struct extent_record *rec,
					       u64 parent, u64 root,
					       u64 owner, u64 offset,
					       u64 max_size)
{
	struct data_backref *ref = malloc(sizeof(*ref));

	if (!ref)
		return NULL;
	memset(&ref->node, 0, sizeof(ref->node));
	ref->node.is_data = 1;

	if (parent > 0) {
		ref->parent = parent;
		/* Full backrefs carry no owner/offset. */
		ref->owner = 0;
		ref->offset = 0;
		ref->node.full_backref = 1;
	} else {
		ref->root = root;
		ref->owner = owner;
		ref->offset = offset;
		ref->node.full_backref = 0;
	}
	ref->bytes = max_size;
	ref->found_ref = 0;
	ref->num_refs = 0;
	rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
	if (max_size > rec->max_size)
		rec->max_size = max_size;
	return ref;
}
/* Check if the type of extent matches with its chunk */
static void check_extent_type(struct extent_record *rec)
{
	struct btrfs_block_group_cache *bg_cache;

	bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
	if (!bg_cache)
		return;

	/* data extent, check chunk directly*/
	if (!rec->metadata) {
		if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
			rec->wrong_chunk_type = 1;
		return;
	}

	/* metadata extent, check the obvious case first */
	if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
				 BTRFS_BLOCK_GROUP_METADATA))) {
		rec->wrong_chunk_type = 1;
		return;
	}

	/*
	 * Check SYSTEM extent, as it's also marked as metadata, we can only
	 * make sure it's a SYSTEM extent by its backref
	 */
	if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
		struct extent_backref *node;
		struct tree_backref *tback;
		u64 bg_type;

		node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
		if (node->is_data) {
			/* tree block shouldn't have data backref */
			rec->wrong_chunk_type = 1;
			return;
		}
		tback = container_of(node, struct tree_backref, node);

		/* Chunk-tree-owned blocks must live in a SYSTEM chunk. */
		if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
			bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
		else
			bg_type = BTRFS_BLOCK_GROUP_METADATA;
		if (!(bg_cache->flags & bg_type))
			rec->wrong_chunk_type = 1;
	}
}
/*
 * Allocate a new extent record, fill default values from @tmpl and insert int
 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
 * the cache, otherwise it fails.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or the
 * insert_cache_extent() error on a conflicting range.
 */
static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
				   struct extent_record *tmpl)
{
	struct extent_record *rec;
	int ret = 0;

	rec = malloc(sizeof(*rec));
	if (!rec)
		return -ENOMEM;
	rec->start = tmpl->start;
	rec->max_size = tmpl->max_size;
	/* nr can never be smaller than the largest backref size seen. */
	rec->nr = max(tmpl->nr, tmpl->max_size);
	rec->found_rec = tmpl->found_rec;
	rec->content_checked = tmpl->content_checked;
	rec->owner_ref_checked = tmpl->owner_ref_checked;
	rec->num_duplicates = 0;
	rec->metadata = tmpl->metadata;
	rec->flag_block_full_backref = FLAG_UNSET;
	rec->bad_full_backref = 0;
	rec->crossing_stripes = 0;
	rec->wrong_chunk_type = 0;
	rec->is_root = tmpl->is_root;
	rec->refs = tmpl->refs;
	rec->extent_item_refs = tmpl->extent_item_refs;
	rec->parent_generation = tmpl->parent_generation;
	INIT_LIST_HEAD(&rec->backrefs);
	INIT_LIST_HEAD(&rec->dups);
	INIT_LIST_HEAD(&rec->list);
	rec->backref_tree = RB_ROOT;
	memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
	rec->cache.start = tmpl->start;
	rec->cache.size = tmpl->nr;
	ret = insert_cache_extent(extent_cache, &rec->cache);
	if (ret) {
		free(rec);
		return ret;
	}
	bytes_used += rec->nr;

	if (tmpl->metadata)
		/* Kernel scrub can't handle metadata crossing stripes. */
		rec->crossing_stripes = check_crossing_stripes(rec->start,
				global_info->tree_root->nodesize);
	check_extent_type(rec);
	return ret;
}
/*
 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
 * some are interpreted as flags:
 * - refs              - if found, increase refs
 * - is_root           - if found, set
 * - content_checked   - if found, set
 * - owner_ref_checked - if found, set
 *
 * If not found, create a new one, initialize and insert.
 */
static int add_extent_rec(struct cache_tree *extent_cache,
			  struct extent_record *tmpl)
{
	struct extent_record *rec;
	struct cache_extent *cache;
	int ret = 0;
	int dup = 0;

	cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
	if (cache) {
		rec = container_of(cache, struct extent_record, cache);
		if (tmpl->refs)
			rec->refs++;
		if (rec->nr == 1)
			rec->nr = max(tmpl->nr, tmpl->max_size);

		/*
		 * We need to make sure to reset nr to whatever the extent
		 * record says was the real size, this way we can compare it to
		 * the backrefs.
		 */
		if (tmpl->found_rec) {
			if (tmpl->start != rec->start || rec->found_rec) {
				struct extent_record *tmp;

				dup = 1;
				if (list_empty(&rec->list))
					list_add_tail(&rec->list,
						      &duplicate_extents);

				/*
				 * We have to do this song and dance in case we
				 * find an extent record that falls inside of
				 * our current extent record but does not have
				 * the same objectid.
				 */
				tmp = malloc(sizeof(*tmp));
				if (!tmp)
					return -ENOMEM;
				tmp->start = tmpl->start;
				tmp->max_size = tmpl->max_size;
				tmp->nr = tmpl->nr;
				tmp->found_rec = 1;
				tmp->metadata = tmpl->metadata;
				tmp->extent_item_refs = tmpl->extent_item_refs;
				INIT_LIST_HEAD(&tmp->list);
				list_add_tail(&tmp->list, &rec->dups);
				rec->num_duplicates++;
			} else {
				rec->nr = tmpl->nr;
				rec->found_rec = 1;
			}
		}

		if (tmpl->extent_item_refs && !dup) {
			if (rec->extent_item_refs) {
				fprintf(stderr, "block %llu rec "
					"extent_item_refs %llu, passed %llu\n",
					(unsigned long long)tmpl->start,
					(unsigned long long)
							rec->extent_item_refs,
					(unsigned long long)tmpl->extent_item_refs);
			}
			rec->extent_item_refs = tmpl->extent_item_refs;
		}
		if (tmpl->is_root)
			rec->is_root = 1;
		if (tmpl->content_checked)
			rec->content_checked = 1;
		if (tmpl->owner_ref_checked)
			rec->owner_ref_checked = 1;
		memcpy(&rec->parent_key, &tmpl->parent_key,
		       sizeof(tmpl->parent_key));
		if (tmpl->parent_generation)
			rec->parent_generation = tmpl->parent_generation;
		if (rec->max_size < tmpl->max_size)
			rec->max_size = tmpl->max_size;

		/*
		 * A metadata extent can't cross stripe_len boundary, otherwise
		 * kernel scrub won't be able to handle it.
		 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
		 * it.
		 */
		if (tmpl->metadata)
			rec->crossing_stripes = check_crossing_stripes(
				rec->start, global_info->tree_root->nodesize);
		check_extent_type(rec);
		maybe_free_extent_rec(extent_cache, rec);
		return ret;
	}

	/* Not cached yet: create a fresh record from the template. */
	ret = add_extent_rec_nolookup(extent_cache, tmpl);

	return ret;
}
/*
 * Record a tree-block backref for @bytenr.  Creates a placeholder extent
 * record when none is cached yet, then marks the backref as found on disk
 * (@found_ref) or found in the extent tree, warning on duplicates.
 *
 * Returns 0 on success or a negative errno.
 */
static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
			    u64 parent, u64 root, int found_ref)
{
	struct extent_record *rec;
	struct tree_backref *back;
	struct cache_extent *cache;

	cache = lookup_cache_extent(extent_cache, bytenr, 1);
	if (!cache) {
		struct extent_record tmpl;

		memset(&tmpl, 0, sizeof(tmpl));
		tmpl.start = bytenr;
		tmpl.nr = 1;
		tmpl.metadata = 1;

		/*
		 * NOTE(review): return value ignored — an allocation failure
		 * here is only caught by the lookup below.  Confirm against
		 * upstream whether this should propagate -ENOMEM.
		 */
		add_extent_rec_nolookup(extent_cache, &tmpl);

		/* really a bug in cache_extent implement now */
		cache = lookup_cache_extent(extent_cache, bytenr, 1);
		if (!cache)
			return -ENOENT;
	}

	rec = container_of(cache, struct extent_record, cache);
	if (rec->start != bytenr) {
		/*
		 * Several cause, from unaligned bytenr to over lapping extents
		 */
		return -EEXIST;
	}

	back = find_tree_backref(rec, parent, root);
	if (!back) {
		back = alloc_tree_backref(rec, parent, root);
		if (!back)
			return -ENOMEM;
	}

	if (found_ref) {
		if (back->node.found_ref) {
			fprintf(stderr, "Extent back ref already exists "
				"for %llu parent %llu root %llu \n",
				(unsigned long long)bytenr,
				(unsigned long long)parent,
				(unsigned long long)root);
		}
		back->node.found_ref = 1;
	} else {
		if (back->node.found_extent_tree) {
			fprintf(stderr, "Extent back ref already exists "
				"for %llu parent %llu root %llu \n",
				(unsigned long long)bytenr,
				(unsigned long long)parent,
				(unsigned long long)root);
		}
		back->node.found_extent_tree = 1;
	}
	check_extent_type(rec);
	maybe_free_extent_rec(extent_cache, rec);
	return 0;
}
/*
 * Record a data-extent backref for @bytenr, either from an on-disk file
 * extent item (@found_ref set, @max_size is the real extent size) or from
 * the extent tree (@num_refs from the ref item).  Creates a placeholder
 * extent record when none is cached yet.
 *
 * Returns 0 on success.
 */
static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
			    u64 parent, u64 root, u64 owner, u64 offset,
			    u32 num_refs, int found_ref, u64 max_size)
{
	struct extent_record *rec;
	struct data_backref *back;
	struct cache_extent *cache;

	cache = lookup_cache_extent(extent_cache, bytenr, 1);
	if (!cache) {
		struct extent_record tmpl;

		memset(&tmpl, 0, sizeof(tmpl));
		tmpl.start = bytenr;
		tmpl.nr = 1;
		tmpl.max_size = max_size;

		add_extent_rec_nolookup(extent_cache, &tmpl);

		cache = lookup_cache_extent(extent_cache, bytenr, 1);
		if (!cache)
			abort();
	}

	rec = container_of(cache, struct extent_record, cache);
	if (rec->max_size < max_size)
		rec->max_size = max_size;

	/*
	 * If found_ref is set then max_size is the real size and must match the
	 * existing refs. So if we have already found a ref then we need to
	 * make sure that this ref matches the existing one, otherwise we need
	 * to add a new backref so we can notice that the backrefs don't match
	 * and we need to figure out who is telling the truth. This is to
	 * account for that awful fsync bug I introduced where we'd end up with
	 * a btrfs_file_extent_item that would have its length include multiple
	 * prealloc extents or point inside of a prealloc extent.
	 */
	back = find_data_backref(rec, parent, root, owner, offset, found_ref,
				 bytenr, max_size);
	if (!back)
		back = alloc_data_backref(rec, parent, root, owner, offset,
					  max_size);
	BUG_ON(!back);

	if (found_ref) {
		BUG_ON(num_refs != 1);
		if (back->node.found_ref)
			BUG_ON(back->bytes != max_size);
		back->node.found_ref = 1;
		back->found_ref += 1;
		back->bytes = max_size;
		back->disk_bytenr = bytenr;
		rec->refs += 1;
		rec->content_checked = 1;
		rec->owner_ref_checked = 1;
	} else {
		if (back->node.found_extent_tree) {
			fprintf(stderr, "Extent back ref already exists "
				"for %llu parent %llu root %llu "
				"owner %llu offset %llu num_refs %lu\n",
				(unsigned long long)bytenr,
				(unsigned long long)parent,
				(unsigned long long)root,
				(unsigned long long)owner,
				(unsigned long long)offset,
				(unsigned long)num_refs);
		}
		back->num_refs = num_refs;
		back->node.found_extent_tree = 1;
	}
	maybe_free_extent_rec(extent_cache, rec);
	return 0;
}
/*
 * Queue [bytenr, bytenr+size) for scanning unless it was already seen.
 * The @seen tree deduplicates; insertion failure there means the range is
 * already pending or done.
 */
static int add_pending(struct cache_tree *pending,
		       struct cache_tree *seen, u64 bytenr, u32 size)
{
	int ret;

	ret = add_cache_extent(seen, bytenr, size);
	if (ret)
		return ret;
	/*
	 * NOTE(review): second insert's result is discarded — an -ENOMEM
	 * here would silently drop the range from @pending; confirm this is
	 * acceptable best-effort behavior.
	 */
	add_cache_extent(pending, bytenr, size);
	return 0;
}
/*
 * Choose the next batch of blocks to process, filling @bits (up to
 * @bits_nr entries).  Preference order: readahead candidates first
 * (*reada_bits set), then tree nodes near @last (a 32K window back to
 * favor locality), then anything pending.  When room remains, nearby
 * pending extents (within 32K of the batch head) are appended.
 *
 * Returns the number of entries placed in @bits, 0 when nothing is left.
 */
static int pick_next_pending(struct cache_tree *pending,
			struct cache_tree *reada,
			struct cache_tree *nodes,
			u64 last, struct block_info *bits, int bits_nr,
			int *reada_bits)
{
	unsigned long node_start = last;
	struct cache_extent *cache;
	int ret;

	cache = search_cache_extent(reada, 0);
	if (cache) {
		bits[0].start = cache->start;
		bits[0].size = cache->size;
		*reada_bits = 1;
		return 1;
	}
	*reada_bits = 0;
	/* Step back a little to pick up nodes just behind @last. */
	if (node_start > 32768)
		node_start -= 32768;

	cache = search_cache_extent(nodes, node_start);
	if (!cache)
		cache = search_cache_extent(nodes, 0);

	if (!cache) {
		cache = search_cache_extent(pending, 0);
		if (!cache)
			return 0;
		ret = 0;
		do {
			bits[ret].start = cache->start;
			bits[ret].size = cache->size;
			cache = next_cache_extent(cache);
			ret++;
		} while (cache && ret < bits_nr);
		return ret;
	}

	ret = 0;
	do {
		bits[ret].start = cache->start;
		bits[ret].size = cache->size;
		cache = next_cache_extent(cache);
		ret++;
	} while (cache && ret < bits_nr);

	if (bits_nr - ret > 8) {
		u64 lookup = bits[0].start + bits[0].size;
		struct cache_extent *next;

		next = search_cache_extent(pending, lookup);
		while (next) {
			if (next->start - lookup > 32768)
				break;
			bits[ret].start = next->start;
			bits[ret].size = next->size;
			lookup = next->start + next->size;
			ret++;
			if (ret == bits_nr)
				break;
			next = next_cache_extent(next);
		}
	}
	return ret;
}
/* cache_tree_free_extents() callback: unlink and free one chunk record. */
static void free_chunk_record(struct cache_extent *cache)
{
	struct chunk_record *rec;

	rec = container_of(cache, struct chunk_record, cache);
	list_del_init(&rec->list);
	list_del_init(&rec->dextents);
	free(rec);
}
/* Free all chunk records held in @chunk_cache. */
void free_chunk_cache_tree(struct cache_tree *chunk_cache)
{
	cache_tree_free_extents(chunk_cache, free_chunk_record);
}
/* FREE_RB_BASED_TREE callback: free one device record. */
static void free_device_record(struct rb_node *node)
{
	struct device_record *rec;

	rec = container_of(node, struct device_record, node);
	free(rec);
}
5090 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into @tree and link it on the tree's block group list.
 * Returns the insert_cache_extent() error (e.g. overlap) without linking
 * on failure.
 */
int insert_block_group_record(struct block_group_tree *tree,
			      struct block_group_record *bg_rec)
{
	int ret;

	ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
	if (ret)
		return ret;

	list_add_tail(&bg_rec->list, &tree->block_groups);
	return 0;
}
/* cache_tree_free_extents() callback: unlink and free one block group record. */
static void free_block_group_record(struct cache_extent *cache)
{
	struct block_group_record *rec;

	rec = container_of(cache, struct block_group_record, cache);
	list_del_init(&rec->list);
	free(rec);
}
/* Free all block group records held in @tree. */
void free_block_group_tree(struct block_group_tree *tree)
{
	cache_tree_free_extents(&tree->tree, free_block_group_record);
}
/*
 * Insert @de_rec into @tree and link it on both orphan lists (it has not
 * yet been matched to a chunk or a device).  Returns the insert error on
 * failure without linking.
 */
int insert_device_extent_record(struct device_extent_tree *tree,
				struct device_extent_record *de_rec)
{
	int ret;

	/*
	 * Device extent is a bit different from the other extents, because
	 * the extents which belong to the different devices may have the
	 * same start and size, so we need use the special extent cache
	 * search/insert functions.
	 */
	ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
	if (ret)
		return ret;

	list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
	list_add_tail(&de_rec->device_list, &tree->no_device_orphans);

	return 0;
}
/*
 * cache_tree_free_extents() callback: unlink a device extent record from
 * the orphan lists it may still be on, then free it.
 */
static void free_device_extent_record(struct cache_extent *cache)
{
	struct device_extent_record *rec;

	rec = container_of(cache, struct device_extent_record, cache);
	if (!list_empty(&rec->chunk_list))
		list_del_init(&rec->chunk_list);
	if (!list_empty(&rec->device_list))
		list_del_init(&rec->device_list);
	free(rec);
}
/* Free all device extent records held in @tree. */
void free_device_extent_tree(struct device_extent_tree *tree)
{
	cache_tree_free_extents(&tree->tree, free_device_extent_record);
}
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record a v0 (pre-2.6.29 format) extent ref.  Owner objectids below
 * BTRFS_FIRST_FREE_OBJECTID are internal trees, so the ref is a tree
 * backref; otherwise it is a data backref with the v0 ref count.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;
	int ret;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
		ret = add_tree_backref(extent_cache, key.objectid, key.offset,
				0, 0);
	} else {
		ret = add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
	}
	return ret;
}
#endif
/*
 * Build a chunk_record from the CHUNK_ITEM at @slot of @leaf, including
 * per-stripe devid/offset/uuid.  The record is sized for the actual stripe
 * count.  Exits the process on allocation failure (fsck policy for OOM).
 */
struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
					    struct btrfs_key *key,
					    int slot)
{
	struct btrfs_chunk *ptr;
	struct chunk_record *rec;
	int num_stripes, i;

	ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
	num_stripes = btrfs_chunk_num_stripes(leaf, ptr);

	rec = calloc(1, btrfs_chunk_record_size(num_stripes));
	if (!rec) {
		fprintf(stderr, "memory allocation failed\n");
		exit(-1);
	}

	INIT_LIST_HEAD(&rec->list);
	INIT_LIST_HEAD(&rec->dextents);
	rec->bg_rec = NULL;

	/* Chunks are keyed by their logical start (key offset). */
	rec->cache.start = key->offset;
	rec->cache.size = btrfs_chunk_length(leaf, ptr);

	rec->generation = btrfs_header_generation(leaf);

	rec->objectid = key->objectid;
	rec->type = key->type;
	rec->offset = key->offset;

	rec->length = rec->cache.size;
	rec->owner = btrfs_chunk_owner(leaf, ptr);
	rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
	rec->type_flags = btrfs_chunk_type(leaf, ptr);
	rec->io_width = btrfs_chunk_io_width(leaf, ptr);
	rec->io_align = btrfs_chunk_io_align(leaf, ptr);
	rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
	rec->num_stripes = num_stripes;
	rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);

	for (i = 0; i < rec->num_stripes; ++i) {
		rec->stripes[i].devid =
			btrfs_stripe_devid_nr(leaf, ptr, i);
		rec->stripes[i].offset =
			btrfs_stripe_offset_nr(leaf, ptr, i);
		read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
				(unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
				BTRFS_UUID_SIZE);
	}

	return rec;
}
/*
 * Validate and cache a CHUNK_ITEM.  Invalid chunks are reported and
 * skipped (returns 0 so the scan continues); a duplicate logical range is
 * reported and the new record freed.
 */
static int process_chunk_item(struct cache_tree *chunk_cache,
			      struct btrfs_key *key, struct extent_buffer *eb,
			      int slot)
{
	struct chunk_record *rec;
	struct btrfs_chunk *chunk;
	int ret = 0;

	chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
	/*
	 * Do extra check for this chunk item,
	 *
	 * It's still possible one can craft a leaf with CHUNK_ITEM, with
	 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
	 * and owner<->key_type check.
	 */
	ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
				      key->offset);
	if (ret < 0) {
		error("chunk(%llu, %llu) is not valid, ignore it",
		      key->offset, btrfs_chunk_length(eb, chunk));
		return 0;
	}
	rec = btrfs_new_chunk_record(eb, key, slot);
	ret = insert_cache_extent(chunk_cache, &rec->cache);
	if (ret) {
		fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
			rec->offset, rec->length);
		free(rec);
	}

	return ret;
}
/*
 * Build a device_record from the DEV_ITEM at @slot and insert it into
 * @dev_cache, keyed by devid.  A duplicate devid is reported and the new
 * record freed.
 */
static int process_device_item(struct rb_root *dev_cache,
		struct btrfs_key *key, struct extent_buffer *eb, int slot)
{
	struct btrfs_dev_item *ptr;
	struct device_record *rec;
	int ret = 0;

	ptr = btrfs_item_ptr(eb,
		slot, struct btrfs_dev_item);

	rec = malloc(sizeof(*rec));
	if (!rec) {
		fprintf(stderr, "memory allocation failed\n");
		return -ENOMEM;
	}

	/* Provisional devid from the key; overwritten from the item below. */
	rec->devid = key->offset;
	rec->generation = btrfs_header_generation(eb);

	rec->objectid = key->objectid;
	rec->type = key->type;
	rec->offset = key->offset;

	rec->devid = btrfs_device_id(eb, ptr);
	rec->total_byte = btrfs_device_total_bytes(eb, ptr);
	rec->byte_used = btrfs_device_bytes_used(eb, ptr);

	ret = rb_insert(dev_cache, &rec->node, device_record_compare);
	if (ret) {
		fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
		free(rec);
	}

	return ret;
}
/*
 * Build a block_group_record from the BLOCK_GROUP_ITEM at @slot.  The
 * cache range is [objectid, objectid + offset) per the on-disk key layout.
 * Exits the process on allocation failure (fsck policy for OOM).
 */
struct block_group_record *
btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
			     int slot)
{
	struct btrfs_block_group_item *ptr;
	struct block_group_record *rec;

	rec = calloc(1, sizeof(*rec));
	if (!rec) {
		fprintf(stderr, "memory allocation failed\n");
		exit(-1);
	}

	rec->cache.start = key->objectid;
	rec->cache.size = key->offset;

	rec->generation = btrfs_header_generation(leaf);

	rec->objectid = key->objectid;
	rec->type = key->type;
	rec->offset = key->offset;

	ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
	rec->flags = btrfs_disk_block_group_flags(leaf, ptr);

	INIT_LIST_HEAD(&rec->list);

	return rec;
}
/*
 * Cache a BLOCK_GROUP_ITEM.  A duplicate range is reported and the new
 * record freed.
 */
static int process_block_group_item(struct block_group_tree *block_group_cache,
				    struct btrfs_key *key,
				    struct extent_buffer *eb, int slot)
{
	struct block_group_record *rec;
	int ret = 0;

	rec = btrfs_new_block_group_record(eb, key, slot);
	ret = insert_block_group_record(block_group_cache, rec);
	if (ret) {
		fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
			rec->objectid, rec->offset);
		free(rec);
	}

	return ret;
}
/*
 * Build a device_extent_record from the DEV_EXTENT item at @slot.  The
 * cache key is (devid = key objectid, start = key offset) since extents on
 * different devices may share start and size.  Exits the process on
 * allocation failure (fsck policy for OOM).
 */
struct device_extent_record *
btrfs_new_device_extent_record(struct extent_buffer *leaf,
			       struct btrfs_key *key, int slot)
{
	struct device_extent_record *rec;
	struct btrfs_dev_extent *ptr;

	rec = calloc(1, sizeof(*rec));
	if (!rec) {
		fprintf(stderr, "memory allocation failed\n");
		exit(-1);
	}

	rec->cache.objectid = key->objectid;
	rec->cache.start = key->offset;

	rec->generation = btrfs_header_generation(leaf);

	rec->objectid = key->objectid;
	rec->type = key->type;
	rec->offset = key->offset;

	ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
	rec->chunk_objecteid =
		btrfs_dev_extent_chunk_objectid(leaf, ptr);
	rec->chunk_offset =
		btrfs_dev_extent_chunk_offset(leaf, ptr);
	rec->length = btrfs_dev_extent_length(leaf, ptr);
	rec->cache.size = rec->length;

	INIT_LIST_HEAD(&rec->chunk_list);
	INIT_LIST_HEAD(&rec->device_list);

	return rec;
}
/*
 * Cache a DEV_EXTENT item.  A duplicate (devid, start, length) triple is
 * reported and the new record freed.
 */
static int
process_device_extent_item(struct device_extent_tree *dev_extent_cache,
			   struct btrfs_key *key, struct extent_buffer *eb,
			   int slot)
{
	struct device_extent_record *rec;
	int ret;

	rec = btrfs_new_device_extent_record(eb, key, slot);
	ret = insert_device_extent_record(dev_extent_cache, rec);
	if (ret) {
		fprintf(stderr,
			"Device extent[%llu, %llu, %llu] existed.\n",
			rec->objectid, rec->offset, rec->length);
		free(rec);
	}

	return ret;
}
/*
 * Record an EXTENT_ITEM / METADATA_ITEM and all of its inline backrefs
 * into @extent_cache.
 *
 * Validates alignment of the extent's bytenr/length against sectorsize
 * (and nodesize for metadata) before recording, then walks the inline
 * refs and adds a tree or data backref for each.
 * NOTE(review): interior lines are missing from this excerpt (embedded
 * line numbers jump); code is kept byte-identical.
 */
5404 static int process_extent_item(struct btrfs_root *root,
5405 struct cache_tree *extent_cache,
5406 struct extent_buffer *eb, int slot)
5408 struct btrfs_extent_item *ei;
5409 struct btrfs_extent_inline_ref *iref;
5410 struct btrfs_extent_data_ref *dref;
5411 struct btrfs_shared_data_ref *sref;
5412 struct btrfs_key key;
5413 struct extent_record tmpl;
5418 u32 item_size = btrfs_item_size_nr(eb, slot);
5424 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM stores the level in key.offset; size is the nodesize. */
5426 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5428 num_bytes = root->nodesize;
5430 num_bytes = key.offset;
5433 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5434 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5435 key.objectid, root->sectorsize);
/* Pre-inline-ref (v0) extent item: only the refcount is available. */
5438 if (item_size < sizeof(*ei)) {
5439 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5440 struct btrfs_extent_item_v0 *ei0;
5441 BUG_ON(item_size != sizeof(*ei0));
5442 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5443 refs = btrfs_extent_refs_v0(eb, ei0);
5447 memset(&tmpl, 0, sizeof(tmpl));
5448 tmpl.start = key.objectid;
5449 tmpl.nr = num_bytes;
5450 tmpl.extent_item_refs = refs;
5451 tmpl.metadata = metadata;
5453 tmpl.max_size = num_bytes;
5455 return add_extent_rec(extent_cache, &tmpl);
5458 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5459 refs = btrfs_extent_refs(eb, ei);
5460 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5464 if (metadata && num_bytes != root->nodesize) {
5465 error("ignore invalid metadata extent, length %llu does not equal to %u",
5466 num_bytes, root->nodesize);
5469 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5470 error("ignore invalid data extent, length %llu is not aligned to %u",
5471 num_bytes, root->sectorsize);
/* Record the extent itself before walking its inline references. */
5475 memset(&tmpl, 0, sizeof(tmpl));
5476 tmpl.start = key.objectid;
5477 tmpl.nr = num_bytes;
5478 tmpl.extent_item_refs = refs;
5479 tmpl.metadata = metadata;
5481 tmpl.max_size = num_bytes;
5482 add_extent_rec(extent_cache, &tmpl);
5484 ptr = (unsigned long)(ei + 1);
/* EXTENT_ITEM tree blocks carry a btrfs_tree_block_info before the refs. */
5485 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5486 key.type == BTRFS_EXTENT_ITEM_KEY)
5487 ptr += sizeof(struct btrfs_tree_block_info);
5489 end = (unsigned long)ei + item_size;
/* Walk each inline reference and record a matching backref. */
5491 iref = (struct btrfs_extent_inline_ref *)ptr;
5492 type = btrfs_extent_inline_ref_type(eb, iref);
5493 offset = btrfs_extent_inline_ref_offset(eb, iref);
5495 case BTRFS_TREE_BLOCK_REF_KEY:
5496 ret = add_tree_backref(extent_cache, key.objectid,
5499 error("add_tree_backref failed: %s",
5502 case BTRFS_SHARED_BLOCK_REF_KEY:
5503 ret = add_tree_backref(extent_cache, key.objectid,
5506 error("add_tree_backref failed: %s",
5509 case BTRFS_EXTENT_DATA_REF_KEY:
5510 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5511 add_data_backref(extent_cache, key.objectid, 0,
5512 btrfs_extent_data_ref_root(eb, dref),
5513 btrfs_extent_data_ref_objectid(eb,
5515 btrfs_extent_data_ref_offset(eb, dref),
5516 btrfs_extent_data_ref_count(eb, dref),
5519 case BTRFS_SHARED_DATA_REF_KEY:
5520 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5521 add_data_backref(extent_cache, key.objectid, offset,
5523 btrfs_shared_data_ref_count(eb, sref),
/* Unknown inline ref type: report the corrupt record. */
5527 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5528 key.objectid, key.type, num_bytes);
5531 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that the range [@offset, @offset + @bytes) of @cache is covered
 * by a matching free-space entry, after first carving out any superblock
 * mirror stripes that overlap the range (supers are never free space).
 * The matched entry is unlinked from the free-space ctl so that leftovers
 * can be detected by the caller.
 * NOTE(review): interior lines are missing from this excerpt (embedded
 * line numbers jump); code is kept byte-identical.
 */
5538 static int check_cache_range(struct btrfs_root *root,
5539 struct btrfs_block_group_cache *cache,
5540 u64 offset, u64 bytes)
5542 struct btrfs_free_space *entry;
/* Exclude every superblock mirror that maps into this block group. */
5548 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5549 bytenr = btrfs_sb_offset(i);
5550 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5551 cache->key.objectid, bytenr, 0,
5552 &logical, &nr, &stripe_len);
5557 if (logical[nr] + stripe_len <= offset)
5559 if (offset + bytes <= logical[nr])
/* Super stripe starts exactly at our offset: trim from the front. */
5561 if (logical[nr] == offset) {
5562 if (stripe_len >= bytes) {
5566 bytes -= stripe_len;
5567 offset += stripe_len;
/* Super stripe starts before us: trim the overlapping head. */
5568 } else if (logical[nr] < offset) {
5569 if (logical[nr] + stripe_len >=
5574 bytes = (offset + bytes) -
5575 (logical[nr] + stripe_len);
5576 offset = logical[nr] + stripe_len;
5579 * Could be tricky, the super may land in the
5580 * middle of the area we're checking. First
5581 * check the easiest case, it's at the end.
5583 if (logical[nr] + stripe_len >=
5585 bytes = logical[nr] - offset;
5589 /* Check the left side */
5590 ret = check_cache_range(root, cache,
5592 logical[nr] - offset);
5598 /* Now we continue with the right side */
5599 bytes = (offset + bytes) -
5600 (logical[nr] + stripe_len);
5601 offset = logical[nr] + stripe_len;
/* The remaining range must match one free-space entry exactly. */
5608 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5610 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5611 offset, offset+bytes);
5615 if (entry->offset != offset) {
5616 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5621 if (entry->bytes != bytes) {
5622 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5623 bytes, entry->bytes, offset);
/* Consume the entry; leftovers indicate a bad cache. */
5627 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check one block group's free-space cache against the extent tree:
 * every gap between allocated extents inside the group must be present in
 * the cache (checked via check_cache_range), and after the walk no cache
 * entries may remain.
 * NOTE(review): interior lines are missing from this excerpt (embedded
 * line numbers jump); code is kept byte-identical.
 */
5632 static int verify_space_cache(struct btrfs_root *root,
5633 struct btrfs_block_group_cache *cache)
5635 struct btrfs_path *path;
5636 struct extent_buffer *leaf;
5637 struct btrfs_key key;
5641 path = btrfs_alloc_path();
5645 root = root->fs_info->extent_root;
/* Never look below the first superblock copy. */
5647 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5649 key.objectid = last;
5651 key.type = BTRFS_EXTENT_ITEM_KEY;
5653 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5658 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5659 ret = btrfs_next_leaf(root, path);
5667 leaf = path->nodes[0];
5668 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop once the item lies past the end of this block group. */
5669 if (key.objectid >= cache->key.offset + cache->key.objectid)
5671 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5672 key.type != BTRFS_METADATA_ITEM_KEY) {
5677 if (last == key.objectid) {
/* METADATA_ITEM length is implicit (nodesize), EXTENT_ITEM is key.offset. */
5678 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5679 last = key.objectid + key.offset;
5681 last = key.objectid + root->nodesize;
/* Gap between extents must be recorded free space. */
5686 ret = check_cache_range(root, cache, last,
5687 key.objectid - last);
5690 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5691 last = key.objectid + key.offset;
5693 last = key.objectid + root->nodesize;
/* Tail of the block group past the final extent. */
5697 if (last < cache->key.objectid + cache->key.offset)
5698 ret = check_cache_range(root, cache, last,
5699 cache->key.objectid +
5700 cache->key.offset - last);
5703 btrfs_free_path(path);
/* Entries surviving the walk mean the cache claims space that is used. */
5706 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5707 fprintf(stderr, "There are still entries left in the space "
/*
 * Validate the free-space cache (v1 file-based or v2 free-space tree) of
 * every block group in the filesystem against the extent tree.
 * Returns -EINVAL if any block group's cache fails verification.
 * NOTE(review): interior lines are missing from this excerpt (embedded
 * line numbers jump); code is kept byte-identical.
 */
5715 static int check_space_cache(struct btrfs_root *root)
5717 struct btrfs_block_group_cache *cache;
5718 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/* A stale cache generation means the kernel will throw the cache away. */
5722 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5723 btrfs_super_generation(root->fs_info->super_copy) !=
5724 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5725 printf("cache and super generation don't match, space cache "
5726 "will be invalidated\n");
5730 if (ctx.progress_enabled) {
5731 ctx.tp = TASK_FREE_SPACE;
5732 task_start(ctx.info);
/* Iterate block groups in address order. */
5736 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5740 start = cache->key.objectid + cache->key.offset;
5741 if (!cache->free_space_ctl) {
5742 if (btrfs_init_free_space_ctl(cache,
5743 root->sectorsize)) {
5748 btrfs_remove_free_space_cache(cache);
/* v2: load from the free space tree, excluding super stripes first. */
5751 if (btrfs_fs_compat_ro(root->fs_info,
5752 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5753 ret = exclude_super_stripes(root, cache);
5755 fprintf(stderr, "could not exclude super stripes: %s\n",
5760 ret = load_free_space_tree(root->fs_info, cache);
5761 free_excluded_extents(root, cache);
5763 fprintf(stderr, "could not load free space tree: %s\n",
/* v1: load the cached inode contents. */
5770 ret = load_free_space_cache(root->fs_info, cache);
5775 ret = verify_space_cache(root, cache);
5777 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5778 cache->key.objectid);
5783 task_stop(ctx.info);
5785 return error ? -EINVAL : 0;
/*
 * Re-read the data extent at @bytenr/@num_bytes and verify each sector's
 * checksum against the csum item stored at @leaf_offset in @eb.
 * On a mismatch the next mirror is tried (up to the number of copies).
 * NOTE(review): interior lines are missing from this excerpt (embedded
 * line numbers jump); code is kept byte-identical.
 */
5788 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5789 u64 num_bytes, unsigned long leaf_offset,
5790 struct extent_buffer *eb) {
5793 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5795 unsigned long csum_offset;
5799 u64 data_checked = 0;
/* Data extents must be whole sectors. */
5805 if (num_bytes % root->sectorsize)
5808 data = malloc(num_bytes);
5812 while (offset < num_bytes) {
5815 read_len = num_bytes - offset;
/* read as much data as possible in one pass */
5816 ret = read_extent_data(root, data + offset,
5818 bytenr + offset, &read_len, mirror);
/* verify the checksum of every sector of the data just read */
5823 while (data_checked < read_len) {
5825 tmp = offset + data_checked;
5827 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5828 csum, root->sectorsize);
5829 btrfs_csum_final(csum, (char *)&csum);
/* Locate the on-disk checksum for this sector inside the csum item. */
5831 csum_offset = leaf_offset +
5832 tmp / root->sectorsize * csum_size;
5833 read_extent_buffer(eb, (char *)&csum_expected,
5834 csum_offset, csum_size);
/* try another mirror */
5836 if (csum != csum_expected) {
5837 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5838 mirror, bytenr + tmp,
5839 csum, csum_expected);
5840 num_copies = btrfs_num_copies(
5841 &root->fs_info->mapping_tree,
5843 if (mirror < num_copies - 1) {
5848 data_checked += root->sectorsize;
/*
 * Verify that the byte range [@bytenr, @bytenr + @num_bytes) is entirely
 * covered by EXTENT_ITEMs in the extent tree; any uncovered remainder is
 * reported (csums exist without a backing extent record).
 * NOTE(review): interior lines are missing from this excerpt (embedded
 * line numbers jump); code is kept byte-identical.
 */
5857 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5860 struct btrfs_path *path;
5861 struct extent_buffer *leaf;
5862 struct btrfs_key key;
5865 path = btrfs_alloc_path();
5867 fprintf(stderr, "Error allocating path\n");
/* Search for the last extent item at or before @bytenr. */
5871 key.objectid = bytenr;
5872 key.type = BTRFS_EXTENT_ITEM_KEY;
5873 key.offset = (u64)-1;
5876 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5879 fprintf(stderr, "Error looking up extent record %d\n", ret);
5880 btrfs_free_path(path);
5883 if (path->slots[0] > 0) {
5886 ret = btrfs_prev_leaf(root, path);
5889 } else if (ret > 0) {
5896 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5899 * Block group items come before extent items if they have the same
5900 * bytenr, so walk back one more just in case. Dear future traveller,
5901 * first congrats on mastering time travel. Now if it's not too much
5902 * trouble could you go back to 2006 and tell Chris to make the
5903 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5904 * EXTENT_ITEM_KEY please?
5906 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5907 if (path->slots[0] > 0) {
5910 ret = btrfs_prev_leaf(root, path);
5913 } else if (ret > 0) {
5918 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* Walk forward, shrinking the queried range as extents cover it. */
5922 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5923 ret = btrfs_next_leaf(root, path);
5925 fprintf(stderr, "Error going to next leaf "
5927 btrfs_free_path(path);
5933 leaf = path->nodes[0];
5934 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5935 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5939 if (key.objectid + key.offset < bytenr) {
5943 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at our range: consume its length. */
5946 if (key.objectid == bytenr) {
5947 if (key.offset >= num_bytes) {
5951 num_bytes -= key.offset;
5952 bytenr += key.offset;
/* Extent starts before our range: consume the overlap. */
5953 } else if (key.objectid < bytenr) {
5954 if (key.objectid + key.offset >= bytenr + num_bytes) {
5958 num_bytes = (bytenr + num_bytes) -
5959 (key.objectid + key.offset);
5960 bytenr = key.objectid + key.offset;
5962 if (key.objectid + key.offset < bytenr + num_bytes) {
5963 u64 new_start = key.objectid + key.offset;
5964 u64 new_bytes = bytenr + num_bytes - new_start;
5967 * Weird case, the extent is in the middle of
5968 * our range, we'll have to search one side
5969 * and then the other. Not sure if this happens
5970 * in real life, but no harm in coding it up
5971 * anyway just in case.
5973 btrfs_release_path(path);
5974 ret = check_extent_exists(root, new_start,
5977 fprintf(stderr, "Right section didn't "
5981 num_bytes = key.objectid - bytenr;
5984 num_bytes = key.objectid - bytenr;
/* Anything left uncovered is an error. */
5991 if (num_bytes && !ret) {
5992 fprintf(stderr, "There are no extents for csum range "
5993 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5997 btrfs_free_path(path);
/*
 * Walk the whole csum tree, verifying that every checksummed byte range
 * is backed by an extent record (check_extent_exists) and, when
 * check_data_csum is set, that the data actually matches its checksums
 * (check_extent_csums).
 * NOTE(review): interior lines are missing from this excerpt (embedded
 * line numbers jump); code is kept byte-identical.
 */
6001 static int check_csums(struct btrfs_root *root)
6003 struct btrfs_path *path;
6004 struct extent_buffer *leaf;
6005 struct btrfs_key key;
6006 u64 offset = 0, num_bytes = 0;
6007 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6011 unsigned long leaf_offset;
6013 root = root->fs_info->csum_root;
6014 if (!extent_buffer_uptodate(root->node)) {
6015 fprintf(stderr, "No valid csum tree found\n");
/* Start at the very first csum item. */
6019 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6020 key.type = BTRFS_EXTENT_CSUM_KEY;
6023 path = btrfs_alloc_path();
6027 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6029 fprintf(stderr, "Error searching csum tree %d\n", ret);
6030 btrfs_free_path(path);
6034 if (ret > 0 && path->slots[0])
6039 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6040 ret = btrfs_next_leaf(root, path);
6042 fprintf(stderr, "Error going to next leaf "
6049 leaf = path->nodes[0];
6051 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6052 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Number of data bytes this csum item covers. */
6057 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
6058 csum_size) * root->sectorsize;
6059 if (!check_data_csum)
6060 goto skip_csum_check;
6061 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
6062 ret = check_extent_csums(root, key.offset, data_len,
6068 offset = key.offset;
/* Discontiguous csum range: the accumulated run must map to extents. */
6069 } else if (key.offset != offset + num_bytes) {
6070 ret = check_extent_exists(root, offset, num_bytes);
6072 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6073 "there is no extent record\n",
6074 offset, offset+num_bytes);
6077 offset = key.offset;
6080 num_bytes += data_len;
6084 btrfs_free_path(path);
6088 static int is_dropped_key(struct btrfs_key *key,
6089 struct btrfs_key *drop_key) {
6090 if (key->objectid < drop_key->objectid)
6092 else if (key->objectid == drop_key->objectid) {
6093 if (key->type < drop_key->type)
6095 else if (key->type == drop_key->type) {
6096 if (key->offset < drop_key->offset)
6104 * Here are the rules for FULL_BACKREF.
6106 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6107 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6109 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6110 * if it happened after the relocation occurred since we'll have dropped the
6111 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6112 * have no real way to know for sure.
6114 * We process the blocks one root at a time, and we start from the lowest root
6115 * objectid and go to the highest. So we can just lookup the owner backref for
6116 * the record and if we don't find it then we know it doesn't exist and we have
6119 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6120 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6121 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether the tree block @buf should carry FULL_BACKREF in @flags,
 * using the cached extent record and the owning root @ri (see the rules
 * in the comment block above this function).  Records a bad_full_backref
 * mismatch when the cached flag disagrees with the computed one.
 * NOTE(review): interior lines are missing from this excerpt (embedded
 * line numbers jump); code is kept byte-identical.
 */
6123 static int calc_extent_flag(struct btrfs_root *root,
6124 struct cache_tree *extent_cache,
6125 struct extent_buffer *buf,
6126 struct root_item_record *ri,
6129 struct extent_record *rec;
6130 struct cache_extent *cache;
6131 struct tree_backref *tback;
6134 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6135 /* we have added this extent before */
6137 rec = container_of(cache, struct extent_record, cache);
6140 * Except file/reloc tree, we can not have
6143 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* The root's own node cannot be shared via FULL_BACKREF. */
6148 if (buf->start == ri->bytenr)
6151 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6154 owner = btrfs_header_owner(buf);
6155 if (owner == ri->objectid)
/* Owner differs: look for an owner backref to decide. */
6158 tback = find_tree_backref(rec, 0, owner);
/* Flag a mismatch against the previously cached full-backref state. */
6163 if (rec->flag_block_full_backref != FLAG_UNSET &&
6164 rec->flag_block_full_backref != 0)
6165 rec->bad_full_backref = 1;
6168 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6169 if (rec->flag_block_full_backref != FLAG_UNSET &&
6170 rec->flag_block_full_backref != 1)
6171 rec->bad_full_backref = 1;
/*
 * Print "Invalid key type(<type>) found in root(<rootid>)" to stderr,
 * using the pretty-printers for key types and root objectids.
 */
6175 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6177 fprintf(stderr, "Invalid key type(");
6178 print_key_type(stderr, 0, key_type);
6179 fprintf(stderr, ") found in root(");
6180 print_objectid(stderr, rootid, 0);
6181 fprintf(stderr, ")\n");
6185 * Check if the key is valid with its extent buffer.
6187 * This is an early check in case an invalid key exists in an extent buffer
6188 * This is not comprehensive yet, but should prevent wrong key/item passed
6191 static int check_type_with_root(u64 rootid, u8 key_type)
6194 /* Only valid in chunk tree */
6195 case BTRFS_DEV_ITEM_KEY:
6196 case BTRFS_CHUNK_ITEM_KEY:
6197 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6200 /* valid in csum and log tree */
6201 case BTRFS_CSUM_TREE_OBJECTID:
6202 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6206 case BTRFS_EXTENT_ITEM_KEY:
6207 case BTRFS_METADATA_ITEM_KEY:
6208 case BTRFS_BLOCK_GROUP_ITEM_KEY:
6209 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6212 case BTRFS_ROOT_ITEM_KEY:
6213 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6216 case BTRFS_DEV_EXTENT_KEY:
6217 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6223 report_mismatch_key_root(key_type, rootid);
/*
 * Process one pending tree block from the breadth-first scan: pick the
 * next bytenr, read the block, settle its FULL_BACKREF flag, then record
 * every item (if a leaf) or child pointer (if a node) into the relevant
 * caches used by the extent/chunk/device cross checks.
 * NOTE(review): this excerpt is missing many interior lines (the embedded
 * line numbers jump); code below is kept byte-identical.
 */
6227 static int run_next_block(struct btrfs_root *root,
6228 struct block_info *bits,
6231 struct cache_tree *pending,
6232 struct cache_tree *seen,
6233 struct cache_tree *reada,
6234 struct cache_tree *nodes,
6235 struct cache_tree *extent_cache,
6236 struct cache_tree *chunk_cache,
6237 struct rb_root *dev_cache,
6238 struct block_group_tree *block_group_cache,
6239 struct device_extent_tree *dev_extent_cache,
6240 struct root_item_record *ri)
6242 struct extent_buffer *buf;
6243 struct extent_record *rec = NULL;
6254 struct btrfs_key key;
6255 struct cache_extent *cache;
/* Pick the next batch of blocks; optionally queue readahead for them. */
6258 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6259 bits_nr, &reada_bits);
6264 for(i = 0; i < nritems; i++) {
6265 ret = add_cache_extent(reada, bits[i].start,
6270 /* fixme, get the parent transid */
6271 readahead_tree_block(root, bits[i].start,
6275 *last = bits[0].start;
6276 bytenr = bits[0].start;
6277 size = bits[0].size;
/* Remove the chosen block from the bookkeeping trees before processing. */
6279 cache = lookup_cache_extent(pending, bytenr, size);
6281 remove_cache_extent(pending, cache);
6284 cache = lookup_cache_extent(reada, bytenr, size);
6286 remove_cache_extent(reada, cache);
6289 cache = lookup_cache_extent(nodes, bytenr, size);
6291 remove_cache_extent(nodes, cache);
6294 cache = lookup_cache_extent(extent_cache, bytenr, size);
6296 rec = container_of(cache, struct extent_record, cache);
6297 gen = rec->parent_generation;
6300 /* fixme, get the real parent transid */
6301 buf = read_tree_block(root, bytenr, size, gen);
6302 if (!extent_buffer_uptodate(buf)) {
6303 record_bad_block_io(root->fs_info,
6304 extent_cache, bytenr, size);
6308 nritems = btrfs_header_nritems(buf);
/* Obtain extent flags from the extent tree, or compute them ourselves. */
6311 if (!init_extent_tree) {
6312 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6313 btrfs_header_level(buf), 1, NULL,
6316 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6318 fprintf(stderr, "Couldn't calc extent flags\n");
6319 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6324 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6326 fprintf(stderr, "Couldn't calc extent flags\n");
6327 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6331 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6333 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6334 ri->objectid == btrfs_header_owner(buf)) {
6336 * Ok we got to this block from its original owner and
6337 * we have FULL_BACKREF set. Relocation can leave
6338 * converted blocks over so this is altogether possible,
6339 * however it's not possible if the generation > the
6340 * last snapshot, so check for this case.
6342 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6343 btrfs_header_generation(buf) > ri->last_snapshot) {
6344 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6345 rec->bad_full_backref = 1;
6350 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6351 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6352 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6353 rec->bad_full_backref = 1;
6357 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6358 rec->flag_block_full_backref = 1;
6362 rec->flag_block_full_backref = 0;
6364 owner = btrfs_header_owner(buf);
6367 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch every item by key type into its cache. */
6371 if (btrfs_is_leaf(buf)) {
6372 btree_space_waste += btrfs_leaf_free_space(root, buf);
6373 for (i = 0; i < nritems; i++) {
6374 struct btrfs_file_extent_item *fi;
6375 btrfs_item_key_to_cpu(buf, &key, i);
6377 * Check key type against the leaf owner.
6378 * Could filter quite a lot of early error if
6381 if (check_type_with_root(btrfs_header_owner(buf),
6383 fprintf(stderr, "ignoring invalid key\n");
6386 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6387 process_extent_item(root, extent_cache, buf,
6391 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6392 process_extent_item(root, extent_cache, buf,
6396 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6398 btrfs_item_size_nr(buf, i);
6401 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6402 process_chunk_item(chunk_cache, &key, buf, i);
6405 if (key.type == BTRFS_DEV_ITEM_KEY) {
6406 process_device_item(dev_cache, &key, buf, i);
6409 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6410 process_block_group_item(block_group_cache,
6414 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6415 process_device_extent_item(dev_extent_cache,
6420 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6421 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6422 process_extent_ref_v0(extent_cache, buf, i);
6429 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6430 ret = add_tree_backref(extent_cache,
6431 key.objectid, 0, key.offset, 0);
6433 error("add_tree_backref failed: %s",
6437 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6438 ret = add_tree_backref(extent_cache,
6439 key.objectid, key.offset, 0, 0);
6441 error("add_tree_backref failed: %s",
6445 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6446 struct btrfs_extent_data_ref *ref;
6447 ref = btrfs_item_ptr(buf, i,
6448 struct btrfs_extent_data_ref);
6449 add_data_backref(extent_cache,
6451 btrfs_extent_data_ref_root(buf, ref),
6452 btrfs_extent_data_ref_objectid(buf,
6454 btrfs_extent_data_ref_offset(buf, ref),
6455 btrfs_extent_data_ref_count(buf, ref),
6456 0, root->sectorsize);
6459 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6460 struct btrfs_shared_data_ref *ref;
6461 ref = btrfs_item_ptr(buf, i,
6462 struct btrfs_shared_data_ref);
6463 add_data_backref(extent_cache,
6464 key.objectid, key.offset, 0, 0, 0,
6465 btrfs_shared_data_ref_count(buf, ref),
6466 0, root->sectorsize);
/* Orphan items are queued on delete_items for later cleanup. */
6469 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6470 struct bad_item *bad;
6472 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6476 bad = malloc(sizeof(struct bad_item));
6479 INIT_LIST_HEAD(&bad->list);
6480 memcpy(&bad->key, &key,
6481 sizeof(struct btrfs_key));
6482 bad->root_id = owner;
6483 list_add_tail(&bad->list, &delete_items);
/* Regular file extents: account data bytes and record a data backref. */
6486 if (key.type != BTRFS_EXTENT_DATA_KEY)
6488 fi = btrfs_item_ptr(buf, i,
6489 struct btrfs_file_extent_item);
6490 if (btrfs_file_extent_type(buf, fi) ==
6491 BTRFS_FILE_EXTENT_INLINE)
6493 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6496 data_bytes_allocated +=
6497 btrfs_file_extent_disk_num_bytes(buf, fi);
6498 if (data_bytes_allocated < root->sectorsize) {
6501 data_bytes_referenced +=
6502 btrfs_file_extent_num_bytes(buf, fi);
6503 add_data_backref(extent_cache,
6504 btrfs_file_extent_disk_bytenr(buf, fi),
6505 parent, owner, key.objectid, key.offset -
6506 btrfs_file_extent_offset(buf, fi), 1, 1,
6507 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record each child pointer and queue it for processing. */
6511 struct btrfs_key first_key;
6513 first_key.objectid = 0;
6516 btrfs_item_key_to_cpu(buf, &first_key, 0);
6517 level = btrfs_header_level(buf);
6518 for (i = 0; i < nritems; i++) {
6519 struct extent_record tmpl;
6521 ptr = btrfs_node_blockptr(buf, i);
6522 size = root->nodesize;
6523 btrfs_node_key_to_cpu(buf, &key, i);
/* Skip children already dropped by an in-progress snapshot delete. */
6525 if ((level == ri->drop_level)
6526 && is_dropped_key(&key, &ri->drop_key)) {
6531 memset(&tmpl, 0, sizeof(tmpl));
6532 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6533 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6538 tmpl.max_size = size;
6539 ret = add_extent_rec(extent_cache, &tmpl);
6543 ret = add_tree_backref(extent_cache, ptr, parent,
6546 error("add_tree_backref failed: %s",
6552 add_pending(nodes, seen, ptr, size);
6554 add_pending(pending, seen, ptr, size);
6557 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6558 nritems) * sizeof(struct btrfs_key_ptr);
/* Global accounting for the final report. */
6560 total_btree_bytes += buf->len;
6561 if (fs_root_objectid(btrfs_header_owner(buf)))
6562 total_fs_tree_bytes += buf->len;
6563 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6564 total_extent_tree_bytes += buf->len;
6565 if (!found_old_backref &&
6566 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6567 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6568 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6569 found_old_backref = 1;
6571 free_extent_buffer(buf);
/*
 * Seed the scan with a root's node: queue @buf on the nodes or pending
 * tree (by level), record its extent, and add the initial tree backref
 * (a shared/parent backref for reloc or pre-mixed-backref roots, an
 * owner backref otherwise).
 * NOTE(review): interior lines are missing from this excerpt (embedded
 * line numbers jump); code is kept byte-identical.
 */
6575 static int add_root_to_pending(struct extent_buffer *buf,
6576 struct cache_tree *extent_cache,
6577 struct cache_tree *pending,
6578 struct cache_tree *seen,
6579 struct cache_tree *nodes,
6582 struct extent_record tmpl;
6585 if (btrfs_header_level(buf) > 0)
6586 add_pending(nodes, seen, buf->start, buf->len);
6588 add_pending(pending, seen, buf->start, buf->len);
6590 memset(&tmpl, 0, sizeof(tmpl));
6591 tmpl.start = buf->start;
6596 tmpl.max_size = buf->len;
6597 add_extent_rec(extent_cache, &tmpl);
/* Reloc/old-format roots use a parent backref; others an owner backref. */
6599 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6600 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6601 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6604 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6609 /* as we fix the tree, we might be deleting blocks that
6610 * we're tracking for repair. This hook makes sure we
6611 * remove any backrefs for blocks as we are fixing them.
/*
 * Called while repair deletes extents: keep the fsck extent cache in sync
 * by dropping the corresponding data or tree backref counts from the
 * cached record, erasing the backref node once both its ref and
 * extent-tree sightings reach zero.
 * NOTE(review): interior lines are missing from this excerpt (embedded
 * line numbers jump); code is kept byte-identical.
 */
6613 static int free_extent_hook(struct btrfs_trans_handle *trans,
6614 struct btrfs_root *root,
6615 u64 bytenr, u64 num_bytes, u64 parent,
6616 u64 root_objectid, u64 owner, u64 offset,
6619 struct extent_record *rec;
6620 struct cache_extent *cache;
6622 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners at or above FIRST_FREE_OBJECTID are file (data) extents. */
6624 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6625 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6629 rec = container_of(cache, struct extent_record, cache);
6631 struct data_backref *back;
6632 back = find_data_backref(rec, parent, root_objectid, owner,
6633 offset, 1, bytenr, num_bytes);
6636 if (back->node.found_ref) {
6637 back->found_ref -= refs_to_drop;
6639 rec->refs -= refs_to_drop;
6641 if (back->node.found_extent_tree) {
6642 back->num_refs -= refs_to_drop;
6643 if (rec->extent_item_refs)
6644 rec->extent_item_refs -= refs_to_drop;
6646 if (back->found_ref == 0)
6647 back->node.found_ref = 0;
6648 if (back->num_refs == 0)
6649 back->node.found_extent_tree = 0;
6651 if (!back->node.found_extent_tree && back->node.found_ref) {
6652 rb_erase(&back->node.node, &rec->backref_tree);
/* Tree-block backref: same bookkeeping, single-ref granularity. */
6656 struct tree_backref *back;
6657 back = find_tree_backref(rec, parent, root_objectid);
6660 if (back->node.found_ref) {
6663 back->node.found_ref = 0;
6665 if (back->node.found_extent_tree) {
6666 if (rec->extent_item_refs)
6667 rec->extent_item_refs--;
6668 back->node.found_extent_tree = 0;
6670 if (!back->node.found_extent_tree && back->node.found_ref) {
6671 rb_erase(&back->node.node, &rec->backref_tree);
6675 maybe_free_extent_rec(extent_cache, rec);
/*
 * Repair helper: delete every extent-tree item (extent/metadata items and
 * all backref item types) keyed at @bytenr, updating block group usage
 * for each removed EXTENT_ITEM/METADATA_ITEM.
 * NOTE(review): interior lines are missing from this excerpt (embedded
 * line numbers jump); code is kept byte-identical.
 */
6680 static int delete_extent_records(struct btrfs_trans_handle *trans,
6681 struct btrfs_root *root,
6682 struct btrfs_path *path,
6683 u64 bytenr, u64 new_len)
6685 struct btrfs_key key;
6686 struct btrfs_key found_key;
6687 struct extent_buffer *leaf;
6692 key.objectid = bytenr;
6694 key.offset = (u64)-1;
/* Repeatedly search down from (bytenr, max) and delete matches. */
6697 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6704 if (path->slots[0] == 0)
6710 leaf = path->nodes[0];
6711 slot = path->slots[0];
6713 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6714 if (found_key.objectid != bytenr)
6717 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6718 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6719 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6720 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6721 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6722 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6723 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
/* Not a deletable type: step the search key backwards and retry. */
6724 btrfs_release_path(path);
6725 if (found_key.type == 0) {
6726 if (found_key.offset == 0)
6728 key.offset = found_key.offset - 1;
6729 key.type = found_key.type;
6731 key.type = found_key.type - 1;
6732 key.offset = (u64)-1;
6736 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6737 found_key.objectid, found_key.type, found_key.offset);
6739 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6742 btrfs_release_path(path);
/* Removing the extent item itself frees space in its block group. */
6744 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6745 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6746 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6747 found_key.offset : root->nodesize;
6749 ret = btrfs_update_block_group(trans, root, bytenr,
6756 btrfs_release_path(path);
6761 * for a single backref, this will allocate a new extent
6762 * and add the backref to it.
/*
 * Repair helper: (re)create the extent item for @rec when needed and add
 * the backref described by @back to it via btrfs_inc_extent_ref().
 *
 * Fix: four calls below had their "&copy_key" argument corrupted to the
 * mojibake "©_key" (HTML entity &copy; + "_key"), and the declaration
 * carried a stray second semicolon; both restored.  Interior lines are
 * otherwise missing from this excerpt (embedded line numbers jump) and
 * the remaining code is kept byte-identical.
 */
6764 static int record_extent(struct btrfs_trans_handle *trans,
6765 struct btrfs_fs_info *info,
6766 struct btrfs_path *path,
6767 struct extent_record *rec,
6768 struct extent_backref *back,
6769 int allocated, u64 flags)
6772 struct btrfs_root *extent_root = info->extent_root;
6773 struct extent_buffer *leaf;
6774 struct btrfs_key ins_key;
6775 struct btrfs_extent_item *ei;
6776 struct tree_backref *tback;
6777 struct data_backref *dback;
6778 struct btrfs_tree_block_info *bi;
/* Metadata extents are at least one node in size. */
6781 rec->max_size = max_t(u64, rec->max_size,
6782 info->extent_root->nodesize);
6785 u32 item_size = sizeof(*ei);
6788 item_size += sizeof(*bi);
/* Insert a fresh EXTENT_ITEM for the record. */
6790 ins_key.objectid = rec->start;
6791 ins_key.offset = rec->max_size;
6792 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6794 ret = btrfs_insert_empty_item(trans, extent_root, path,
6795 &ins_key, item_size);
6799 leaf = path->nodes[0];
6800 ei = btrfs_item_ptr(leaf, path->slots[0],
6801 struct btrfs_extent_item);
6803 btrfs_set_extent_refs(leaf, ei, 0);
6804 btrfs_set_extent_generation(leaf, ei, rec->generation);
6806 if (back->is_data) {
6807 btrfs_set_extent_flags(leaf, ei,
6808 BTRFS_EXTENT_FLAG_DATA);
/* Tree block: fill the tree_block_info with a zeroed key + level. */
6810 struct btrfs_disk_key copy_key;
6812 tback = to_tree_backref(back);
6813 bi = (struct btrfs_tree_block_info *)(ei + 1);
6814 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6817 btrfs_set_disk_key_objectid(&copy_key,
6818 rec->info_objectid);
6819 btrfs_set_disk_key_type(&copy_key, 0);
6820 btrfs_set_disk_key_offset(&copy_key, 0);
6822 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6823 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6825 btrfs_set_extent_flags(leaf, ei,
6826 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6829 btrfs_mark_buffer_dirty(leaf);
6830 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6831 rec->max_size, 1, 0);
6834 btrfs_release_path(path);
/* Now add the backref itself, once per found reference for data. */
6837 if (back->is_data) {
6841 dback = to_data_backref(back);
6842 if (back->full_backref)
6843 parent = dback->parent;
6847 for (i = 0; i < dback->found_ref; i++) {
6848 /* if parent != 0, we're doing a full backref
6849 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6850 * just makes the backref allocator create a data
6853 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6854 rec->start, rec->max_size,
6858 BTRFS_FIRST_FREE_OBJECTID :
6864 fprintf(stderr, "adding new data backref"
6865 " on %llu %s %llu owner %llu"
6866 " offset %llu found %d\n",
6867 (unsigned long long)rec->start,
6868 back->full_backref ?
6870 back->full_backref ?
6871 (unsigned long long)parent :
6872 (unsigned long long)dback->root,
6873 (unsigned long long)dback->owner,
6874 (unsigned long long)dback->offset,
6879 tback = to_tree_backref(back);
6880 if (back->full_backref)
6881 parent = tback->parent;
6885 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6886 rec->start, rec->max_size,
6887 parent, tback->root, 0, 0);
6888 fprintf(stderr, "adding new tree backref on "
6889 "start %llu len %llu parent %llu root %llu\n",
6890 rec->start, rec->max_size, parent, tback->root);
6893 btrfs_release_path(path);
/*
 * Linear-search @entries for the extent_entry matching @bytenr/@bytes.
 * NOTE(review): the return lines are missing from this excerpt (embedded
 * line numbers jump); code is kept byte-identical.
 */
6897 static struct extent_entry *find_entry(struct list_head *entries,
6898 u64 bytenr, u64 bytes)
6900 struct extent_entry *entry = NULL;
6902 list_for_each_entry(entry, entries, list) {
6903 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the extent_entry with the most corroborating references from
 * @entries, skipping entries whose sightings are all broken; a tie in
 * counts means no entry can be trusted over another.
 * NOTE(review): interior lines are missing from this excerpt (embedded
 * line numbers jump); code is kept byte-identical.
 */
6910 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6912 struct extent_entry *entry, *best = NULL, *prev = NULL;
6914 list_for_each_entry(entry, entries, list) {
6921 * If there are as many broken entries as entries then we know
6922 * not to trust this particular entry.
6924 if (entry->broken == entry->count)
6928 * If our current entry == best then we can't be sure our best
6929 * is really the best, so we need to keep searching.
6931 if (best && best->count == entry->count) {
6937 /* Prev == entry, not good enough, have to keep searching */
6938 if (!prev->broken && prev->count == entry->count)
6942 best = (prev->count > entry->count) ? prev : entry;
6943 else if (best->count < entry->count)
/*
 * Rewrite one file extent item so it agrees with the agreed-upon extent
 * entry ('entry', chosen by verify_backrefs).
 *
 * Walks the owning fs tree to locate the file extent the data backref
 * 'dback' describes, then, inside a transaction, adjusts its
 * disk_bytenr/offset/disk_num_bytes (and ram_bytes when uncompressed) to
 * match 'entry'.  Mismatches that cannot be fixed safely (compressed
 * extents, refs outside the entry's range) are reported and left alone.
 *
 * Returns 0 on success, <0 on error (search failure, transaction failure,
 * or an unfixable layout).
 * NOTE(review): this excerpt is truncated — error/goto paths and several
 * closing braces are not visible here.
 */
6951 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6952 struct data_backref *dback, struct extent_entry *entry)
6954 struct btrfs_trans_handle *trans;
6955 struct btrfs_root *root;
6956 struct btrfs_file_extent_item *fi;
6957 struct extent_buffer *leaf;
6958 struct btrfs_key key;
/* Look up the fs root that owns the backref. */
6962 key.objectid = dback->root;
6963 key.type = BTRFS_ROOT_ITEM_KEY;
6964 key.offset = (u64)-1;
6965 root = btrfs_read_fs_root(info, &key);
6967 fprintf(stderr, "Couldn't find root for our ref\n");
6972 * The backref points to the original offset of the extent if it was
6973 * split, so we need to search down to the offset we have and then walk
6974 * forward until we find the backref we're looking for.
6976 key.objectid = dback->owner;
6977 key.type = BTRFS_EXTENT_DATA_KEY;
6978 key.offset = dback->offset;
6979 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6981 fprintf(stderr, "Error looking up ref %d\n", ret);
6986 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6987 ret = btrfs_next_leaf(root, path);
6989 fprintf(stderr, "Couldn't find our ref, next\n");
6993 leaf = path->nodes[0];
6994 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6995 if (key.objectid != dback->owner ||
6996 key.type != BTRFS_EXTENT_DATA_KEY) {
6997 fprintf(stderr, "Couldn't find our ref, search\n");
7000 fi = btrfs_item_ptr(leaf, path->slots[0],
7001 struct btrfs_file_extent_item);
7002 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7003 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* Stop once the on-disk item matches the backref we're repairing. */
7005 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7010 btrfs_release_path(path);
7012 trans = btrfs_start_transaction(root, 1);
7014 return PTR_ERR(trans);
7017 * Ok we have the key of the file extent we want to fix, now we can cow
7018 * down to the thing and fix it.
7020 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7022 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7023 key.objectid, key.type, key.offset, ret);
7027 fprintf(stderr, "Well that's odd, we just found this key "
7028 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7033 leaf = path->nodes[0];
7034 fi = btrfs_item_ptr(leaf, path->slots[0],
7035 struct btrfs_file_extent_item);
/* Compressed extents can't have their offsets rewritten safely. */
7037 if (btrfs_file_extent_compression(leaf, fi) &&
7038 dback->disk_bytenr != entry->bytenr) {
7039 fprintf(stderr, "Ref doesn't match the record start and is "
7040 "compressed, please take a btrfs-image of this file "
7041 "system and send it to a btrfs developer so they can "
7042 "complete this functionality for bytenr %Lu\n",
7043 dback->disk_bytenr);
/* Broken backref: trust 'entry' outright and overwrite the bytenr. */
7048 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7049 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7050 } else if (dback->disk_bytenr > entry->bytenr) {
7051 u64 off_diff, offset;
7053 off_diff = dback->disk_bytenr - entry->bytenr;
7054 offset = btrfs_file_extent_offset(leaf, fi);
7055 if (dback->disk_bytenr + offset +
7056 btrfs_file_extent_num_bytes(leaf, fi) >
7057 entry->bytenr + entry->bytes) {
7058 fprintf(stderr, "Ref is past the entry end, please "
7059 "take a btrfs-image of this file system and "
7060 "send it to a btrfs developer, ref %Lu\n",
7061 dback->disk_bytenr);
7066 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7067 btrfs_set_file_extent_offset(leaf, fi, offset);
7068 } else if (dback->disk_bytenr < entry->bytenr) {
7071 offset = btrfs_file_extent_offset(leaf, fi);
7072 if (dback->disk_bytenr + offset < entry->bytenr) {
7073 fprintf(stderr, "Ref is before the entry start, please"
7074 " take a btrfs-image of this file system and "
7075 "send it to a btrfs developer, ref %Lu\n",
7076 dback->disk_bytenr);
/* Shift the file-extent offset so it lands inside entry's range. */
7081 offset += dback->disk_bytenr;
7082 offset -= entry->bytenr;
7083 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7084 btrfs_set_file_extent_offset(leaf, fi, offset);
7087 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7090 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7091 * only do this if we aren't using compression, otherwise it's a
7094 if (!btrfs_file_extent_compression(leaf, fi))
7095 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7097 printf("ram bytes may be wrong?\n");
7098 btrfs_mark_buffer_dirty(leaf);
7100 err = btrfs_commit_transaction(trans, root);
7101 btrfs_release_path(path);
7102 return ret ? ret : err;
/*
 * Check that all data backrefs of 'rec' agree on the extent's (bytenr,
 * bytes), and if not, decide which value is correct and repair the
 * dissenting file extents via repair_ref().
 *
 * Builds a list of distinct (bytenr, bytes) candidates seen among the
 * backrefs, elects a winner with find_most_right_entry() (falling back to
 * the extent record itself on a tie), then rewrites every backref that
 * disagrees with the winner.  After any repair the caller must rescan,
 * since the cached state is no longer trustworthy.
 * NOTE(review): excerpt is truncated — init of 'entries'/counters, some
 * branches and the final return are not visible here.
 */
7105 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7106 struct extent_record *rec)
7108 struct extent_backref *back, *tmp;
7109 struct data_backref *dback;
7110 struct extent_entry *entry, *best = NULL;
7113 int broken_entries = 0;
7118 * Metadata is easy and the backrefs should always agree on bytenr and
7119 * size, if not we've got bigger issues.
/* Pass 1: collect the distinct (bytenr, bytes) values the backrefs claim. */
7124 rbtree_postorder_for_each_entry_safe(back, tmp,
7125 &rec->backref_tree, node) {
7126 if (back->full_backref || !back->is_data)
7129 dback = to_data_backref(back);
7132 * We only pay attention to backrefs that we found a real
7135 if (dback->found_ref == 0)
7139 * For now we only catch when the bytes don't match, not the
7140 * bytenr. We can easily do this at the same time, but I want
7141 * to have a fs image to test on before we just add repair
7142 * functionality willy-nilly so we know we won't screw up the
7146 entry = find_entry(&entries, dback->disk_bytenr,
7149 entry = malloc(sizeof(struct extent_entry));
7154 memset(entry, 0, sizeof(*entry));
7155 entry->bytenr = dback->disk_bytenr;
7156 entry->bytes = dback->bytes;
7157 list_add_tail(&entry->list, &entries);
7162 * If we only have on entry we may think the entries agree when
7163 * in reality they don't so we have to do some extra checking.
7165 if (dback->disk_bytenr != rec->start ||
7166 dback->bytes != rec->nr || back->broken)
7177 /* Yay all the backrefs agree, carry on good sir */
7178 if (nr_entries <= 1 && !mismatch)
7181 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7182 "%Lu\n", rec->start);
7185 * First we want to see if the backrefs can agree amongst themselves who
7186 * is right, so figure out which one of the entries has the highest
7189 best = find_most_right_entry(&entries);
7192 * Ok so we may have an even split between what the backrefs think, so
7193 * this is where we use the extent ref to see what it thinks.
7196 entry = find_entry(&entries, rec->start, rec->nr);
7197 if (!entry && (!broken_entries || !rec->found_rec)) {
7198 fprintf(stderr, "Backrefs don't agree with each other "
7199 "and extent record doesn't agree with anybody,"
7200 " so we can't fix bytenr %Lu bytes %Lu\n",
7201 rec->start, rec->nr);
7204 } else if (!entry) {
7206 * Ok our backrefs were broken, we'll assume this is the
7207 * correct value and add an entry for this range.
7209 entry = malloc(sizeof(struct extent_entry));
7214 memset(entry, 0, sizeof(*entry));
7215 entry->bytenr = rec->start;
7216 entry->bytes = rec->nr;
7217 list_add_tail(&entry->list, &entries);
/* Retry the election now that the extent record has a vote. */
7221 best = find_most_right_entry(&entries);
7223 fprintf(stderr, "Backrefs and extent record evenly "
7224 "split on who is right, this is going to "
7225 "require user input to fix bytenr %Lu bytes "
7226 "%Lu\n", rec->start, rec->nr);
7233 * I don't think this can happen currently as we'll abort() if we catch
7234 * this case higher up, but in case somebody removes that we still can't
7235 * deal with it properly here yet, so just bail out of that's the case.
7237 if (best->bytenr != rec->start) {
7238 fprintf(stderr, "Extent start and backref starts don't match, "
7239 "please use btrfs-image on this file system and send "
7240 "it to a btrfs developer so they can make fsck fix "
7241 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7242 rec->start, rec->nr);
7248 * Ok great we all agreed on an extent record, let's go find the real
7249 * references and fix up the ones that don't match.
/* Pass 2: repair every backref that disagrees with the winner. */
7251 rbtree_postorder_for_each_entry_safe(back, tmp,
7252 &rec->backref_tree, node) {
7253 if (back->full_backref || !back->is_data)
7256 dback = to_data_backref(back);
7259 * Still ignoring backrefs that don't have a real ref attached
7262 if (dback->found_ref == 0)
7265 if (dback->bytes == best->bytes &&
7266 dback->disk_bytenr == best->bytenr)
7269 ret = repair_ref(info, path, dback, best);
7275 * Ok we messed with the actual refs, which means we need to drop our
7276 * entire cache and go back and rescan. I know this is a huge pain and
7277 * adds a lot of extra work, but it's the only way to be safe. Once all
7278 * the backrefs agree we may not need to do anything to the extent
/* Free the candidate list before returning. */
7283 while (!list_empty(&entries)) {
7284 entry = list_entry(entries.next, struct extent_entry, list);
7285 list_del_init(&entry->list);
/*
 * Decide whether an extent record flagged as a duplicate really needs
 * records deleted, or whether it was just a backref mixup.
 *
 * If the record has a real extent item or more than one duplicate, leave
 * it for deletion (return 0).  Otherwise promote the single duplicate
 * ('good') to be the canonical record — it came from a real
 * BTRFS_EXTENT_ITEM_KEY — absorb any overlapping records in the cache into
 * it, and re-insert it.  Returns non-zero when nothing needs deleting.
 * NOTE(review): excerpt is truncated — loop framing and some branches are
 * not visible here.
 */
7291 static int process_duplicates(struct btrfs_root *root,
7292 struct cache_tree *extent_cache,
7293 struct extent_record *rec)
7295 struct extent_record *good, *tmp;
7296 struct cache_extent *cache;
7300 * If we found a extent record for this extent then return, or if we
7301 * have more than one duplicate we are likely going to need to delete
7304 if (rec->found_rec || rec->num_duplicates > 1)
7307 /* Shouldn't happen but just in case */
7308 BUG_ON(!rec->num_duplicates);
7311 * So this happens if we end up with a backref that doesn't match the
7312 * actual extent entry. So either the backref is bad or the extent
7313 * entry is bad. Either way we want to have the extent_record actually
7314 * reflect what we found in the extent_tree, so we need to take the
7315 * duplicate out and use that as the extent_record since the only way we
7316 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7318 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the sole duplicate to canonical record, carrying rec's refs. */
7320 good = to_extent_record(rec->dups.next);
7321 list_del_init(&good->list);
7322 INIT_LIST_HEAD(&good->backrefs);
7323 INIT_LIST_HEAD(&good->dups);
7324 good->cache.start = good->start;
7325 good->cache.size = good->nr;
7326 good->content_checked = 0;
7327 good->owner_ref_checked = 0;
7328 good->num_duplicates = 0;
7329 good->refs = rec->refs;
7330 list_splice_init(&rec->backrefs, &good->backrefs);
/* Merge any overlapping cached records into 'good'. */
7332 cache = lookup_cache_extent(extent_cache, good->start,
7336 tmp = container_of(cache, struct extent_record, cache);
7339 * If we find another overlapping extent and it's found_rec is
7340 * set then it's a duplicate and we need to try and delete
7343 if (tmp->found_rec || tmp->num_duplicates > 0) {
7344 if (list_empty(&good->list))
7345 list_add_tail(&good->list,
7346 &duplicate_extents);
7347 good->num_duplicates += tmp->num_duplicates + 1;
7348 list_splice_init(&tmp->dups, &good->dups);
7349 list_del_init(&tmp->list);
7350 list_add_tail(&tmp->list, &good->dups);
7351 remove_cache_extent(extent_cache, &tmp->cache);
7356 * Ok we have another non extent item backed extent rec, so lets
7357 * just add it to this extent and carry on like we did above.
7359 good->refs += tmp->refs;
7360 list_splice_init(&tmp->backrefs, &good->backrefs);
7361 remove_cache_extent(extent_cache, &tmp->cache);
7364 ret = insert_cache_extent(extent_cache, &good->cache);
7367 return good->num_duplicates ? 0 : 1;
/*
 * Delete the on-disk extent items for all duplicates of 'rec' except the
 * one record that fully covers the others.
 *
 * First elects 'good' — the duplicate spanning all overlapping ranges
 * (bailing out if the ranges only partially overlap) — then, in a
 * transaction on the extent root, deletes every other duplicate's
 * EXTENT_ITEM and frees the discarded records.
 *
 * Returns <0 on error, otherwise the number of items deleted (nr_del); 0
 * means nothing had to be removed.
 * NOTE(review): excerpt is truncated — election of 'good', some error
 * paths and braces are not visible here.
 */
7370 static int delete_duplicate_records(struct btrfs_root *root,
7371 struct extent_record *rec)
7373 struct btrfs_trans_handle *trans;
7374 LIST_HEAD(delete_list);
7375 struct btrfs_path *path;
7376 struct extent_record *tmp, *good, *n;
7379 struct btrfs_key key;
7381 path = btrfs_alloc_path();
7388 /* Find the record that covers all of the duplicates. */
7389 list_for_each_entry(tmp, &rec->dups, list) {
7390 if (good->start < tmp->start)
7392 if (good->nr > tmp->nr)
7395 if (tmp->start + tmp->nr < good->start + good->nr) {
7396 fprintf(stderr, "Ok we have overlapping extents that "
7397 "aren't completely covered by each other, this "
7398 "is going to require more careful thought. "
7399 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7400 tmp->start, tmp->nr, good->start, good->nr);
/* Queue every duplicate except 'good' for deletion. */
7407 list_add_tail(&rec->list, &delete_list);
7409 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7412 list_move_tail(&tmp->list, &delete_list);
7415 root = root->fs_info->extent_root;
7416 trans = btrfs_start_transaction(root, 1);
7417 if (IS_ERR(trans)) {
7418 ret = PTR_ERR(trans);
7422 list_for_each_entry(tmp, &delete_list, list) {
7423 if (tmp->found_rec == 0)
7425 key.objectid = tmp->start;
7426 key.type = BTRFS_EXTENT_ITEM_KEY;
7427 key.offset = tmp->nr;
7429 /* Shouldn't happen but just in case */
7430 if (tmp->metadata) {
7431 fprintf(stderr, "Well this shouldn't happen, extent "
7432 "record overlaps but is metadata? "
7433 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7437 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7443 ret = btrfs_del_item(trans, root, path);
7446 btrfs_release_path(path);
7449 err = btrfs_commit_transaction(trans, root);
/* Free the records we queued, then any leftovers still on rec->dups. */
7453 while (!list_empty(&delete_list)) {
7454 tmp = to_extent_record(delete_list.next);
7455 list_del_init(&tmp->list);
7461 while (!list_empty(&rec->dups)) {
7462 tmp = to_extent_record(rec->dups.next);
7463 list_del_init(&tmp->list);
7467 btrfs_free_path(path);
7469 if (!ret && !nr_del)
7470 rec->num_duplicates = 0;
7472 return ret ? ret : nr_del;
/*
 * For every data backref of 'rec' that was never matched to a real file
 * extent, look it up in its fs tree and, if the file extent exists and has
 * no extent record of its own, adopt its (disk_bytenr, disk_num_bytes)
 * into the backref so verify_backrefs() can reconcile it.
 * NOTE(review): excerpt is truncated — several continue/error branches and
 * the final return are not visible here.
 */
7475 static int find_possible_backrefs(struct btrfs_fs_info *info,
7476 struct btrfs_path *path,
7477 struct cache_tree *extent_cache,
7478 struct extent_record *rec)
7480 struct btrfs_root *root;
7481 struct extent_backref *back, *tmp;
7482 struct data_backref *dback;
7483 struct cache_extent *cache;
7484 struct btrfs_file_extent_item *fi;
7485 struct btrfs_key key;
7489 rbtree_postorder_for_each_entry_safe(back, tmp,
7490 &rec->backref_tree, node) {
7491 /* Don't care about full backrefs (poor unloved backrefs) */
7492 if (back->full_backref || !back->is_data)
7495 dback = to_data_backref(back);
7497 /* We found this one, we don't need to do a lookup */
7498 if (dback->found_ref)
/* Resolve the fs root the backref claims to belong to. */
7501 key.objectid = dback->root;
7502 key.type = BTRFS_ROOT_ITEM_KEY;
7503 key.offset = (u64)-1;
7505 root = btrfs_read_fs_root(info, &key);
7507 /* No root, definitely a bad ref, skip */
7508 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7510 /* Other err, exit */
7512 return PTR_ERR(root);
7514 key.objectid = dback->owner;
7515 key.type = BTRFS_EXTENT_DATA_KEY;
7516 key.offset = dback->offset;
7517 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7519 btrfs_release_path(path);
7522 /* Didn't find it, we can carry on */
7527 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7528 struct btrfs_file_extent_item);
7529 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7530 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7531 btrfs_release_path(path);
7532 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7534 struct extent_record *tmp;
7535 tmp = container_of(cache, struct extent_record, cache);
7538 * If we found an extent record for the bytenr for this
7539 * particular backref then we can't add it to our
7540 * current extent record. We only want to add backrefs
7541 * that don't have a corresponding extent item in the
7542 * extent tree since they likely belong to this record
7543 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the on-disk values; mark the ref as found. */
7549 dback->found_ref += 1;
7550 dback->disk_bytenr = bytenr;
7551 dback->bytes = bytes;
7554 * Set this so the verify backref code knows not to trust the
7555 * values in this backref.
/*
 * Record orphan data refs into their corresponding roots.
 *
 * Return 0 if the extent item contains a data ref and it was recorded.
 * Return 1 if the extent item contains no useful data ref; in that case it
 * may contain only a shared_dataref or metadata backref, or the file
 * extent already exists (that case is handled by the extent bytenr fixup
 * routine).
 * Return <0 if something goes wrong.
 */
7573 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7574 struct extent_record *rec)
7576 struct btrfs_key key;
7577 struct btrfs_root *dest_root;
7578 struct extent_backref *back, *tmp;
7579 struct data_backref *dback;
7580 struct orphan_data_extent *orphan;
7581 struct btrfs_path *path;
7582 int recorded_data_ref = 0;
7587 path = btrfs_alloc_path();
/* Only data backrefs seen in the extent tree but missing a file extent
 * ref are candidates for orphan recording. */
7590 rbtree_postorder_for_each_entry_safe(back, tmp,
7591 &rec->backref_tree, node) {
7592 if (back->full_backref || !back->is_data ||
7593 !back->found_extent_tree)
7595 dback = to_data_backref(back);
7596 if (dback->found_ref)
7598 key.objectid = dback->root;
7599 key.type = BTRFS_ROOT_ITEM_KEY;
7600 key.offset = (u64)-1;
7602 dest_root = btrfs_read_fs_root(fs_info, &key);
7604 /* For non-exist root we just skip it */
7605 if (IS_ERR(dest_root) || !dest_root)
7608 key.objectid = dback->owner;
7609 key.type = BTRFS_EXTENT_DATA_KEY;
7610 key.offset = dback->offset;
7612 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7614 * For ret < 0, it's OK since the fs-tree may be corrupted,
7615 * we need to record it for inode/file extent rebuild.
7616 * For ret > 0, we record it only for file extent rebuild.
7617 * For ret == 0, the file extent exists but only bytenr
7618 * mismatch, let the original bytenr fix routine to handle,
/* Queue the orphan on the destination root's orphan list. */
7624 orphan = malloc(sizeof(*orphan));
7629 INIT_LIST_HEAD(&orphan->list);
7630 orphan->root = dback->root;
7631 orphan->objectid = dback->owner;
7632 orphan->offset = dback->offset;
7633 orphan->disk_bytenr = rec->cache.start;
7634 orphan->disk_len = rec->cache.size;
7635 list_add(&dest_root->orphan_data_extents, &orphan->list);
7636 recorded_data_ref = 1;
7639 btrfs_free_path(path);
7641 return !recorded_data_ref;
/*
 * When an incorrect extent item is found, this deletes all of the existing
 * entries for it and recreates them based on what the tree scan found.
 *
 * Steps: reconcile the backrefs (find_possible_backrefs + verify_backrefs),
 * delete the existing extent records in a transaction, skip re-adding refs
 * for blocks marked corrupt, then re-record every backref that was actually
 * observed.  Returns 0 on success, <0 on error.
 * NOTE(review): excerpt is truncated — 'allocated'/'flags' setup and some
 * exit paths are not visible here.
 */
7651 static int fixup_extent_refs(struct btrfs_fs_info *info,
7652 struct cache_tree *extent_cache,
7653 struct extent_record *rec)
7655 struct btrfs_trans_handle *trans = NULL;
7657 struct btrfs_path *path;
7658 struct cache_extent *cache;
7659 struct extent_backref *back, *tmp;
7663 if (rec->flag_block_full_backref)
7664 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7666 path = btrfs_alloc_path();
7670 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7672 * Sometimes the backrefs themselves are so broken they don't
7673 * get attached to any meaningful rec, so first go back and
7674 * check any of our backrefs that we couldn't find and throw
7675 * them into the list if we find the backref so that
7676 * verify_backrefs can figure out what to do.
7678 ret = find_possible_backrefs(info, path, extent_cache, rec);
7683 /* step one, make sure all of the backrefs agree */
7684 ret = verify_backrefs(info, path, rec);
7688 trans = btrfs_start_transaction(info->extent_root, 1);
7689 if (IS_ERR(trans)) {
7690 ret = PTR_ERR(trans);
7694 /* step two, delete all the existing records */
7695 ret = delete_extent_records(trans, info->extent_root, path,
7696 rec->start, rec->max_size);
7701 /* was this block corrupt? If so, don't add references to it */
7702 cache = lookup_cache_extent(info->corrupt_blocks,
7703 rec->start, rec->max_size);
7709 /* step three, recreate all the refs we did find */
7710 rbtree_postorder_for_each_entry_safe(back, tmp,
7711 &rec->backref_tree, node) {
7713 * if we didn't find any references, don't create a
7716 if (!back->found_ref)
7719 rec->bad_full_backref = 0;
7720 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7728 int err = btrfs_commit_transaction(trans, info->extent_root);
7733 btrfs_free_path(path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the on-disk extent item
 * for 'rec', according to rec->flag_block_full_backref.
 *
 * Looks the item up by METADATA_ITEM_KEY (metadata, offset = level) or
 * EXTENT_ITEM_KEY (offset = max_size), rewrites the flags field, and
 * commits.  Returns 0 on success, <0 on error.
 * NOTE(review): excerpt is truncated — some early-return lines are not
 * visible here.
 */
7737 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7738 struct extent_record *rec)
7740 struct btrfs_trans_handle *trans;
7741 struct btrfs_root *root = fs_info->extent_root;
7742 struct btrfs_path *path;
7743 struct btrfs_extent_item *ei;
7744 struct btrfs_key key;
7748 key.objectid = rec->start;
7749 if (rec->metadata) {
7750 key.type = BTRFS_METADATA_ITEM_KEY;
7751 key.offset = rec->info_level;
7753 key.type = BTRFS_EXTENT_ITEM_KEY;
7754 key.offset = rec->max_size;
7757 path = btrfs_alloc_path();
7761 trans = btrfs_start_transaction(root, 0);
7762 if (IS_ERR(trans)) {
7763 btrfs_free_path(path);
7764 return PTR_ERR(trans);
7767 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7769 btrfs_free_path(path);
7770 btrfs_commit_transaction(trans, root);
7773 fprintf(stderr, "Didn't find extent for %llu\n",
7774 (unsigned long long)rec->start);
7775 btrfs_free_path(path);
7776 btrfs_commit_transaction(trans, root);
/* Item found: rewrite its flags in place and mark the leaf dirty. */
7780 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7781 struct btrfs_extent_item);
7782 flags = btrfs_extent_flags(path->nodes[0], ei);
7783 if (rec->flag_block_full_backref) {
7784 fprintf(stderr, "setting full backref on %llu\n",
7785 (unsigned long long)key.objectid);
7786 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7788 fprintf(stderr, "clearing full backref on %llu\n",
7789 (unsigned long long)key.objectid);
7790 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7792 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7793 btrfs_mark_buffer_dirty(path->nodes[0]);
7794 btrfs_free_path(path);
7795 return btrfs_commit_transaction(trans, root);
/*
 * Right now we only prune from the extent allocation tree.
 *
 * Delete the pointer to one corrupt block: search down to the corrupt
 * block's parent (lowest_level = corrupt->level + 1), locate the slot
 * whose blockptr matches the corrupt block, and remove it with
 * btrfs_del_ptr().  If the slot from the search doesn't match, the whole
 * parent node is scanned.  Returns 0 on success, <0 on error.
 * NOTE(review): excerpt is truncated — error paths and the root-node
 * branch body are not fully visible here.
 */
7799 static int prune_one_block(struct btrfs_trans_handle *trans,
7800 struct btrfs_fs_info *info,
7801 struct btrfs_corrupt_block *corrupt)
7804 struct btrfs_path path;
7805 struct extent_buffer *eb;
7809 int level = corrupt->level + 1;
7811 btrfs_init_path(&path);
7813 /* we want to stop at the parent to our busted block */
7814 path.lowest_level = level;
7816 ret = btrfs_search_slot(trans, info->extent_root,
7817 &corrupt->key, &path, -1, 1);
7822 eb = path.nodes[level];
7829 * hopefully the search gave us the block we want to prune,
7830 * lets try that first
7832 slot = path.slots[level];
7833 found = btrfs_node_blockptr(eb, slot);
7834 if (found == corrupt->cache.start)
7837 nritems = btrfs_header_nritems(eb);
7839 /* the search failed, lets scan this node and hope we find it */
7840 for (slot = 0; slot < nritems; slot++) {
7841 found = btrfs_node_blockptr(eb, slot);
7842 if (found == corrupt->cache.start)
7846 * we couldn't find the bad block. TODO, search all the nodes for pointers
7849 if (eb == info->extent_root->node) {
7854 btrfs_release_path(&path);
7859 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7860 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7863 btrfs_release_path(&path);
/*
 * Walk fs_info->corrupt_blocks and prune each corrupt block's pointer out
 * of the extent tree via prune_one_block(), removing each entry from the
 * cache as it is handled, then commit the transaction.
 * NOTE(review): excerpt is truncated — the loop framing and the no-cache /
 * no-transaction exit are not visible here.
 */
7869 struct btrfs_trans_handle *trans = NULL;
7870 struct cache_extent *cache;
7871 struct btrfs_corrupt_block *corrupt;
7874 cache = search_cache_extent(info->corrupt_blocks, 0);
7878 trans = btrfs_start_transaction(info->extent_root, 1);
7880 return PTR_ERR(trans);
7882 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7883 prune_one_block(trans, info, corrupt);
7884 remove_cache_extent(info->corrupt_blocks, cache);
7887 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Invalidate the in-memory free space cache: clear every EXTENT_DIRTY
 * range in fs_info->free_space_cache and walk the block groups from
 * offset 0 so they will be re-cached on next use.
 * NOTE(review): excerpt is truncated — loop framing and the per-block-group
 * reset are only partially visible here.
 */
7893 struct btrfs_block_group_cache *cache;
7898 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7899 &start, &end, EXTENT_DIRTY);
7902 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7908 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Advance past this block group to find the next one. */
7913 start = cache->key.objectid + cache->key.offset;
/*
 * Top-level extent reference check: walk every extent_record in
 * 'extent_cache', report inconsistencies, and (when repair is enabled)
 * fix them.
 *
 * In repair mode it first excludes all known-problem extents and corrupt
 * blocks from allocation, prunes corrupt blocks, and resolves duplicate
 * extent records.  It then checks each record for: duplicate items, ref
 * count mismatch, backpointer mismatch, failed owner-ref check, bad full
 * backref flag, stripe-boundary-crossing metadata, and wrong chunk type —
 * invoking fixup_extent_refs()/fixup_extent_flags() as appropriate.
 * Finally, after repairs, block accounting is fixed up in a transaction.
 * Returns 0 on success, <0 (or -EAGAIN to request a rescan) otherwise.
 * NOTE(review): excerpt is truncated — local declarations ('fixed',
 * 'recorded', 'cur_err', 'ret'), loop framing and several branch bodies
 * are not visible here.
 */
7917 static int check_extent_refs(struct btrfs_root *root,
7918 struct cache_tree *extent_cache)
7920 struct extent_record *rec;
7921 struct cache_extent *cache;
7930 * if we're doing a repair, we have to make sure
7931 * we don't allocate from the problem extents.
7932 * In the worst case, this will be all the
7935 cache = search_cache_extent(extent_cache, 0);
7937 rec = container_of(cache, struct extent_record, cache);
7938 set_extent_dirty(root->fs_info->excluded_extents,
7940 rec->start + rec->max_size - 1,
7942 cache = next_cache_extent(cache);
7945 /* pin down all the corrupted blocks too */
7946 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7948 set_extent_dirty(root->fs_info->excluded_extents,
7950 cache->start + cache->size - 1,
7952 cache = next_cache_extent(cache);
7954 prune_corrupt_blocks(root->fs_info);
7955 reset_cached_block_groups(root->fs_info);
7958 reset_cached_block_groups(root->fs_info);
7961 * We need to delete any duplicate entries we find first otherwise we
7962 * could mess up the extent tree when we have backrefs that actually
7963 * belong to a different extent item and not the weird duplicate one.
7965 while (repair && !list_empty(&duplicate_extents)) {
7966 rec = to_extent_record(duplicate_extents.next);
7967 list_del_init(&rec->list);
7969 /* Sometimes we can find a backref before we find an actual
7970 * extent, so we need to process it a little bit to see if there
7971 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7972 * if this is a backref screwup. If we need to delete stuff
7973 * process_duplicates() will return 0, otherwise it will return
7976 if (process_duplicates(root, extent_cache, rec))
7978 ret = delete_duplicate_records(root, rec);
7982 * delete_duplicate_records will return the number of entries
7983 * deleted, so if it's greater than 0 then we know we actually
7984 * did something and we need to remove.
/* Main per-record check loop. */
7998 cache = search_cache_extent(extent_cache, 0);
8001 rec = container_of(cache, struct extent_record, cache);
8002 if (rec->num_duplicates) {
8003 fprintf(stderr, "extent item %llu has multiple extent "
8004 "items\n", (unsigned long long)rec->start);
8009 if (rec->refs != rec->extent_item_refs) {
8010 fprintf(stderr, "ref mismatch on [%llu %llu] ",
8011 (unsigned long long)rec->start,
8012 (unsigned long long)rec->nr);
8013 fprintf(stderr, "extent item %llu, found %llu\n",
8014 (unsigned long long)rec->extent_item_refs,
8015 (unsigned long long)rec->refs);
8016 ret = record_orphan_data_extents(root->fs_info, rec);
8023 * we can't use the extent to repair file
8024 * extent, let the fallback method handle it.
8026 if (!fixed && repair) {
8027 ret = fixup_extent_refs(
8038 if (all_backpointers_checked(rec, 1)) {
8039 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8040 (unsigned long long)rec->start,
8041 (unsigned long long)rec->nr);
8043 if (!fixed && !recorded && repair) {
8044 ret = fixup_extent_refs(root->fs_info,
8053 if (!rec->owner_ref_checked) {
8054 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8055 (unsigned long long)rec->start,
8056 (unsigned long long)rec->nr);
8057 if (!fixed && !recorded && repair) {
8058 ret = fixup_extent_refs(root->fs_info,
8067 if (rec->bad_full_backref) {
8068 fprintf(stderr, "bad full backref, on [%llu]\n",
8069 (unsigned long long)rec->start);
8071 ret = fixup_extent_flags(root->fs_info, rec);
8080 * Although it's not a extent ref's problem, we reuse this
8081 * routine for error reporting.
8082 * No repair function yet.
8084 if (rec->crossing_stripes) {
8086 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8087 rec->start, rec->start + rec->max_size);
8092 if (rec->wrong_chunk_type) {
8094 "bad extent [%llu, %llu), type mismatch with chunk\n",
8095 rec->start, rec->start + rec->max_size);
/* Done with this record: drop it and release its exclusion if clean. */
8100 remove_cache_extent(extent_cache, cache);
8101 free_all_extent_backrefs(rec);
8102 if (!init_extent_tree && repair && (!cur_err || fixed))
8103 clear_extent_dirty(root->fs_info->excluded_extents,
8105 rec->start + rec->max_size - 1,
8111 if (ret && ret != -EAGAIN) {
8112 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8115 struct btrfs_trans_handle *trans;
8117 root = root->fs_info->extent_root;
8118 trans = btrfs_start_transaction(root, 1);
8119 if (IS_ERR(trans)) {
8120 ret = PTR_ERR(trans);
8124 btrfs_fix_block_accounting(trans, root);
8125 ret = btrfs_commit_transaction(trans, root);
8130 fprintf(stderr, "repaired damaged extent references\n");
8136 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8140 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8141 stripe_size = length;
8142 stripe_size /= num_stripes;
8143 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8144 stripe_size = length * 2;
8145 stripe_size /= num_stripes;
8146 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8147 stripe_size = length;
8148 stripe_size /= (num_stripes - 1);
8149 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8150 stripe_size = length;
8151 stripe_size /= (num_stripes - 2);
8153 stripe_size = length;
/*
 * Check the chunk against its block group / dev extent list refs:
 * Return 0 if all refs seem valid.
 * Return 1 if part of the refs seem valid and a later pass needs to
 * rebuild them (e.g. a missing block group that must be recreated by
 * searching the extent tree).
 * Return -1 if essential refs are missing and we are unable to rebuild.
 */
8165 static int check_chunk_refs(struct chunk_record *chunk_rec,
8166 struct block_group_tree *block_group_cache,
8167 struct device_extent_tree *dev_extent_cache,
8170 struct cache_extent *block_group_item;
8171 struct block_group_record *block_group_rec;
8172 struct cache_extent *dev_extent_item;
8173 struct device_extent_record *dev_extent_rec;
8177 int metadump_v2 = 0;
/* Step 1: the chunk must have a matching block group item. */
8181 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8184 if (block_group_item) {
8185 block_group_rec = container_of(block_group_item,
8186 struct block_group_record,
8188 if (chunk_rec->length != block_group_rec->offset ||
8189 chunk_rec->offset != block_group_rec->objectid ||
8191 chunk_rec->type_flags != block_group_rec->flags)) {
8194 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8195 chunk_rec->objectid,
8200 chunk_rec->type_flags,
8201 block_group_rec->objectid,
8202 block_group_rec->type,
8203 block_group_rec->offset,
8204 block_group_rec->offset,
8205 block_group_rec->objectid,
8206 block_group_rec->flags);
8209 list_del_init(&block_group_rec->list);
8210 chunk_rec->bg_rec = block_group_rec;
8215 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8216 chunk_rec->objectid,
8221 chunk_rec->type_flags);
/* Step 2: every stripe must have a matching dev extent. */
8228 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8229 chunk_rec->num_stripes);
8230 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8231 devid = chunk_rec->stripes[i].devid;
8232 offset = chunk_rec->stripes[i].offset;
8233 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8234 devid, offset, length);
8235 if (dev_extent_item) {
8236 dev_extent_rec = container_of(dev_extent_item,
8237 struct device_extent_record,
8239 if (dev_extent_rec->objectid != devid ||
8240 dev_extent_rec->offset != offset ||
8241 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8242 dev_extent_rec->length != length) {
8245 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8246 chunk_rec->objectid,
8249 chunk_rec->stripes[i].devid,
8250 chunk_rec->stripes[i].offset,
8251 dev_extent_rec->objectid,
8252 dev_extent_rec->offset,
8253 dev_extent_rec->length);
8256 list_move(&dev_extent_rec->chunk_list,
8257 &chunk_rec->dextents);
8262 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8263 chunk_rec->objectid,
8266 chunk_rec->stripes[i].devid,
8267 chunk_rec->stripes[i].offset);
/*
 * Check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item
 * consistency for every chunk in 'chunk_cache'.
 *
 * Each chunk is classified by check_chunk_refs(): fully valid chunks go on
 * 'good', partially valid ones needing ref rebuild on 'rebuild', broken
 * ones on 'bad' (each list is optional/NULL-able per the visible checks).
 * Leftover block groups and dev extents that matched no chunk are then
 * reported.  'silent' suppresses the per-item diagnostics.
 * NOTE(review): excerpt is truncated — 'err'/'ret' declarations and the
 * final return are not visible here.
 */
8275 int check_chunks(struct cache_tree *chunk_cache,
8276 struct block_group_tree *block_group_cache,
8277 struct device_extent_tree *dev_extent_cache,
8278 struct list_head *good, struct list_head *bad,
8279 struct list_head *rebuild, int silent)
8281 struct cache_extent *chunk_item;
8282 struct chunk_record *chunk_rec;
8283 struct block_group_record *bg_rec;
8284 struct device_extent_record *dext_rec;
8288 chunk_item = first_cache_extent(chunk_cache);
8289 while (chunk_item) {
8290 chunk_rec = container_of(chunk_item, struct chunk_record,
8292 err = check_chunk_refs(chunk_rec, block_group_cache,
8293 dev_extent_cache, silent);
8296 if (err == 0 && good)
8297 list_add_tail(&chunk_rec->list, good);
8298 if (err > 0 && rebuild)
8299 list_add_tail(&chunk_rec->list, rebuild);
8301 list_add_tail(&chunk_rec->list, bad);
8302 chunk_item = next_cache_extent(chunk_item);
/* Report block groups / dev extents that matched no chunk. */
8305 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8308 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8316 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8320 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Verify a device's accounted usage: sum the lengths of all device
 * extents cached for @dev_rec->devid and compare the total against the
 * byte_used value recorded in the device item.  Matched extents are
 * unlinked from the per-device orphan list as they are consumed.
 * NOTE(review): elided listing — loop braces/return paths not visible.
 */
8331 static int check_device_used(struct device_record *dev_rec,
8332 struct device_extent_tree *dext_cache)
8334 struct cache_extent *cache;
8335 struct device_extent_record *dev_extent_rec;
/* Start at the first dev extent belonging to this devid. */
8338 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8340 dev_extent_rec = container_of(cache,
8341 struct device_extent_record,
/* Stop once iteration crosses into another device's extents. */
8343 if (dev_extent_rec->objectid != dev_rec->devid)
8346 list_del_init(&dev_extent_rec->device_list);
8347 total_byte += dev_extent_rec->length;
8348 cache = next_cache_extent(cache);
/* Accounting mismatch between dev extents and the dev item. */
8351 if (total_byte != dev_rec->byte_used) {
8353 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8354 total_byte, dev_rec->byte_used, dev_rec->objectid,
8355 dev_rec->type, dev_rec->offset);
8362 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Walk every cached device record and validate its used-byte accounting
 * with check_device_used(); then report device extents that reference a
 * device which does not exist.
 * NOTE(review): elided listing — loop braces and error folding are not
 * visible between the numbered lines.
 */
8363 static int check_devices(struct rb_root *dev_cache,
8364 struct device_extent_tree *dev_extent_cache)
8366 struct rb_node *dev_node;
8367 struct device_record *dev_rec;
8368 struct device_extent_record *dext_rec;
8372 dev_node = rb_first(dev_cache);
8374 dev_rec = container_of(dev_node, struct device_record, node);
8375 err = check_device_used(dev_rec, dev_extent_cache);
8379 dev_node = rb_next(dev_node);
/* Device extents whose owning device item was never found. */
8381 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8384 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8385 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root (bytenr, level,
 * drop state, …) and append it to @head for later processing by
 * deal_root_from_list().  @drop_key may be NULL when the root is not a
 * partially-dropped snapshot.
 * Caller frees the list through free_root_item_list().
 * NOTE(review): elided listing — the malloc-failure branch between the
 * numbered lines is not visible here.
 */
8392 static int add_root_item_to_list(struct list_head *head,
8393 u64 objectid, u64 bytenr, u64 last_snapshot,
8394 u8 level, u8 drop_level,
8395 int level_size, struct btrfs_key *drop_key)
8398 struct root_item_record *ri_rec;
8399 ri_rec = malloc(sizeof(*ri_rec));
8402 ri_rec->bytenr = bytenr;
8403 ri_rec->objectid = objectid;
8404 ri_rec->level = level;
8405 ri_rec->level_size = level_size;
8406 ri_rec->drop_level = drop_level;
8407 ri_rec->last_snapshot = last_snapshot;
/* Only dropping snapshots carry a resume key. */
8409 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8410 list_add_tail(&ri_rec->list, head);
/*
 * Release every root_item_record queued on @list (counterpart of
 * add_root_item_to_list).  NOTE(review): the free() call itself falls in
 * an elided line of this listing.
 */
8415 static void free_root_item_list(struct list_head *list)
8417 struct root_item_record *ri_rec;
8419 while (!list_empty(list)) {
8420 ri_rec = list_first_entry(list, struct root_item_record,
8422 list_del_init(&ri_rec->list);
/*
 * Process the queued tree roots one by one: read each root's node, add
 * it to the pending set, then drain blocks via run_next_block() until
 * that root is fully walked, feeding the extent/chunk/device caches.
 * Finally drain any remaining pending blocks with a NULL root record.
 * NOTE(review): elided listing — error checks, braces and the final
 * return are not visible between the numbered lines.
 */
8427 static int deal_root_from_list(struct list_head *list,
8428 struct btrfs_root *root,
8429 struct block_info *bits,
8431 struct cache_tree *pending,
8432 struct cache_tree *seen,
8433 struct cache_tree *reada,
8434 struct cache_tree *nodes,
8435 struct cache_tree *extent_cache,
8436 struct cache_tree *chunk_cache,
8437 struct rb_root *dev_cache,
8438 struct block_group_tree *block_group_cache,
8439 struct device_extent_tree *dev_extent_cache)
8444 while (!list_empty(list)) {
8445 struct root_item_record *rec;
8446 struct extent_buffer *buf;
8447 rec = list_entry(list->next,
8448 struct root_item_record, list);
8450 buf = read_tree_block(root->fs_info->tree_root,
8451 rec->bytenr, rec->level_size, 0);
8452 if (!extent_buffer_uptodate(buf)) {
8453 free_extent_buffer(buf);
8457 ret = add_root_to_pending(buf, extent_cache, pending,
8458 seen, nodes, rec->objectid);
/*
8462 * To rebuild extent tree, we need deal with snapshot
8463 * one by one, otherwise we deal with node firstly which
8464 * can maximize readahead.
*/
8467 ret = run_next_block(root, bits, bits_nr, &last,
8468 pending, seen, reada, nodes,
8469 extent_cache, chunk_cache,
8470 dev_cache, block_group_cache,
8471 dev_extent_cache, rec);
8475 free_extent_buffer(buf);
8476 list_del(&rec->list);
/* Drain whatever is still pending after all roots are consumed. */
8482 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8483 reada, nodes, extent_cache, chunk_cache,
8484 dev_cache, block_group_cache,
8485 dev_extent_cache, NULL);
/*
 * Top-level pass of the original check mode: walk every tree (tree root,
 * chunk root, and every root item in the root tree — including
 * partially-dropped snapshots), building caches of extents, chunks,
 * block groups and device extents, then cross-check them with
 * check_chunks(), check_extent_refs() and check_devices().
 *
 * Sets up (and tears down) the fs_info hooks used by the fsck extent
 * tracking (fsck_extent_cache, free_extent_hook, corrupt_blocks,
 * excluded_extents).
 *
 * NOTE(review): elided listing — the retry/"again" path, error gotos and
 * both cleanup labels are only partially visible; the two cleanup runs
 * near the end appear to be the normal and error paths respectively.
 */
8495 static int check_chunks_and_extents(struct btrfs_root *root)
8497 struct rb_root dev_cache;
8498 struct cache_tree chunk_cache;
8499 struct block_group_tree block_group_cache;
8500 struct device_extent_tree dev_extent_cache;
8501 struct cache_tree extent_cache;
8502 struct cache_tree seen;
8503 struct cache_tree pending;
8504 struct cache_tree reada;
8505 struct cache_tree nodes;
8506 struct extent_io_tree excluded_extents;
8507 struct cache_tree corrupt_blocks;
8508 struct btrfs_path path;
8509 struct btrfs_key key;
8510 struct btrfs_key found_key;
8512 struct block_info *bits;
8514 struct extent_buffer *leaf;
8516 struct btrfs_root_item ri;
8517 struct list_head dropping_trees;
8518 struct list_head normal_trees;
8519 struct btrfs_root *root1;
/* Initialize all per-pass caches and lists. */
8524 dev_cache = RB_ROOT;
8525 cache_tree_init(&chunk_cache);
8526 block_group_tree_init(&block_group_cache);
8527 device_extent_tree_init(&dev_extent_cache);
8529 cache_tree_init(&extent_cache);
8530 cache_tree_init(&seen);
8531 cache_tree_init(&pending);
8532 cache_tree_init(&nodes);
8533 cache_tree_init(&reada);
8534 cache_tree_init(&corrupt_blocks);
8535 extent_io_tree_init(&excluded_extents);
8536 INIT_LIST_HEAD(&dropping_trees);
8537 INIT_LIST_HEAD(&normal_trees);
/* Install fsck hooks so extent alloc/free is tracked during the walk. */
8540 root->fs_info->excluded_extents = &excluded_extents;
8541 root->fs_info->fsck_extent_cache = &extent_cache;
8542 root->fs_info->free_extent_hook = free_extent_hook;
8543 root->fs_info->corrupt_blocks = &corrupt_blocks;
8547 bits = malloc(bits_nr * sizeof(struct block_info));
8553 if (ctx.progress_enabled) {
8554 ctx.tp = TASK_EXTENTS;
8555 task_start(ctx.info);
/* Queue the tree root and chunk root for walking. */
8559 root1 = root->fs_info->tree_root;
8560 level = btrfs_header_level(root1->node);
8561 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8562 root1->node->start, 0, level, 0,
8563 root1->nodesize, NULL);
8566 root1 = root->fs_info->chunk_root;
8567 level = btrfs_header_level(root1->node);
8568 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8569 root1->node->start, 0, level, 0,
8570 root1->nodesize, NULL);
/* Enumerate every ROOT_ITEM in the root tree. */
8573 btrfs_init_path(&path);
8576 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8577 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8582 leaf = path.nodes[0];
8583 slot = path.slots[0];
8584 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8585 ret = btrfs_next_leaf(root, &path);
8588 leaf = path.nodes[0];
8589 slot = path.slots[0];
8591 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8592 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8593 unsigned long offset;
8596 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8597 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8598 last_snapshot = btrfs_root_last_snapshot(&ri);
/* drop_progress == 0 means the root is not being dropped. */
8599 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8600 level = btrfs_root_level(&ri);
8601 level_size = root->nodesize;
8602 ret = add_root_item_to_list(&normal_trees,
8604 btrfs_root_bytenr(&ri),
8605 last_snapshot, level,
8606 0, level_size, NULL);
/* Otherwise queue it as a dropping tree with its resume key. */
8610 level = btrfs_root_level(&ri);
8611 level_size = root->nodesize;
8612 objectid = found_key.objectid;
8613 btrfs_disk_key_to_cpu(&found_key,
8615 ret = add_root_item_to_list(&dropping_trees,
8617 btrfs_root_bytenr(&ri),
8618 last_snapshot, level,
8620 level_size, &found_key);
8627 btrfs_release_path(&path);
/*
8630 * check_block can return -EAGAIN if it fixes something, please keep
8631 * this in mind when dealing with return values from these functions, if
8632 * we get -EAGAIN we want to fall through and restart the loop.
*/
8634 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8635 &seen, &reada, &nodes, &extent_cache,
8636 &chunk_cache, &dev_cache, &block_group_cache,
8643 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8644 &pending, &seen, &reada, &nodes,
8645 &extent_cache, &chunk_cache, &dev_cache,
8646 &block_group_cache, &dev_extent_cache);
/* Cross-checks over the populated caches. */
8653 ret = check_chunks(&chunk_cache, &block_group_cache,
8654 &dev_extent_cache, NULL, NULL, NULL, 0);
8661 ret = check_extent_refs(root, &extent_cache);
8668 ret = check_devices(&dev_cache, &dev_extent_cache);
8673 task_stop(ctx.info);
/* Detach the fsck hooks and free everything (normal path). */
8675 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8676 extent_io_tree_cleanup(&excluded_extents);
8677 root->fs_info->fsck_extent_cache = NULL;
8678 root->fs_info->free_extent_hook = NULL;
8679 root->fs_info->corrupt_blocks = NULL;
8680 root->fs_info->excluded_extents = NULL;
8683 free_chunk_cache_tree(&chunk_cache);
8684 free_device_cache_tree(&dev_cache);
8685 free_block_group_tree(&block_group_cache);
8686 free_device_extent_tree(&dev_extent_cache);
8687 free_extent_cache_tree(&seen);
8688 free_extent_cache_tree(&pending);
8689 free_extent_cache_tree(&reada);
8690 free_extent_cache_tree(&nodes);
/* Presumably the error-path cleanup label — TODO confirm in full source. */
8693 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8694 free_extent_cache_tree(&seen);
8695 free_extent_cache_tree(&pending);
8696 free_extent_cache_tree(&reada);
8697 free_extent_cache_tree(&nodes);
8698 free_chunk_cache_tree(&chunk_cache);
8699 free_block_group_tree(&block_group_cache);
8700 free_device_cache_tree(&dev_cache);
8701 free_device_extent_tree(&dev_extent_cache);
8702 free_extent_record_cache(root->fs_info, &extent_cache);
8703 free_root_item_list(&normal_trees);
8704 free_root_item_list(&dropping_trees);
8705 extent_io_tree_cleanup(&excluded_extents);
/*
8710 * Check backrefs of a tree block given by @bytenr or @eb.
8712 * @root: the root containing the @bytenr or @eb
8713 * @eb: tree block extent buffer, can be NULL
8714 * @bytenr: bytenr of the tree block to search
8715 * @level: tree level of the tree block
8716 * @owner: owner of the tree block
8718 * Return >0 for any error found and output error message
8719 * Return 0 for no error found
*/
8721 static int check_tree_block_ref(struct btrfs_root *root,
8722 struct extent_buffer *eb, u64 bytenr,
8723 int level, u64 owner)
8725 struct btrfs_key key;
8726 struct btrfs_root *extent_root = root->fs_info->extent_root;
8727 struct btrfs_path path;
8728 struct btrfs_extent_item *ei;
8729 struct btrfs_extent_inline_ref *iref;
8730 struct extent_buffer *leaf;
8736 u32 nodesize = root->nodesize;
/* Skinny metadata uses METADATA_ITEM_KEY; legacy uses EXTENT_ITEM_KEY. */
8743 btrfs_init_path(&path);
8744 key.objectid = bytenr;
8745 if (btrfs_fs_incompat(root->fs_info,
8746 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8747 key.type = BTRFS_METADATA_ITEM_KEY;
8749 key.type = BTRFS_EXTENT_ITEM_KEY;
8750 key.offset = (u64)-1;
8752 /* Search for the backref in extent tree */
8753 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8755 err |= BACKREF_MISSING;
8758 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8760 err |= BACKREF_MISSING;
8764 leaf = path.nodes[0];
8765 slot = path.slots[0];
8766 btrfs_item_key_to_cpu(leaf, &key, slot);
8768 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* For skinny metadata the level is encoded in key.offset. */
8770 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8771 skinny_level = (int)key.offset;
8772 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8774 struct btrfs_tree_block_info *info;
8776 info = (struct btrfs_tree_block_info *)(ei + 1);
8777 skinny_level = btrfs_tree_block_level(leaf, info);
8778 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* The extent item must be flagged as a tree block. */
8785 if (!(btrfs_extent_flags(leaf, ei) &
8786 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8788 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8789 key.objectid, nodesize,
8790 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8791 err = BACKREF_MISMATCH;
/* When @eb is given, cross-check generation and level. */
8793 header_gen = btrfs_header_generation(eb);
8794 extent_gen = btrfs_extent_generation(leaf, ei);
8795 if (header_gen != extent_gen) {
8797 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8798 key.objectid, nodesize, header_gen,
8800 err = BACKREF_MISMATCH;
8802 if (level != skinny_level) {
8804 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8805 key.objectid, nodesize, level, skinny_level);
8806 err = BACKREF_MISMATCH;
/* Non-fs trees must be referenced exactly once. */
8808 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8810 "extent[%llu %u] is referred by other roots than %llu",
8811 key.objectid, nodesize, root->objectid);
8812 err = BACKREF_MISMATCH;
/*
8817 * Iterate the extent/metadata item to find the exact backref
*/
8819 item_size = btrfs_item_size_nr(leaf, slot);
8820 ptr = (unsigned long)iref;
8821 end = (unsigned long)ei + item_size;
8823 iref = (struct btrfs_extent_inline_ref *)ptr;
8824 type = btrfs_extent_inline_ref_type(leaf, iref);
8825 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8827 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8828 (offset == root->objectid || offset == owner)) {
8830 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8831 /* Check if the backref points to valid referencer */
/* Recurses with eb == NULL to validate the parent block. */
8832 found_ref = !check_tree_block_ref(root, NULL, offset,
8838 ptr += btrfs_extent_inline_ref_size(type);
/*
8842 * Inlined extent item doesn't have what we need, check
8843 * TREE_BLOCK_REF_KEY
*/
8846 btrfs_release_path(&path);
8847 key.objectid = bytenr;
8848 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8849 key.offset = root->objectid;
8851 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8856 err |= BACKREF_MISSING;
8858 btrfs_release_path(&path);
8859 if (eb && (err & BACKREF_MISSING))
8860 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8861 bytenr, nodesize, owner, level);
/*
8866 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8868 * Return >0 any error found and output error message
8869 * Return 0 for no error found
*/
8871 static int check_extent_data_item(struct btrfs_root *root,
8872 struct extent_buffer *eb, int slot)
8874 struct btrfs_file_extent_item *fi;
8875 struct btrfs_path path;
8876 struct btrfs_root *extent_root = root->fs_info->extent_root;
8877 struct btrfs_key fi_key;
8878 struct btrfs_key dbref_key;
8879 struct extent_buffer *leaf;
8880 struct btrfs_extent_item *ei;
8881 struct btrfs_extent_inline_ref *iref;
8882 struct btrfs_extent_data_ref *dref;
8884 u64 file_extent_gen;
8887 u64 extent_num_bytes;
8895 int found_dbackref = 0;
8899 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8900 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8901 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8903 /* Nothing to check for hole and inline data extents */
8904 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8905 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8908 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8909 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8910 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8912 /* Check unaligned disk_num_bytes and num_bytes */
8913 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8915 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8916 fi_key.objectid, fi_key.offset, disk_num_bytes,
8918 err |= BYTES_UNALIGNED;
/* Global accounting used by the summary at the end of fsck. */
8920 data_bytes_allocated += disk_num_bytes;
8922 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8924 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8925 fi_key.objectid, fi_key.offset, extent_num_bytes,
8927 err |= BYTES_UNALIGNED;
8929 data_bytes_referenced += extent_num_bytes;
8931 owner = btrfs_header_owner(eb);
8933 /* Check the extent item of the file extent in extent tree */
8934 btrfs_init_path(&path);
8935 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8936 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8937 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8939 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8941 err |= BACKREF_MISSING;
8945 leaf = path.nodes[0];
8946 slot = path.slots[0];
8947 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8949 extent_flags = btrfs_extent_flags(leaf, ei);
8950 extent_gen = btrfs_extent_generation(leaf, ei);
/* Data extents must carry the DATA flag. */
8952 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8954 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8955 disk_bytenr, disk_num_bytes,
8956 BTRFS_EXTENT_FLAG_DATA);
8957 err |= BACKREF_MISMATCH;
/* File extent generation can never precede the extent's generation. */
8960 if (file_extent_gen < extent_gen) {
8962 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8963 disk_bytenr, disk_num_bytes, file_extent_gen,
8965 err |= BACKREF_MISMATCH;
8968 /* Check data backref inside that extent item */
8969 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8970 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8971 ptr = (unsigned long)iref;
8972 end = (unsigned long)ei + item_size;
8974 iref = (struct btrfs_extent_inline_ref *)ptr;
8975 type = btrfs_extent_inline_ref_type(leaf, iref);
8976 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8978 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8979 ref_root = btrfs_extent_data_ref_root(leaf, dref);
8980 if (ref_root == owner || ref_root == root->objectid)
8982 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared ref: validate that the referencing tree block exists. */
8983 found_dbackref = !check_tree_block_ref(root, NULL,
8984 btrfs_extent_inline_ref_offset(leaf, iref),
8990 ptr += btrfs_extent_inline_ref_size(type);
8993 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8994 if (!found_dbackref) {
8995 btrfs_release_path(&path);
8997 btrfs_init_path(&path);
8998 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8999 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
9000 dbref_key.offset = hash_extent_data_ref(root->objectid,
9001 fi_key.objectid, fi_key.offset);
9003 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9004 &dbref_key, &path, 0, 0);
9009 if (!found_dbackref)
9010 err |= BACKREF_MISSING;
9012 btrfs_release_path(&path);
9013 if (err & BACKREF_MISSING) {
9014 error("data extent[%llu %llu] backref lost",
9015 disk_bytenr, disk_num_bytes);
/*
9021 * Get real tree block level for the case like shared block
9022 * Return >= 0 as tree level
9023 * Return <0 for error
*/
9025 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9027 struct extent_buffer *eb;
9028 struct btrfs_path path;
9029 struct btrfs_key key;
9030 struct btrfs_extent_item *ei;
9033 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9038 /* Search extent tree for extent generation and level */
9039 key.objectid = bytenr;
9040 key.type = BTRFS_METADATA_ITEM_KEY;
9041 key.offset = (u64)-1;
9043 btrfs_init_path(&path);
9044 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9047 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9055 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9056 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9057 struct btrfs_extent_item);
/* Only tree blocks have a level; data extents are rejected here. */
9058 flags = btrfs_extent_flags(path.nodes[0], ei);
9059 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9064 /* Get transid for later read_tree_block() check */
9065 transid = btrfs_extent_generation(path.nodes[0], ei);
9067 /* Get backref level as one source */
9068 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9069 backref_level = key.offset;
9071 struct btrfs_tree_block_info *info;
9073 info = (struct btrfs_tree_block_info *)(ei + 1);
9074 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9076 btrfs_release_path(&path);
9078 /* Get level from tree block as an alternative source */
9079 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9080 if (!extent_buffer_uptodate(eb)) {
9081 free_extent_buffer(eb);
/* The two sources (header vs backref) must agree. */
9084 header_level = btrfs_header_level(eb);
9085 free_extent_buffer(eb);
9087 if (header_level != backref_level)
9089 return header_level;
/* Error path: release the path before returning — elided label above. */
9092 btrfs_release_path(&path);
/*
9097 * Check if a tree block backref is valid (points to a valid tree block)
9098 * if level == -1, level will be resolved
9099 * Return >0 for any error found and print error message
*/
9101 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9102 u64 bytenr, int level)
9104 struct btrfs_root *root;
9105 struct btrfs_key key;
9106 struct btrfs_path path;
9107 struct extent_buffer *eb;
9108 struct extent_buffer *node;
9109 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9113 /* Query level for level == -1 special case */
9115 level = query_tree_block_level(fs_info, bytenr);
9117 err |= REFERENCER_MISSING;
/* Look up the root claimed by the backref. */
9121 key.objectid = root_id;
9122 key.type = BTRFS_ROOT_ITEM_KEY;
9123 key.offset = (u64)-1;
9125 root = btrfs_read_fs_root(fs_info, &key);
9127 err |= REFERENCER_MISSING;
9131 /* Read out the tree block to get item/node key */
9132 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9133 if (!extent_buffer_uptodate(eb)) {
9134 err |= REFERENCER_MISSING;
9135 free_extent_buffer(eb);
9139 /* Empty tree, no need to check key */
9140 if (!btrfs_header_nritems(eb) && !level) {
9141 free_extent_buffer(eb);
/* Grab the first key (node or leaf form depending on level). */
9146 btrfs_node_key_to_cpu(eb, &key, 0);
9148 btrfs_item_key_to_cpu(eb, &key, 0);
9150 free_extent_buffer(eb);
9152 btrfs_init_path(&path);
9153 /* Search with the first key, to ensure we can reach it */
9154 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9156 err |= REFERENCER_MISSING;
/* The block found at @level on the path must be the one we started from. */
9160 node = path.nodes[level];
9161 if (btrfs_header_bytenr(node) != bytenr) {
9163 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9164 bytenr, nodesize, bytenr,
9165 btrfs_header_bytenr(node));
9166 err |= REFERENCER_MISMATCH;
9168 if (btrfs_header_level(node) != level) {
9170 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9171 bytenr, nodesize, level,
9172 btrfs_header_level(node));
9173 err |= REFERENCER_MISMATCH;
9177 btrfs_release_path(&path);
9179 if (err & REFERENCER_MISSING) {
9181 error("extent [%llu %d] lost referencer (owner: %llu)",
9182 bytenr, nodesize, root_id);
9185 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9186 bytenr, nodesize, root_id, level);
/*
9193 * Check referencer for shared block backref
9194 * If level == -1, this function will resolve the level.
*/
9196 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9197 u64 parent, u64 bytenr, int level)
9199 struct extent_buffer *eb;
9200 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9202 int found_parent = 0;
9205 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9206 if (!extent_buffer_uptodate(eb))
9210 level = query_tree_block_level(fs_info, bytenr);
/* The parent must sit exactly one level above the child block. */
9214 if (level + 1 != btrfs_header_level(eb))
/* Scan the parent's block pointers for the child's bytenr. */
9217 nr = btrfs_header_nritems(eb);
9218 for (i = 0; i < nr; i++) {
9219 if (bytenr == btrfs_node_blockptr(eb, i)) {
9225 free_extent_buffer(eb);
9226 if (!found_parent) {
9228 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9229 bytenr, nodesize, parent, level);
9230 return REFERENCER_MISSING;
/*
9236 * Check referencer for normal (inlined) data ref
9237 * If len == 0, it will be resolved by searching in extent tree
*/
9239 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9240 u64 root_id, u64 objectid, u64 offset,
9241 u64 bytenr, u64 len, u32 count)
9243 struct btrfs_root *root;
9244 struct btrfs_root *extent_root = fs_info->extent_root;
9245 struct btrfs_key key;
9246 struct btrfs_path path;
9247 struct extent_buffer *leaf;
9248 struct btrfs_file_extent_item *fi;
9249 u32 found_count = 0;
/* Resolve @len from the extent tree when the caller passed 0. */
9254 key.objectid = bytenr;
9255 key.type = BTRFS_EXTENT_ITEM_KEY;
9256 key.offset = (u64)-1;
9258 btrfs_init_path(&path);
9259 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9262 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9265 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9266 if (key.objectid != bytenr ||
9267 key.type != BTRFS_EXTENT_ITEM_KEY)
9270 btrfs_release_path(&path);
/* Open the fs root named by the backref. */
9272 key.objectid = root_id;
9273 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9274 key.offset = (u64)-1;
9275 btrfs_init_path(&path);
9277 root = btrfs_read_fs_root(fs_info, &key);
9281 key.objectid = objectid;
9282 key.type = BTRFS_EXTENT_DATA_KEY;
/*
9284 * It can be nasty as data backref offset is
9285 * file offset - file extent offset, which is smaller or
9286 * equal to original backref offset. The only special case is
9287 * overflow. So we need to special check and do further search.
*/
9289 key.offset = offset & (1ULL << 63) ? 0 : offset;
9291 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/*
9296 * Search afterwards to get correct one
9297 * NOTE: As we must do a comprehensive check on the data backref to
9298 * make sure the dref count also matches, we must iterate all file
9299 * extents for that inode.
*/
9302 leaf = path.nodes[0];
9303 slot = path.slots[0];
9305 btrfs_item_key_to_cpu(leaf, &key, slot);
9306 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9308 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
/*
9310 * Except normal disk bytenr and disk num bytes, we still
9311 * need to do extra check on dbackref offset as
9312 * dbackref offset = file_offset - file_extent_offset
*/
9314 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9315 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9316 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9320 ret = btrfs_next_item(root, &path);
9325 btrfs_release_path(&path);
/* The number of matching file extents must equal the backref count. */
9326 if (found_count != count) {
9328 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9329 bytenr, len, root_id, objectid, offset, count, found_count);
9330 return REFERENCER_MISSING;
/*
9336 * Check if the referencer of a shared data backref exists
*/
9338 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9339 u64 parent, u64 bytenr)
9341 struct extent_buffer *eb;
9342 struct btrfs_key key;
9343 struct btrfs_file_extent_item *fi;
9344 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9346 int found_parent = 0;
9349 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9350 if (!extent_buffer_uptodate(eb))
/* Scan the parent leaf for a file extent pointing at @bytenr. */
9353 nr = btrfs_header_nritems(eb);
9354 for (i = 0; i < nr; i++) {
9355 btrfs_item_key_to_cpu(eb, &key, i);
9356 if (key.type != BTRFS_EXTENT_DATA_KEY)
9359 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
/* Inline extents have no disk bytenr and cannot be shared refs. */
9360 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9363 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9370 free_extent_buffer(eb);
9371 if (!found_parent) {
9372 error("shared extent %llu referencer lost (parent: %llu)",
9374 return REFERENCER_MISSING;
/*
9380 * This function will check a given extent item, including its backref and
9381 * itself (like crossing stripe boundary and type)
9383 * Since we don't use extent_record anymore, introduce new error bit
*/
9385 static int check_extent_item(struct btrfs_fs_info *fs_info,
9386 struct extent_buffer *eb, int slot)
9388 struct btrfs_extent_item *ei;
9389 struct btrfs_extent_inline_ref *iref;
9390 struct btrfs_extent_data_ref *dref;
9394 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9395 u32 item_size = btrfs_item_size_nr(eb, slot);
9400 struct btrfs_key key;
/* Accumulate global used-space: data uses key.offset, metadata nodesize. */
9404 btrfs_item_key_to_cpu(eb, &key, slot);
9405 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9406 bytes_used += key.offset;
9408 bytes_used += nodesize;
9410 if (item_size < sizeof(*ei)) {
/*
9412 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9413 * old thing when on disk format is still un-determined.
9414 * No need to care about it anymore
*/
9416 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9420 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9421 flags = btrfs_extent_flags(eb, ei);
9423 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
/* Metadata must not straddle a stripe boundary (scrub limitation). */
9425 if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9426 error("bad metadata [%llu, %llu) crossing stripe boundary",
9427 key.objectid, key.objectid + nodesize);
9428 err |= CROSSING_STRIPE_BOUNDARY;
9431 ptr = (unsigned long)(ei + 1);
9433 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9434 /* Old EXTENT_ITEM metadata */
9435 struct btrfs_tree_block_info *info;
9437 info = (struct btrfs_tree_block_info *)ptr;
9438 level = btrfs_tree_block_level(eb, info);
9439 ptr += sizeof(struct btrfs_tree_block_info);
9441 /* New METADATA_ITEM */
9444 end = (unsigned long)ei + item_size;
9447 err |= ITEM_SIZE_MISMATCH;
9451 /* Now check every backref in this extent item */
9453 iref = (struct btrfs_extent_inline_ref *)ptr;
9454 type = btrfs_extent_inline_ref_type(eb, iref);
9455 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* Dispatch to the proper referencer check per inline ref type. */
9457 case BTRFS_TREE_BLOCK_REF_KEY:
9458 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9462 case BTRFS_SHARED_BLOCK_REF_KEY:
9463 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9467 case BTRFS_EXTENT_DATA_REF_KEY:
9468 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9469 ret = check_extent_data_backref(fs_info,
9470 btrfs_extent_data_ref_root(eb, dref),
9471 btrfs_extent_data_ref_objectid(eb, dref),
9472 btrfs_extent_data_ref_offset(eb, dref),
9473 key.objectid, key.offset,
9474 btrfs_extent_data_ref_count(eb, dref));
9477 case BTRFS_SHARED_DATA_REF_KEY:
9478 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9482 error("extent[%llu %d %llu] has unknown ref type: %d",
9483 key.objectid, key.type, key.offset, type);
9484 err |= UNKNOWN_TYPE;
9488 ptr += btrfs_extent_inline_ref_size(type);
/*
9497 * Check if a dev extent item is referred correctly by its chunk
*/
9499 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9500 struct extent_buffer *eb, int slot)
9502 struct btrfs_root *chunk_root = fs_info->chunk_root;
9503 struct btrfs_dev_extent *ptr;
9504 struct btrfs_path path;
9505 struct btrfs_key chunk_key;
9506 struct btrfs_key devext_key;
9507 struct btrfs_chunk *chunk;
9508 struct extent_buffer *l;
9512 int found_chunk = 0;
9515 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9516 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9517 length = btrfs_dev_extent_length(eb, ptr);
/* Build the chunk key from the back-pointer stored in the dev extent. */
9519 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9520 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9521 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9523 btrfs_init_path(&path);
9524 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9529 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9530 if (btrfs_chunk_length(l, chunk) != length)
/* One stripe of the chunk must point back to this devid/offset. */
9533 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9534 for (i = 0; i < num_stripes; i++) {
9535 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9536 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9538 if (devid == devext_key.objectid &&
9539 offset == devext_key.offset) {
9545 btrfs_release_path(&path);
9548 "device extent[%llu, %llu, %llu] did not find the related chunk",
9549 devext_key.objectid, devext_key.offset, length);
9550 return REFERENCER_MISSING;
/*
9556 * Check if the used space is correct with the dev item
*/
9558 static int check_dev_item(struct btrfs_fs_info *fs_info,
9559 struct extent_buffer *eb, int slot)
9561 struct btrfs_root *dev_root = fs_info->dev_root;
9562 struct btrfs_dev_item *dev_item;
9563 struct btrfs_path path;
9564 struct btrfs_key key;
9565 struct btrfs_dev_extent *ptr;
9571 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9572 dev_id = btrfs_device_id(eb, dev_item);
9573 used = btrfs_device_bytes_used(eb, dev_item);
/* Position on the first dev extent of this device. */
9575 key.objectid = dev_id;
9576 key.type = BTRFS_DEV_EXTENT_KEY;
9579 btrfs_init_path(&path);
9580 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9582 btrfs_item_key_to_cpu(eb, &key, slot);
9583 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9584 key.objectid, key.type, key.offset);
9585 btrfs_release_path(&path);
9586 return REFERENCER_MISSING;
9589 /* Iterate dev_extents to calculate the used space of a device */
9591 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9593 if (key.objectid > dev_id)
9595 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9598 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9599 struct btrfs_dev_extent);
9600 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9602 ret = btrfs_next_item(dev_root, &path);
9606 btrfs_release_path(&path);
/* Sum of dev extents must match the dev item's bytes_used. */
9608 if (used != total) {
9609 btrfs_item_key_to_cpu(eb, &key, slot);
9611 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9612 total, used, BTRFS_ROOT_TREE_OBJECTID,
9613 BTRFS_DEV_EXTENT_KEY, dev_id);
9614 return ACCOUNTING_MISMATCH;
9620 * Check a block group item with its referencer (chunk) and its used space
9621 * with extent/metadata item
9623 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9624 struct extent_buffer *eb, int slot)
9626 struct btrfs_root *extent_root = fs_info->extent_root;
9627 struct btrfs_root *chunk_root = fs_info->chunk_root;
9628 struct btrfs_block_group_item *bi;
9629 struct btrfs_block_group_item bg_item;
9630 struct btrfs_path path;
9631 struct btrfs_key bg_key;
9632 struct btrfs_key chunk_key;
9633 struct btrfs_key extent_key;
9634 struct btrfs_chunk *chunk;
9635 struct extent_buffer *leaf;
9636 struct btrfs_extent_item *ei;
9637 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the on-disk block group item so it can be read with cpu helpers */
9645 btrfs_item_key_to_cpu(eb, &bg_key, slot);
9646 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9647 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9648 used = btrfs_block_group_used(&bg_item);
9649 bg_flags = btrfs_block_group_flags(&bg_item);
/* A chunk keyed (FIRST_CHUNK_TREE, CHUNK_ITEM, bg start) must exist */
9651 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9652 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9653 chunk_key.offset = bg_key.objectid;
9655 btrfs_init_path(&path);
9656 /* Search for the referencer chunk */
9657 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9660 "block group[%llu %llu] did not find the related chunk item",
9661 bg_key.objectid, bg_key.offset);
9662 err |= REFERENCER_MISSING;
9664 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9665 struct btrfs_chunk);
/* The chunk must cover exactly the same length as the block group */
9666 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9669 "block group[%llu %llu] related chunk item length does not match",
9670 bg_key.objectid, bg_key.offset);
9671 err |= REFERENCER_MISMATCH;
9674 btrfs_release_path(&path);
9676 /* Search from the block group bytenr */
9677 extent_key.objectid = bg_key.objectid;
9678 extent_key.type = 0;
9679 extent_key.offset = 0;
9681 btrfs_init_path(&path);
9682 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9686 /* Iterate extent tree to account used space */
9688 leaf = path.nodes[0];
9689 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Stop once past the end of this block group's range */
9690 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9693 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9694 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9696 if (extent_key.objectid < bg_key.objectid)
/*
 * METADATA_ITEM offset is the tree level, not a byte length, so it
 * is accounted separately (presumably as nodesize; that branch body
 * is not visible here) while EXTENT_ITEM offset is the byte length.
 */
9699 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9702 total += extent_key.offset;
9704 ei = btrfs_item_ptr(leaf, path.slots[0],
9705 struct btrfs_extent_item);
/* Extent kind must agree with the block group type (data vs metadata) */
9706 flags = btrfs_extent_flags(leaf, ei);
9707 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9708 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9710 "bad extent[%llu, %llu) type mismatch with chunk",
9711 extent_key.objectid,
9712 extent_key.objectid + extent_key.offset);
9713 err |= CHUNK_TYPE_MISMATCH;
9715 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9716 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9717 BTRFS_BLOCK_GROUP_METADATA))) {
9719 "bad extent[%llu, %llu) type mismatch with chunk",
9720 extent_key.objectid,
9721 extent_key.objectid + nodesize);
9722 err |= CHUNK_TYPE_MISMATCH;
9726 ret = btrfs_next_item(extent_root, &path);
9732 btrfs_release_path(&path);
/* Accumulated extent sizes must match the block group's used bytes */
9734 if (total != used) {
9736 "block group[%llu %llu] used %llu but extent items used %llu",
9737 bg_key.objectid, bg_key.offset, used, total);
9738 err |= ACCOUNTING_MISMATCH;
9744 * Check a chunk item.
9745 * Including checking all referred dev_extents and block group
9747 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9748 struct extent_buffer *eb, int slot)
9750 struct btrfs_root *extent_root = fs_info->extent_root;
9751 struct btrfs_root *dev_root = fs_info->dev_root;
9752 struct btrfs_path path;
9753 struct btrfs_key chunk_key;
9754 struct btrfs_key bg_key;
9755 struct btrfs_key devext_key;
9756 struct btrfs_chunk *chunk;
9757 struct extent_buffer *leaf;
9758 struct btrfs_block_group_item *bi;
9759 struct btrfs_block_group_item bg_item;
9760 struct btrfs_dev_extent *ptr;
9761 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9773 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9774 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9775 length = btrfs_chunk_length(eb, chunk);
9776 chunk_end = chunk_key.offset + length;
/* Chunk length must be sector aligned */
9777 if (!IS_ALIGNED(length, sectorsize)) {
9778 error("chunk[%llu %llu) not aligned to %u",
9779 chunk_key.offset, chunk_end, sectorsize);
9780 err |= BYTES_UNALIGNED;
/* A type bit (DATA/METADATA/SYSTEM) is required; at most one profile bit */
9784 type = btrfs_chunk_type(eb, chunk);
9785 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9786 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9787 error("chunk[%llu %llu) has no chunk type",
9788 chunk_key.offset, chunk_end);
9789 err |= UNKNOWN_TYPE;
9791 if (profile && (profile & (profile - 1))) {
9792 error("chunk[%llu %llu) multiple profiles detected: %llx",
9793 chunk_key.offset, chunk_end, profile);
9794 err |= UNKNOWN_TYPE;
/* The chunk must be referenced by a matching block group item */
9797 bg_key.objectid = chunk_key.offset;
9798 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9799 bg_key.offset = length;
9801 btrfs_init_path(&path);
9802 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9805 "chunk[%llu %llu) did not find the related block group item",
9806 chunk_key.offset, chunk_end);
9807 err |= REFERENCER_MISSING;
9809 leaf = path.nodes[0];
9810 bi = btrfs_item_ptr(leaf, path.slots[0],
9811 struct btrfs_block_group_item);
9812 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9814 if (btrfs_block_group_flags(&bg_item) != type) {
9816 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9817 chunk_key.offset, chunk_end, type,
9818 btrfs_block_group_flags(&bg_item));
/*
 * NOTE(review): a flags *mismatch* is recorded as REFERENCER_MISSING;
 * REFERENCER_MISMATCH looks like the intended error bit -- confirm.
 */
9819 err |= REFERENCER_MISSING;
/* Every stripe must be backed by a dev extent pointing back at this chunk */
9823 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9824 for (i = 0; i < num_stripes; i++) {
9825 btrfs_release_path(&path);
9826 btrfs_init_path(&path);
9827 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9828 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9829 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9831 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9836 leaf = path.nodes[0];
9837 ptr = btrfs_item_ptr(leaf, path.slots[0],
9838 struct btrfs_dev_extent);
/* The dev extent must reference this exact chunk with the same length */
9839 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9840 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9841 if (objectid != chunk_key.objectid ||
9842 offset != chunk_key.offset ||
9843 btrfs_dev_extent_length(leaf, ptr) != length)
9847 err |= BACKREF_MISSING;
/*
 * NOTE(review): prints chunk_key.objectid (which is always
 * FIRST_CHUNK_TREE_OBJECTID for chunk items) where the chunk start
 * (chunk_key.offset) was probably intended -- confirm.
 */
9849 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9850 chunk_key.objectid, chunk_end, i);
9853 btrfs_release_path(&path);
9859 * Main entry function to check known items and update related accounting info
9861 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9863 struct btrfs_fs_info *fs_info = root->fs_info;
9864 struct btrfs_key key;
9867 struct btrfs_extent_data_ref *dref;
/* Dispatch each leaf item to its dedicated checker, keyed by item type */
9872 btrfs_item_key_to_cpu(eb, &key, slot);
9873 type = btrfs_key_type(&key);
9876 case BTRFS_EXTENT_DATA_KEY:
9877 ret = check_extent_data_item(root, eb, slot);
9880 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9881 ret = check_block_group_item(fs_info, eb, slot);
9884 case BTRFS_DEV_ITEM_KEY:
9885 ret = check_dev_item(fs_info, eb, slot);
9888 case BTRFS_CHUNK_ITEM_KEY:
9889 ret = check_chunk_item(fs_info, eb, slot);
9892 case BTRFS_DEV_EXTENT_KEY:
9893 ret = check_dev_extent_item(fs_info, eb, slot);
9896 case BTRFS_EXTENT_ITEM_KEY:
9897 case BTRFS_METADATA_ITEM_KEY:
9898 ret = check_extent_item(fs_info, eb, slot);
/* Csum items are only accounted here, not validated */
9901 case BTRFS_EXTENT_CSUM_KEY:
9902 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9904 case BTRFS_TREE_BLOCK_REF_KEY:
9905 ret = check_tree_block_backref(fs_info, key.offset,
9909 case BTRFS_EXTENT_DATA_REF_KEY:
9910 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9911 ret = check_extent_data_backref(fs_info,
9912 btrfs_extent_data_ref_root(eb, dref),
9913 btrfs_extent_data_ref_objectid(eb, dref),
9914 btrfs_extent_data_ref_offset(eb, dref),
9916 btrfs_extent_data_ref_count(eb, dref));
9919 case BTRFS_SHARED_BLOCK_REF_KEY:
9920 ret = check_shared_block_backref(fs_info, key.offset,
9924 case BTRFS_SHARED_DATA_REF_KEY:
9925 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot until the leaf is exhausted */
9933 if (++slot < btrfs_header_nritems(eb))
9940 * Helper function for later fs/subvol tree check. To determine if a tree
9941 * block should be checked.
9942 * This function will ensure only the direct referencer with lowest rootid to
9943 * check a fs/subvolume tree block.
9945 * Backref check at extent tree would detect errors like missing subvolume
9946 * tree, so we can do aggressive check to reduce duplicated checks.
9948 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9950 struct btrfs_root *extent_root = root->fs_info->extent_root;
9951 struct btrfs_key key;
9952 struct btrfs_path path;
9953 struct extent_buffer *leaf;
9955 struct btrfs_extent_item *ei;
9961 struct btrfs_extent_inline_ref *iref;
/* Look up the extent item covering this tree block's bytenr */
9964 btrfs_init_path(&path);
9965 key.objectid = btrfs_header_bytenr(eb);
9966 key.type = BTRFS_METADATA_ITEM_KEY;
9967 key.offset = (u64)-1;
9970 * Any failure in backref resolving means we can't determine
9971 * whom the tree block belongs to.
9972 * So in that case, we need to check that tree block
9974 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9978 ret = btrfs_previous_extent_item(extent_root, &path,
9979 btrfs_header_bytenr(eb));
9983 leaf = path.nodes[0];
9984 slot = path.slots[0];
9985 btrfs_item_key_to_cpu(leaf, &key, slot);
9986 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* Skinny metadata items place inline refs right after the extent item */
9988 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9989 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9991 struct btrfs_tree_block_info *info;
9993 info = (struct btrfs_tree_block_info *)(ei + 1);
9994 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Walk all inline refs of this extent item */
9997 item_size = btrfs_item_size_nr(leaf, slot);
9998 ptr = (unsigned long)iref;
9999 end = (unsigned long)ei + item_size;
10000 while (ptr < end) {
10001 iref = (struct btrfs_extent_inline_ref *)ptr;
10002 type = btrfs_extent_inline_ref_type(leaf, iref);
10003 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10006 * We only check the tree block if current root is
10007 * the lowest referencer of it.
10009 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10010 offset < root->objectid) {
10011 btrfs_release_path(&path);
10015 ptr += btrfs_extent_inline_ref_size(type);
10018 * Normally we should also check keyed tree block ref, but that may be
10019 * very time consuming. Inlined ref should already make us skip a lot
10020 * of refs now. So skip search keyed tree block ref.
10024 btrfs_release_path(&path);
10029 * Traversal function for tree block. We will do:
10030 * 1) Skip shared fs/subvolume tree blocks
10031 * 2) Update related bytes accounting
10032 * 3) Pre-order traversal
10034 static int traverse_tree_block(struct btrfs_root *root,
10035 struct extent_buffer *node)
10037 struct extent_buffer *eb;
10045 * Skip shared fs/subvolume tree block, in that case they will
10046 * be checked by referencer with lowest rootid
10048 if (is_fstree(root->objectid) && !should_check(root, node))
10051 /* Update bytes accounting */
10052 total_btree_bytes += node->len;
10053 if (fs_root_objectid(btrfs_header_owner(node)))
10054 total_fs_tree_bytes += node->len;
10055 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10056 total_extent_tree_bytes += node->len;
/* Remember if an old-style reloc tree block (no RELOC flag) was seen */
10057 if (!found_old_backref &&
10058 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10059 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10060 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10061 found_old_backref = 1;
10063 /* pre-order traversal, check itself first */
10064 level = btrfs_header_level(node);
10065 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10066 btrfs_header_level(node),
10067 btrfs_header_owner(node));
/* Errors are reported but the walk continues to find more problems */
10071 "check %s failed root %llu bytenr %llu level %d, force continue check",
10072 level ? "node":"leaf", root->objectid,
10073 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaf: validate its items; node: account wasted key-ptr space */
10076 btree_space_waste += btrfs_leaf_free_space(root, node);
10077 ret = check_leaf_items(root, node);
10082 nr = btrfs_header_nritems(node);
10083 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10084 sizeof(struct btrfs_key_ptr);
10086 /* Then check all its children */
10087 for (i = 0; i < nr; i++) {
10088 u64 blocknr = btrfs_node_blockptr(node, i);
10091 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10092 * to call the function itself.
10094 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10095 if (extent_buffer_uptodate(eb)) {
10096 ret = traverse_tree_block(root, eb);
10099 free_extent_buffer(eb);
10106 * Low memory usage version check_chunks_and_extents.
10108 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10110 struct btrfs_path path;
10111 struct btrfs_key key;
10112 struct btrfs_root *root1;
10113 struct btrfs_root *cur_root;
/* The chunk tree and tree root are traversed directly from their nodes */
10117 root1 = root->fs_info->chunk_root;
10118 ret = traverse_tree_block(root1, root1->node);
10121 root1 = root->fs_info->tree_root;
10122 ret = traverse_tree_block(root1, root1->node);
/* Then walk every tree referenced by a ROOT_ITEM in the tree root */
10125 btrfs_init_path(&path);
10126 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10128 key.type = BTRFS_ROOT_ITEM_KEY;
10130 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
/* FIXME: typo in the message string -- "treet" should be "tree" */
10132 error("cannot find extent treet in tree_root");
10137 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10138 if (key.type != BTRFS_ROOT_ITEM_KEY)
10140 key.offset = (u64)-1;
10142 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10143 if (IS_ERR(cur_root) || !cur_root) {
10144 error("failed to read tree: %lld", key.objectid);
10148 ret = traverse_tree_block(cur_root, cur_root->node);
10152 ret = btrfs_next_item(root1, &path);
10158 btrfs_release_path(&path);
/*
 * Replace @root's node with a fresh, empty root block and hook it up to
 * the tree root (or update the root item in place when the new block
 * lands on the same bytenr as the old one).
 */
10162 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10163 struct btrfs_root *root, int overwrite)
10165 struct extent_buffer *c;
10166 struct extent_buffer *old = root->node;
10169 struct btrfs_disk_key disk_key = {0,0,0};
/* Either reuse the existing block (overwrite path, presumably) or allocate */
10175 extent_buffer_get(c);
10178 c = btrfs_alloc_free_block(trans, root,
10180 root->root_key.objectid,
10181 &disk_key, level, 0, 0);
10184 extent_buffer_get(c);
/* Build a fresh, empty root block header */
10188 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10189 btrfs_set_header_level(c, level);
10190 btrfs_set_header_bytenr(c, c->start);
10191 btrfs_set_header_generation(c, trans->transid);
10192 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10193 btrfs_set_header_owner(c, root->root_key.objectid);
10195 write_extent_buffer(c, root->fs_info->fsid,
10196 btrfs_header_fsid(), BTRFS_FSID_SIZE);
10198 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10199 btrfs_header_chunk_tree_uuid(c),
10202 btrfs_mark_buffer_dirty(c);
10204 * this case can happen in the following case:
10206 * 1.overwrite previous root.
10208 * 2.reinit reloc data root, this is because we skip pin
10209 * down reloc data tree before which means we can allocate
10210 * same block bytenr here.
10212 if (old->start == c->start) {
10213 btrfs_set_root_generation(&root->root_item,
10215 root->root_item.level = btrfs_header_level(root->node);
10216 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10217 &root->root_key, &root->root_item);
10219 free_extent_buffer(c);
10223 free_extent_buffer(old);
10225 add_root_to_dirty_list(root);
/*
 * Recursively pin the metadata blocks reachable from @eb so that later
 * allocations (during extent tree re-init) cannot clobber them.
 * @tree_root: non-zero when @eb belongs to the tree root, in which case
 * leaves are scanned for ROOT_ITEMs and each referenced tree is pinned too.
 */
10229 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10230 struct extent_buffer *eb, int tree_root)
10232 struct extent_buffer *tmp;
10233 struct btrfs_root_item *ri;
10234 struct btrfs_key key;
10237 int level = btrfs_header_level(eb);
10243 * If we have pinned this block before, don't pin it again.
10244 * This can not only avoid forever loop with broken filesystem
10245 * but also give us some speedups.
10247 if (test_range_bit(&fs_info->pinned_extents, eb->start,
10248 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10251 btrfs_pin_extent(fs_info, eb->start, eb->len);
10253 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10254 nritems = btrfs_header_nritems(eb);
10255 for (i = 0; i < nritems; i++) {
/* Tree-root leaf: recurse into each tree referenced by a ROOT_ITEM */
10257 btrfs_item_key_to_cpu(eb, &key, i);
10258 if (key.type != BTRFS_ROOT_ITEM_KEY)
10260 /* Skip the extent root and reloc roots */
10261 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10262 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10263 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10265 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10266 bytenr = btrfs_disk_root_bytenr(eb, ri);
10269 * If at any point we start needing the real root we
10270 * will have to build a stump root for the root we are
10271 * in, but for now this doesn't actually use the root so
10272 * just pass in extent_root.
10274 tmp = read_tree_block(fs_info->extent_root, bytenr,
10276 if (!extent_buffer_uptodate(tmp)) {
10277 fprintf(stderr, "Error reading root block\n");
10280 ret = pin_down_tree_blocks(fs_info, tmp, 0);
10281 free_extent_buffer(tmp);
/* Internal node: pin the child directly or recurse into it */
10285 bytenr = btrfs_node_blockptr(eb, i);
10287 /* If we aren't the tree root don't read the block */
10288 if (level == 1 && !tree_root) {
10289 btrfs_pin_extent(fs_info, bytenr, nodesize);
10293 tmp = read_tree_block(fs_info->extent_root, bytenr,
10295 if (!extent_buffer_uptodate(tmp)) {
10296 fprintf(stderr, "Error reading tree block\n");
10299 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10300 free_extent_buffer(tmp);
/*
 * Pin all metadata reachable from the chunk root and the tree root, so
 * re-initing the extent tree won't allocate over live metadata.
 */
10309 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10313 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10317 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block group cache from the chunk tree and mark
 * each chunk range free; used while re-initing the extent tree.
 */
10320 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10322 struct btrfs_block_group_cache *cache;
10323 struct btrfs_path *path;
10324 struct extent_buffer *leaf;
10325 struct btrfs_chunk *chunk;
10326 struct btrfs_key key;
10330 path = btrfs_alloc_path();
10335 key.type = BTRFS_CHUNK_ITEM_KEY;
10338 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10340 btrfs_free_path(path);
10345 * We do this in case the block groups were screwed up and had alloc
10346 * bits that aren't actually set on the chunks. This happens with
10347 * restored images every time and could happen in real life I guess.
10349 fs_info->avail_data_alloc_bits = 0;
10350 fs_info->avail_metadata_alloc_bits = 0;
10351 fs_info->avail_system_alloc_bits = 0;
10353 /* First we need to create the in-memory block groups */
10355 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10356 ret = btrfs_next_leaf(fs_info->chunk_root, path);
10358 btrfs_free_path(path);
10366 leaf = path->nodes[0];
10367 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10368 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10373 chunk = btrfs_item_ptr(leaf, path->slots[0],
10374 struct btrfs_chunk);
/* One in-memory block group per chunk; its whole range is marked free */
10375 btrfs_add_block_group(fs_info, 0,
10376 btrfs_chunk_type(leaf, chunk),
10377 key.objectid, key.offset,
10378 btrfs_chunk_length(leaf, chunk));
10379 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10380 key.offset + btrfs_chunk_length(leaf, chunk),
/* Walk the created block groups (loop body largely elided here) */
10386 cache = btrfs_lookup_first_block_group(fs_info, start);
10390 start = cache->key.objectid + cache->key.offset;
10393 btrfs_free_path(path);
/*
 * Remove any pending balance item and all TREE_RELOC root items from the
 * tree root, then re-create an empty data reloc tree.
 */
10397 static int reset_balance(struct btrfs_trans_handle *trans,
10398 struct btrfs_fs_info *fs_info)
10400 struct btrfs_root *root = fs_info->tree_root;
10401 struct btrfs_path *path;
10402 struct extent_buffer *leaf;
10403 struct btrfs_key key;
10404 int del_slot, del_nr = 0;
10408 path = btrfs_alloc_path();
/* Delete the balance item, if one exists */
10412 key.objectid = BTRFS_BALANCE_OBJECTID;
10413 key.type = BTRFS_BALANCE_ITEM_KEY;
10416 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10421 goto reinit_data_reloc;
10426 ret = btrfs_del_item(trans, root, path);
10429 btrfs_release_path(path);
/* Batch-delete every TREE_RELOC root item */
10431 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10432 key.type = BTRFS_ROOT_ITEM_KEY;
10435 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10439 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10444 ret = btrfs_del_items(trans, root, path,
10451 btrfs_release_path(path);
10454 ret = btrfs_search_slot(trans, root, &key, path,
10461 leaf = path->nodes[0];
10462 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10463 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10465 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10470 del_slot = path->slots[0];
10479 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10483 btrfs_release_path(path);
/* Re-create an empty data reloc tree and its default root dir */
10486 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10487 key.type = BTRFS_ROOT_ITEM_KEY;
10488 key.offset = (u64)-1;
10489 root = btrfs_read_fs_root(fs_info, &key);
10490 if (IS_ERR(root)) {
10491 fprintf(stderr, "Error reading data reloc tree\n");
10492 ret = PTR_ERR(root);
10495 record_root_in_trans(trans, root);
10496 ret = btrfs_fsck_reinit_root(trans, root, 0);
10499 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10501 btrfs_free_path(path);
/*
 * Wipe and rebuild the extent tree: pin all live metadata, reset the
 * block groups, reinit the extent root, re-insert the block group items
 * and finally clear any pending balance state. Refuses to run on
 * mixed-block-group filesystems.
 */
10505 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10506 struct btrfs_fs_info *fs_info)
10512 * The only reason we don't do this is because right now we're just
10513 * walking the trees we find and pinning down their bytes, we don't look
10514 * at any of the leaves. In order to do mixed groups we'd have to check
10515 * the leaves of any fs roots and pin down the bytes for any file
10516 * extents we find. Not hard but why do it if we don't have to?
10518 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10519 fprintf(stderr, "We don't support re-initing the extent tree "
10520 "for mixed block groups yet, please notify a btrfs "
10521 "developer you want to do this so they can add this "
10522 "functionality.\n");
10527 * first we need to walk all of the trees except the extent tree and pin
10528 * down the bytes that are in use so we don't overwrite any existing
10531 ret = pin_metadata_blocks(fs_info);
10533 fprintf(stderr, "error pinning down used bytes\n");
10538 * Need to drop all the block groups since we're going to recreate all
10541 btrfs_free_block_groups(fs_info);
10542 ret = reset_block_groups(fs_info);
10544 fprintf(stderr, "error resetting the block groups\n");
10548 /* Ok we can allocate now, reinit the extent root */
10549 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10551 fprintf(stderr, "extent root initialization failed\n");
10553 * When the transaction code is updated we should end the
10554 * transaction, but for now progs only knows about commit so
10555 * just return an error.
10561 * Now we have all the in-memory block groups setup so we can make
10562 * allocations properly, and the metadata we care about is safe since we
10563 * pinned all of it above.
10566 struct btrfs_block_group_cache *cache;
10568 cache = btrfs_lookup_first_block_group(fs_info, start);
10571 start = cache->key.objectid + cache->key.offset;
/* Persist each in-memory block group back into the new extent tree */
10572 ret = btrfs_insert_item(trans, fs_info->extent_root,
10573 &cache->key, &cache->item,
10574 sizeof(cache->item));
10576 fprintf(stderr, "Error adding block group\n");
10579 btrfs_extent_post_op(trans, fs_info->extent_root);
10582 ret = reset_balance(trans, fs_info);
10584 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a COW of @eb: resolve its owner root, then search down to the
 * buffer's level with cow=1 inside a fresh transaction.
 */
10589 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10591 struct btrfs_path *path;
10592 struct btrfs_trans_handle *trans;
10593 struct btrfs_key key;
10596 printf("Recowing metadata block %llu\n", eb->start);
/* The header owner identifies the root this buffer belongs to */
10597 key.objectid = btrfs_header_owner(eb);
10598 key.type = BTRFS_ROOT_ITEM_KEY;
10599 key.offset = (u64)-1;
10601 root = btrfs_read_fs_root(root->fs_info, &key);
10602 if (IS_ERR(root)) {
10603 fprintf(stderr, "Couldn't find owner root %llu\n",
10605 return PTR_ERR(root);
10608 path = btrfs_alloc_path();
10612 trans = btrfs_start_transaction(root, 1);
10613 if (IS_ERR(trans)) {
10614 btrfs_free_path(path);
10615 return PTR_ERR(trans);
/* Search to the buffer's own level with cow=1 so the block gets COWed */
10618 path->lowest_level = btrfs_header_level(eb);
10619 if (path->lowest_level)
10620 btrfs_node_key_to_cpu(eb, &key, 0);
10622 btrfs_item_key_to_cpu(eb, &key, 0);
10624 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10625 btrfs_commit_transaction(trans, root);
10626 btrfs_free_path(path);
/*
 * Delete a recorded bad key from its owning root inside a transaction.
 */
10630 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10632 struct btrfs_path *path;
10633 struct btrfs_trans_handle *trans;
10634 struct btrfs_key key;
10637 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10638 bad->key.type, bad->key.offset);
/* Resolve the root the bad item was found in */
10639 key.objectid = bad->root_id;
10640 key.type = BTRFS_ROOT_ITEM_KEY;
10641 key.offset = (u64)-1;
10643 root = btrfs_read_fs_root(root->fs_info, &key);
10644 if (IS_ERR(root)) {
10645 fprintf(stderr, "Couldn't find owner root %llu\n",
10647 return PTR_ERR(root);
10650 path = btrfs_alloc_path();
10654 trans = btrfs_start_transaction(root, 1);
10655 if (IS_ERR(trans)) {
10656 btrfs_free_path(path);
10657 return PTR_ERR(trans);
/* ins_len=-1, cow=1: position on the exact key for deletion */
10660 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10666 ret = btrfs_del_item(trans, root, path);
10668 btrfs_commit_transaction(trans, root);
10669 btrfs_free_path(path);
/*
 * Discard the log tree by zeroing the log root pointer and level in the
 * superblock, then committing the transaction.
 */
10673 static int zero_log_tree(struct btrfs_root *root)
10675 struct btrfs_trans_handle *trans;
10678 trans = btrfs_start_transaction(root, 1);
10679 if (IS_ERR(trans)) {
10680 ret = PTR_ERR(trans);
10683 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10684 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10685 ret = btrfs_commit_transaction(trans, root);
/*
 * Read the data range [start, start+len) one sector at a time into @buf
 * and insert a csum for each sector into the csum tree; 'start + len'
 * is passed to btrfs_csum_file_block() as the range end.
 */
10689 static int populate_csum(struct btrfs_trans_handle *trans,
10690 struct btrfs_root *csum_root, char *buf, u64 start,
10697 while (offset < len) {
10698 sectorsize = csum_root->sectorsize;
10699 ret = read_extent_data(csum_root, buf, start + offset,
10703 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10704 start + offset, buf, sectorsize);
10707 offset += sectorsize;
/*
 * Walk one fs/subvol tree and populate csums for every regular
 * (BTRFS_FILE_EXTENT_REG) file extent it contains.
 */
10712 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10713 struct btrfs_root *csum_root,
10714 struct btrfs_root *cur_root)
10716 struct btrfs_path *path;
10717 struct btrfs_key key;
10718 struct extent_buffer *node;
10719 struct btrfs_file_extent_item *fi;
10726 path = btrfs_alloc_path();
/* One sector worth of scratch space for populate_csum() */
10729 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10739 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10742 /* Iterate all regular file extents and fill its csum */
10744 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10746 if (key.type != BTRFS_EXTENT_DATA_KEY)
10748 node = path->nodes[0];
10749 slot = path->slots[0];
10750 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
/* Only plain regular extents; inline/prealloc types are skipped */
10751 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10753 start = btrfs_file_extent_disk_bytenr(node, fi);
10754 len = btrfs_file_extent_disk_num_bytes(node, fi);
10756 ret = populate_csum(trans, csum_root, buf, start, len);
/* A shared extent may already have csums; -EEXIST is not fatal */
10757 if (ret == -EEXIST)
10763 * TODO: if next leaf is corrupted, jump to nearest next valid
10766 ret = btrfs_next_item(cur_root, path);
10776 btrfs_free_path(path);
/*
 * Iterate ROOT_ITEMs in the tree root and fill the csum tree from every
 * fs/subvolume tree found.
 */
10781 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10782 struct btrfs_root *csum_root)
10784 struct btrfs_fs_info *fs_info = csum_root->fs_info;
10785 struct btrfs_path *path;
10786 struct btrfs_root *tree_root = fs_info->tree_root;
10787 struct btrfs_root *cur_root;
10788 struct extent_buffer *node;
10789 struct btrfs_key key;
10793 path = btrfs_alloc_path();
/* Start at the first fs tree and scan forward through the tree root */
10797 key.objectid = BTRFS_FS_TREE_OBJECTID;
10799 key.type = BTRFS_ROOT_ITEM_KEY;
10801 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10810 node = path->nodes[0];
10811 slot = path->slots[0];
10812 btrfs_item_key_to_cpu(node, &key, slot);
10813 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10815 if (key.type != BTRFS_ROOT_ITEM_KEY)
/* Only fs/subvolume trees carry checksummed file data */
10817 if (!is_fstree(key.objectid))
10819 key.offset = (u64)-1;
10821 cur_root = btrfs_read_fs_root(fs_info, &key);
10822 if (IS_ERR(cur_root) || !cur_root) {
10823 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10827 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10832 ret = btrfs_next_item(tree_root, path);
10842 btrfs_free_path(path);
/*
 * Fill the csum tree from the extent tree: every EXTENT_ITEM carrying
 * the DATA flag gets its range checksummed.
 */
10846 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10847 struct btrfs_root *csum_root)
10849 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10850 struct btrfs_path *path;
10851 struct btrfs_extent_item *ei;
10852 struct extent_buffer *leaf;
10854 struct btrfs_key key;
10857 path = btrfs_alloc_path();
10862 key.type = BTRFS_EXTENT_ITEM_KEY;
10865 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10867 btrfs_free_path(path);
/* One sector worth of scratch space for populate_csum() */
10871 buf = malloc(csum_root->sectorsize);
10873 btrfs_free_path(path);
10878 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10879 ret = btrfs_next_leaf(extent_root, path);
10887 leaf = path->nodes[0];
10889 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10890 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Skip tree blocks -- only data extents get csums */
10895 ei = btrfs_item_ptr(leaf, path->slots[0],
10896 struct btrfs_extent_item);
10897 if (!(btrfs_extent_flags(leaf, ei) &
10898 BTRFS_EXTENT_FLAG_DATA)) {
10903 ret = populate_csum(trans, csum_root, buf, key.objectid,
10910 btrfs_free_path(path);
10916 * Recalculate the csum and put it into the csum tree.
10918 * Extent tree init will wipe out all the extent info, so in that case, we
10919 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
10920 * will use fs/subvol trees to init the csum tree.
10922 static int fill_csum_tree(struct btrfs_trans_handle *trans,
10923 struct btrfs_root *csum_root,
10924 int search_fs_tree)
/* Dispatch to the fs-tree-based or extent-tree-based implementation */
10926 if (search_fs_tree)
10927 return fill_csum_tree_from_fs(trans, csum_root);
10929 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache, freeing every cached
 * root_item_info entry and the cache tree itself.
 */
10932 static void free_roots_info_cache(void)
10934 if (!roots_info_cache)
10937 while (!cache_tree_empty(roots_info_cache)) {
10938 struct cache_extent *entry;
10939 struct root_item_info *rii;
10941 entry = first_cache_extent(roots_info_cache);
10944 remove_cache_extent(roots_info_cache, entry);
10945 rii = container_of(entry, struct root_item_info, cache_extent);
10949 free(roots_info_cache);
10950 roots_info_cache = NULL;
/*
 * Scan the extent tree for tree-block extents whose first inline ref is
 * a TREE_BLOCK_REF, recording per root id the highest-level block's
 * bytenr, generation and level in roots_info_cache. That highest-level
 * block is the tree's root node; the cache is later used to detect and
 * repair stale root items.
 */
10953 static int build_roots_info_cache(struct btrfs_fs_info *info)
10956 struct btrfs_key key;
10957 struct extent_buffer *leaf;
10958 struct btrfs_path *path;
/* Lazily create the global cache on first use */
10960 if (!roots_info_cache) {
10961 roots_info_cache = malloc(sizeof(*roots_info_cache));
10962 if (!roots_info_cache)
10964 cache_tree_init(roots_info_cache);
10967 path = btrfs_alloc_path();
10972 key.type = BTRFS_EXTENT_ITEM_KEY;
10975 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10978 leaf = path->nodes[0];
10981 struct btrfs_key found_key;
10982 struct btrfs_extent_item *ei;
10983 struct btrfs_extent_inline_ref *iref;
10984 int slot = path->slots[0];
10989 struct cache_extent *entry;
10990 struct root_item_info *rii;
10992 if (slot >= btrfs_header_nritems(leaf)) {
10993 ret = btrfs_next_leaf(info->extent_root, path);
11000 leaf = path->nodes[0];
11001 slot = path->slots[0];
11004 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11006 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11007 found_key.type != BTRFS_METADATA_ITEM_KEY)
/* Only tree-block extents matter here */
11010 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11011 flags = btrfs_extent_flags(leaf, ei);
11013 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11014 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* Skinny metadata: level is the key offset; else read tree_block_info */
11017 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11018 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11019 level = found_key.offset;
11021 struct btrfs_tree_block_info *binfo;
11023 binfo = (struct btrfs_tree_block_info *)(ei + 1);
11024 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11025 level = btrfs_tree_block_level(leaf, binfo);
11029 * For a root extent, it must be of the following type and the
11030 * first (and only one) iref in the item.
11032 type = btrfs_extent_inline_ref_type(leaf, iref);
11033 if (type != BTRFS_TREE_BLOCK_REF_KEY)
11036 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11037 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* First sighting of this root id: create a fresh cache entry */
11039 rii = malloc(sizeof(struct root_item_info));
11044 rii->cache_extent.start = root_id;
11045 rii->cache_extent.size = 1;
11046 rii->level = (u8)-1;
11047 entry = &rii->cache_extent;
11048 ret = insert_cache_extent(roots_info_cache, entry);
11051 rii = container_of(entry, struct root_item_info,
11055 ASSERT(rii->cache_extent.start == root_id);
11056 ASSERT(rii->cache_extent.size == 1);
/* Track the highest level seen -- that block is the tree's root node */
11058 if (level > rii->level || rii->level == (u8)-1) {
11059 rii->level = level;
11060 rii->bytenr = found_key.objectid;
11061 rii->gen = btrfs_extent_generation(leaf, ei);
11062 rii->node_count = 1;
11063 } else if (level == rii->level) {
11071 btrfs_free_path(path);
/*
 * Compare the root item that @path currently points at against the cached
 * info built by build_roots_info_cache() (the actual root node's bytenr,
 * level and generation found in the extent tree).  If they disagree and
 * @read_only_mode is false, rewrite the root item in place in the leaf.
 *
 * Returns (per visible code paths) an error when the cache has no entry or
 * an ambiguous one; the elided tail presumably returns whether a repair
 * was needed/done — TODO confirm against the full source.
 *
 * NOTE(review): interior lines are elided in this listing; the trailing
 * mark-dirty/return statements are not visible.
 */
11076 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11077 struct btrfs_path *path,
11078 const struct btrfs_key *root_key,
11079 const int read_only_mode)
11081 const u64 root_id = root_key->objectid;
11082 struct cache_extent *entry;
11083 struct root_item_info *rii;
11084 struct btrfs_root_item ri;
11085 unsigned long offset;
/* Every subvolume root must have been seen while scanning the extent tree. */
11087 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11090 "Error: could not find extent items for root %llu\n",
11091 root_key->objectid);
11095 rii = container_of(entry, struct root_item_info, cache_extent);
11096 ASSERT(rii->cache_extent.start == root_id);
11097 ASSERT(rii->cache_extent.size == 1);
/*
 * node_count != 1 means more than one block was found at the highest
 * level, so we cannot tell which one is the real root node.
 */
11099 if (rii->node_count != 1) {
11101 "Error: could not find btree root extent for root %llu\n",
/* Copy the on-disk root item out of the leaf for comparison. */
11106 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11107 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
/* Mismatch in any of bytenr/level/generation => the root item is stale. */
11109 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11110 btrfs_root_level(&ri) != rii->level ||
11111 btrfs_root_generation(&ri) != rii->gen) {
11114 * If we're in repair mode but our caller told us to not update
11115 * the root item, i.e. just check if it needs to be updated, don't
11116 * print this message, since the caller will call us again shortly
11117 * for the same root item without read only mode (the caller will
11118 * open a transaction first).
11120 if (!(read_only_mode && repair))
11122 "%sroot item for root %llu,"
11123 " current bytenr %llu, current gen %llu, current level %u,"
11124 " new bytenr %llu, new gen %llu, new level %u\n",
11125 (read_only_mode ? "" : "fixing "),
11127 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11128 btrfs_root_level(&ri),
11129 rii->bytenr, rii->gen, rii->level);
/*
 * A root item newer than the found root node indicates something beyond
 * the known 3.17 regression; refuse rather than roll the root back.
 */
11131 if (btrfs_root_generation(&ri) > rii->gen) {
11133 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11134 root_id, btrfs_root_generation(&ri), rii->gen);
/* Actually patch the root item inside the leaf (caller holds a transaction). */
11138 if (!read_only_mode) {
11139 btrfs_set_root_bytenr(&ri, rii->bytenr);
11140 btrfs_set_root_level(&ri, rii->level);
11141 btrfs_set_root_generation(&ri, rii->gen);
11142 write_extent_buffer(path->nodes[0], &ri,
11143 offset, sizeof(ri));
11153 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11154 * caused read-only snapshots to be corrupted if they were created at a moment
11155 * when the source subvolume/snapshot had orphan items. The issue was that the
11156 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11157 * node instead of the post orphan cleanup root node.
11158 * So this function, and its callees, just detects and fixes those cases. Even
11159 * though the regression was for read-only snapshots, this function applies to
11160 * any snapshot/subvolume root.
11161 * This must be run before any other repair code - not doing it so, makes other
11162 * repair code delete or modify backrefs in the extent tree for example, which
11163 * will result in an inconsistent fs after repairing the root items.
11165 static int repair_root_items(struct btrfs_fs_info *info)
11167 struct btrfs_path *path = NULL;
11168 struct btrfs_key key;
11169 struct extent_buffer *leaf;
11170 struct btrfs_trans_handle *trans = NULL;
11173 int need_trans = 0;
/* Build the root-id -> {bytenr, gen, level} map from the extent tree first. */
11175 ret = build_roots_info_cache(info);
11179 path = btrfs_alloc_path();
/* Iterate root-tree items starting at the first subvolume objectid. */
11185 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11186 key.type = BTRFS_ROOT_ITEM_KEY;
11191 * Avoid opening and committing transactions if a leaf doesn't have
11192 * any root items that need to be fixed, so that we avoid rotating
11193 * backup roots unnecessarily.
/* Only taken when a prior read-only pass flagged a repair (guard elided). */
11196 trans = btrfs_start_transaction(info->tree_root, 1);
11197 if (IS_ERR(trans)) {
11198 ret = PTR_ERR(trans);
/* Re-search with the transaction (COW enabled) when one is open. */
11203 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11207 leaf = path->nodes[0];
/* Per-item loop body (enclosing loop construct elided in this view). */
11210 struct btrfs_key found_key;
11212 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11213 int no_more_keys = find_next_key(path, &key);
11215 btrfs_release_path(path);
/* Commit before moving to the next leaf so fixes land on disk. */
11217 ret = btrfs_commit_transaction(trans,
11229 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* Skip non-root items and relocation-tree roots. */
11231 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11233 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/*
 * First pass per item is read-only (detect); trailing argument elided
 * here — presumably trans == NULL selects read-only mode. TODO confirm.
 */
11236 ret = maybe_repair_root_item(info, path, &found_key,
/* A repair is needed: restart this leaf with a transaction open. */
11241 if (!trans && repair) {
11244 btrfs_release_path(path);
/* Common exit: drop the cache, the path, and any open transaction. */
11254 free_roots_info_cache();
11255 btrfs_free_path(path);
11257 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Usage text for "btrfs check", printed by usage()/help machinery.
 * One string per output line; the terminating NULL entry is below the
 * visible portion of this listing.
 */
11264 const char * const cmd_check_usage[] = {
11265 "btrfs check [options] <device>",
11266 "Check structural integrity of a filesystem (unmounted).",
11267 "Check structural integrity of an unmounted filesystem. Verify internal",
11268 "trees' consistency and item connectivity. In the repair mode try to",
11269 "fix the problems found. ",
11270 "WARNING: the repair mode is considered dangerous",
11272 "-s|--super <superblock> use this superblock copy",
11273 "-b|--backup use the first valid backup root copy",
11274 "--repair try to repair the filesystem",
11275 "--readonly run in read-only mode (default)",
11276 "--init-csum-tree create a new CRC tree",
11277 "--init-extent-tree create a new extent tree",
11278 "--mode <MODE> select mode, allows to make some memory/IO",
11279 " trade-offs, where MODE is one of:",
11280 " original - read inodes and extents to memory (requires",
11281 " more memory, does less IO)",
11282 " lowmem - try to use less memory but read blocks again",
11284 "--check-data-csum verify checksums of data blocks",
11285 "-Q|--qgroup-report print a report on qgroup consistency",
11286 "-E|--subvol-extents <subvolid>",
11287 " print subvolume extents and sharing state",
11288 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11289 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11290 "-p|--progress indicate progress",
/*
 * Entry point for "btrfs check": parse options, open the filesystem, then
 * run the check passes in order — chunks/extents, root items, free space,
 * fs roots, csums, root refs, qgroups — repairing along the way when
 * --repair was given.  Returns nonzero on errors found or failures.
 *
 * NOTE(review): this listing has many interior lines elided (several error
 * branches, 'goto' cleanup labels, some case labels); comments describe
 * only the visible code.
 */
11294 int cmd_check(int argc, char **argv)
11296 struct cache_tree root_cache;
11297 struct btrfs_root *root;
11298 struct btrfs_fs_info *info;
11301 u64 tree_root_bytenr = 0;
11302 u64 chunk_root_bytenr = 0;
11303 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11306 int init_csum_tree = 0;
11308 int qgroup_report = 0;
11309 int qgroups_repaired = 0;
11310 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Long-option values that have no short-option equivalent. */
11314 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11315 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11316 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11318 static const struct option long_options[] = {
11319 { "super", required_argument, NULL, 's' },
11320 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11321 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11322 { "init-csum-tree", no_argument, NULL,
11323 GETOPT_VAL_INIT_CSUM },
11324 { "init-extent-tree", no_argument, NULL,
11325 GETOPT_VAL_INIT_EXTENT },
11326 { "check-data-csum", no_argument, NULL,
11327 GETOPT_VAL_CHECK_CSUM },
11328 { "backup", no_argument, NULL, 'b' },
11329 { "subvol-extents", required_argument, NULL, 'E' },
11330 { "qgroup-report", no_argument, NULL, 'Q' },
11331 { "tree-root", required_argument, NULL, 'r' },
11332 { "chunk-root", required_argument, NULL,
11333 GETOPT_VAL_CHUNK_TREE },
11334 { "progress", no_argument, NULL, 'p' },
11335 { "mode", required_argument, NULL,
11337 { NULL, 0, NULL, 0}
/* Option-parsing loop body (enclosing while and switch headers elided). */
11340 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11344 case 'a': /* ignored */ break;
11346 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* -s: pick an alternate superblock mirror. */
11349 num = arg_strtou64(optarg);
11350 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11352 "ERROR: super mirror should be less than: %d\n",
11353 BTRFS_SUPER_MIRROR_MAX);
11356 bytenr = btrfs_sb_offset(((int)num));
11357 printf("using SB copy %llu, bytenr %llu\n", num,
11358 (unsigned long long)bytenr);
11364 subvolid = arg_strtou64(optarg);
11367 tree_root_bytenr = arg_strtou64(optarg);
11369 case GETOPT_VAL_CHUNK_TREE:
11370 chunk_root_bytenr = arg_strtou64(optarg);
11373 ctx.progress_enabled = true;
11377 usage(cmd_check_usage);
11378 case GETOPT_VAL_REPAIR:
11379 printf("enabling repair mode\n");
11381 ctree_flags |= OPEN_CTREE_WRITES;
11383 case GETOPT_VAL_READONLY:
11386 case GETOPT_VAL_INIT_CSUM:
11387 printf("Creating a new CRC tree\n")
11388 init_csum_tree = 1;
11390 ctree_flags |= OPEN_CTREE_WRITES;
/* --init-extent-tree implies writes and skipping block-group loading. */
11392 case GETOPT_VAL_INIT_EXTENT:
11393 init_extent_tree = 1;
11394 ctree_flags |= (OPEN_CTREE_WRITES |
11395 OPEN_CTREE_NO_BLOCK_GROUPS);
11398 case GETOPT_VAL_CHECK_CSUM:
11399 check_data_csum = 1;
11401 case GETOPT_VAL_MODE:
11402 check_mode = parse_check_mode(optarg);
11403 if (check_mode == CHECK_MODE_UNKNOWN) {
11404 error("unknown mode: %s", optarg);
/* Exactly one positional argument: the device path. */
11411 if (check_argc_exact(argc - optind, 1))
11412 usage(cmd_check_usage);
11414 if (ctx.progress_enabled) {
11415 ctx.tp = TASK_NOTHING;
11416 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11419 /* This check is the only reason for --readonly to exist */
11420 if (readonly && repair) {
11421 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11426 * Not supported yet
11428 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11429 error("Low memory mode doesn't support repair yet");
11434 cache_tree_init(&root_cache);
/* Refuse to run on a mounted filesystem. */
11436 if((ret = check_mounted(argv[optind])) < 0) {
11437 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11440 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11445 /* only allow partial opening under repair mode */
11447 ctree_flags |= OPEN_CTREE_PARTIAL;
11449 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11450 chunk_root_bytenr, ctree_flags);
11452 fprintf(stderr, "Couldn't open file system\n");
11457 global_info = info;
11458 root = info->fs_root;
11461 * repair mode will force us to commit transaction which
11462 * will make us fail to load log tree when mounting.
11464 if (repair && btrfs_super_log_root(info->super_copy)) {
11465 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11470 ret = zero_log_tree(root);
11472 fprintf(stderr, "fail to zero log tree\n");
11477 uuid_unparse(info->super_copy->fsid, uuidbuf);
/* -Q: only report qgroup consistency, then exit (exit path elided). */
11478 if (qgroup_report) {
11479 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11481 ret = qgroup_verify_all(info);
/* -E: only dump extent sharing state for one subvolume. */
11487 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11488 subvolid, argv[optind], uuidbuf);
11489 ret = print_extent_state(info, subvolid);
11492 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* Without sane tree/dev/chunk roots there is nothing we can do. */
11494 if (!extent_buffer_uptodate(info->tree_root->node) ||
11495 !extent_buffer_uptodate(info->dev_root->node) ||
11496 !extent_buffer_uptodate(info->chunk_root->node)) {
11497 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
/* Optional destructive re-initialization of extent and/or csum trees. */
11502 if (init_extent_tree || init_csum_tree) {
11503 struct btrfs_trans_handle *trans;
11505 trans = btrfs_start_transaction(info->extent_root, 0);
11506 if (IS_ERR(trans)) {
11507 fprintf(stderr, "Error starting transaction\n");
11508 ret = PTR_ERR(trans);
11512 if (init_extent_tree) {
11513 printf("Creating a new extent tree\n");
11514 ret = reinit_extent_tree(trans, info);
11519 if (init_csum_tree) {
11520 fprintf(stderr, "Reinit crc root\n");
11521 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11523 fprintf(stderr, "crc root initialization failed\n");
/* Rebuild csums from data extents (second argument list elided). */
11528 ret = fill_csum_tree(trans, info->csum_root,
11531 fprintf(stderr, "crc refilling failed\n");
11536 * Ok now we commit and run the normal fsck, which will add
11537 * extent entries for all of the items it finds.
11539 ret = btrfs_commit_transaction(trans, info->extent_root);
11543 if (!extent_buffer_uptodate(info->extent_root->node)) {
11544 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11548 if (!extent_buffer_uptodate(info->csum_root->node)) {
11549 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
/* Pass 1: chunk and extent allocation trees (lowmem uses the v2 checker). */
11554 if (!ctx.progress_enabled)
11555 fprintf(stderr, "checking extents\n");
11556 if (check_mode == CHECK_MODE_LOWMEM)
11557 ret = check_chunks_and_extents_v2(root);
11559 ret = check_chunks_and_extents(root);
11561 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
/* Pass 2: stale root items (see repair_root_items() above); must run early. */
11563 ret = repair_root_items(info);
11567 fprintf(stderr, "Fixed %d roots.\n", ret);
11569 } else if (ret > 0) {
11571 "Found %d roots with an outdated root item.\n",
11574 "Please run a filesystem check with the option --repair to fix them.\n");
/* Pass 3: free space cache or free space tree, depending on features. */
11579 if (!ctx.progress_enabled) {
11580 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11581 fprintf(stderr, "checking free space tree\n");
11583 fprintf(stderr, "checking free space cache\n");
11585 ret = check_space_cache(root);
11590 * We used to have to have these hole extents in between our real
11591 * extents so if we don't have this flag set we need to make sure there
11592 * are no gaps in the file extents for inodes, otherwise we can just
11593 * ignore it when this happens.
11595 no_holes = btrfs_fs_incompat(root->fs_info,
11596 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
/* Pass 4: per-subvolume fs trees (inodes, dir items, file extents). */
11597 if (!ctx.progress_enabled)
11598 fprintf(stderr, "checking fs roots\n");
11599 ret = check_fs_roots(root, &root_cache);
/* Pass 5: data checksums. */
11603 fprintf(stderr, "checking csums\n");
11604 ret = check_csums(root);
/* Pass 6: root references/backrefs in the root tree. */
11608 fprintf(stderr, "checking root refs\n");
11609 ret = check_root_refs(root, &root_cache);
/* Re-COW any blocks queued for rewrite (e.g. bad transids) in repair mode. */
11613 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11614 struct extent_buffer *eb;
11616 eb = list_first_entry(&root->fs_info->recow_ebs,
11617 struct extent_buffer, recow);
11618 list_del_init(&eb->recow);
11619 ret = recow_extent_buffer(root, eb);
/* Delete items that earlier passes queued as unrecoverably bad. */
11624 while (!list_empty(&delete_items)) {
11625 struct bad_item *bad;
11627 bad = list_first_entry(&delete_items, struct bad_item, list);
11628 list_del_init(&bad->list);
11630 ret = delete_bad_item(root, bad);
/* Pass 7: qgroup accounting, with optional repair. */
11634 if (info->quota_enabled) {
11636 fprintf(stderr, "checking quota groups\n");
11637 err = qgroup_verify_all(info);
11641 err = repair_qgroups(info, &qgroups_repaired);
/* Leftover recow entries mean transid errors we could not fix. */
11646 if (!list_empty(&root->fs_info->recow_ebs)) {
11647 fprintf(stderr, "Transid errors in file system\n");
11651 /* Don't override original ret */
11652 if (!ret && qgroups_repaired)
11653 ret = qgroups_repaired;
11655 if (found_old_backref) { /*
11656 * there was a disk format change when mixed
11657 * backref was in testing tree. The old format
11658 * existed about one week.
11660 printf("\n * Found old mixed backref format. "
11661 "The old format is not supported! *"
11662 "\n * Please mount the FS in readonly mode, "
11663 "backup data and re-format the FS. *\n\n");
/* Final summary from the global counters accumulated by the passes. */
11666 printf("found %llu bytes used err is %d\n",
11667 (unsigned long long)bytes_used, ret);
11668 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11669 printf("total tree bytes: %llu\n",
11670 (unsigned long long)total_btree_bytes);
11671 printf("total fs tree bytes: %llu\n",
11672 (unsigned long long)total_fs_tree_bytes);
11673 printf("total extent tree bytes: %llu\n",
11674 (unsigned long long)total_extent_tree_bytes);
11675 printf("btree space waste bytes: %llu\n",
11676 (unsigned long long)btree_space_waste);
11677 printf("file data blocks allocated: %llu\n referenced %llu\n",
11678 (unsigned long long)data_bytes_allocated,
11679 (unsigned long long)data_bytes_referenced);
/* Cleanup: per-run state, then the progress task if it was started. */
11681 free_qgroup_counts();
11682 free_root_recs_tree(&root_cache);
11686 if (ctx.progress_enabled)
11687 task_deinit(ctx.info);