2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <uuid/uuid.h>
#include "print-tree.h"
#include "task-utils.h"
#include "transaction.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
#include "qgroup-verify.h"
#include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
/*
 * Which fsck implementation to run; lowmem mode trades speed for a much
 * smaller memory footprint.
 *
 * NOTE(review): the individual enumerators were lost in extraction and have
 * been reconstructed from their uses in parse_check_mode() — confirm the
 * original declaration order.
 */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	CHECK_MODE_UNKNOWN,
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
86 struct extent_backref {
88 unsigned int is_data:1;
89 unsigned int found_extent_tree:1;
90 unsigned int full_backref:1;
91 unsigned int found_ref:1;
92 unsigned int broken:1;
95 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
97 return rb_entry(node, struct extent_backref, node);
100 struct data_backref {
101 struct extent_backref node;
115 static inline struct data_backref* to_data_backref(struct extent_backref *back)
117 return container_of(back, struct data_backref, node);
120 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
122 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
123 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
124 struct data_backref *back1 = to_data_backref(ext1);
125 struct data_backref *back2 = to_data_backref(ext2);
127 WARN_ON(!ext1->is_data);
128 WARN_ON(!ext2->is_data);
130 /* parent and root are a union, so this covers both */
131 if (back1->parent > back2->parent)
133 if (back1->parent < back2->parent)
136 /* This is a full backref and the parents match. */
137 if (back1->node.full_backref)
140 if (back1->owner > back2->owner)
142 if (back1->owner < back2->owner)
145 if (back1->offset > back2->offset)
147 if (back1->offset < back2->offset)
150 if (back1->bytes > back2->bytes)
152 if (back1->bytes < back2->bytes)
155 if (back1->found_ref && back2->found_ref) {
156 if (back1->disk_bytenr > back2->disk_bytenr)
158 if (back1->disk_bytenr < back2->disk_bytenr)
161 if (back1->found_ref > back2->found_ref)
163 if (back1->found_ref < back2->found_ref)
171 * Much like data_backref, just removed the undetermined members
172 * and change it to use list_head.
173 * During extent scan, it is stored in root->orphan_data_extent.
174 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
176 struct orphan_data_extent {
177 struct list_head list;
185 struct tree_backref {
186 struct extent_backref node;
193 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
195 return container_of(back, struct tree_backref, node);
198 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
200 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
201 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
202 struct tree_backref *back1 = to_tree_backref(ext1);
203 struct tree_backref *back2 = to_tree_backref(ext2);
205 WARN_ON(ext1->is_data);
206 WARN_ON(ext2->is_data);
208 /* parent and root are a union, so this covers both */
209 if (back1->parent > back2->parent)
211 if (back1->parent < back2->parent)
217 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
219 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
220 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
222 if (ext1->is_data > ext2->is_data)
225 if (ext1->is_data < ext2->is_data)
228 if (ext1->full_backref > ext2->full_backref)
230 if (ext1->full_backref < ext2->full_backref)
234 return compare_data_backref(node1, node2);
236 return compare_tree_backref(node1, node2);
239 /* Explicit initialization for extent_record::flag_block_full_backref */
240 enum { FLAG_UNSET = 2 };
242 struct extent_record {
243 struct list_head backrefs;
244 struct list_head dups;
245 struct rb_root backref_tree;
246 struct list_head list;
247 struct cache_extent cache;
248 struct btrfs_disk_key parent_key;
253 u64 extent_item_refs;
255 u64 parent_generation;
259 unsigned int flag_block_full_backref:2;
260 unsigned int found_rec:1;
261 unsigned int content_checked:1;
262 unsigned int owner_ref_checked:1;
263 unsigned int is_root:1;
264 unsigned int metadata:1;
265 unsigned int bad_full_backref:1;
266 unsigned int crossing_stripes:1;
267 unsigned int wrong_chunk_type:1;
270 static inline struct extent_record* to_extent_record(struct list_head *entry)
272 return container_of(entry, struct extent_record, list);
275 struct inode_backref {
276 struct list_head list;
277 unsigned int found_dir_item:1;
278 unsigned int found_dir_index:1;
279 unsigned int found_inode_ref:1;
280 unsigned int filetype:8;
282 unsigned int ref_type;
289 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
291 return list_entry(entry, struct inode_backref, list);
294 struct root_item_record {
295 struct list_head list;
302 struct btrfs_key drop_key;
/* Error bits for directory/inode reference consistency (inode_backref). */
#define REF_ERR_NO_DIR_ITEM		(1 << 0)
#define REF_ERR_NO_DIR_INDEX		(1 << 1)
#define REF_ERR_NO_INODE_REF		(1 << 2)
#define REF_ERR_DUP_DIR_ITEM		(1 << 3)
#define REF_ERR_DUP_DIR_INDEX		(1 << 4)
#define REF_ERR_DUP_INODE_REF		(1 << 5)
#define REF_ERR_INDEX_UNMATCH		(1 << 6)
#define REF_ERR_FILETYPE_UNMATCH	(1 << 7)
#define REF_ERR_NAME_TOO_LONG		(1 << 8) /* 0x100 */
#define REF_ERR_NO_ROOT_REF		(1 << 9)
#define REF_ERR_NO_ROOT_BACKREF		(1 << 10)
#define REF_ERR_DUP_ROOT_REF		(1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF	(1 << 12)
319 struct file_extent_hole {
325 struct inode_record {
326 struct list_head backrefs;
327 unsigned int checked:1;
328 unsigned int merging:1;
329 unsigned int found_inode_item:1;
330 unsigned int found_dir_item:1;
331 unsigned int found_file_extent:1;
332 unsigned int found_csum_item:1;
333 unsigned int some_csum_missing:1;
334 unsigned int nodatasum:1;
347 struct rb_root holes;
348 struct list_head orphan_extents;
/* Error bits recorded on an inode_record during the fs-tree scan. */
#define I_ERR_NO_INODE_ITEM		(1 << 0)
#define I_ERR_NO_ORPHAN_ITEM		(1 << 1)
#define I_ERR_DUP_INODE_ITEM		(1 << 2)
#define I_ERR_DUP_DIR_INDEX		(1 << 3)
#define I_ERR_ODD_DIR_ITEM		(1 << 4)
#define I_ERR_ODD_FILE_EXTENT		(1 << 5)
#define I_ERR_BAD_FILE_EXTENT		(1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP	(1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT	(1 << 8) /* 0x100 */
#define I_ERR_DIR_ISIZE_WRONG		(1 << 9)
#define I_ERR_FILE_NBYTES_WRONG		(1 << 10) /* 0x400 */
#define I_ERR_ODD_CSUM_ITEM		(1 << 11)
#define I_ERR_SOME_CSUM_MISSING		(1 << 12)
#define I_ERR_LINK_COUNT_WRONG		(1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN	(1 << 14)
369 struct root_backref {
370 struct list_head list;
371 unsigned int found_dir_item:1;
372 unsigned int found_dir_index:1;
373 unsigned int found_back_ref:1;
374 unsigned int found_forward_ref:1;
375 unsigned int reachable:1;
384 static inline struct root_backref* to_root_backref(struct list_head *entry)
386 return list_entry(entry, struct root_backref, list);
390 struct list_head backrefs;
391 struct cache_extent cache;
392 unsigned int found_root_item:1;
398 struct cache_extent cache;
403 struct cache_extent cache;
404 struct cache_tree root_cache;
405 struct cache_tree inode_cache;
406 struct inode_record *current;
415 struct walk_control {
416 struct cache_tree shared;
417 struct shared_node *nodes[BTRFS_MAX_LEVEL];
423 struct btrfs_key key;
425 struct list_head list;
428 struct extent_entry {
433 struct list_head list;
436 struct root_item_info {
437 /* level of the root */
439 /* number of nodes at this level, must be 1 for a root */
443 struct cache_extent cache_extent;
/*
 * Error bits for low memory mode check.
 *
 * Currently no caller cares about them individually; they are internal
 * error accounting only.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/*
 * Was (1 << 4), colliding with REFERENCER_MISMATCH so the two errors were
 * indistinguishable in the accumulated mask; moved to a free bit.
 */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
463 static void *print_status_check(void *p)
465 struct task_ctx *priv = p;
466 const char work_indicator[] = { '.', 'o', 'O', 'o' };
468 static char *task_position_string[] = {
470 "checking free space cache",
474 task_period_start(priv->info, 1000 /* 1s */);
476 if (priv->tp == TASK_NOTHING)
480 printf("%s [%c]\r", task_position_string[priv->tp],
481 work_indicator[count % 4]);
484 task_period_wait(priv->info);
489 static int print_status_return(void *p)
497 static enum btrfs_check_mode parse_check_mode(const char *str)
499 if (strcmp(str, "lowmem") == 0)
500 return CHECK_MODE_LOWMEM;
501 if (strcmp(str, "orig") == 0)
502 return CHECK_MODE_ORIGINAL;
503 if (strcmp(str, "original") == 0)
504 return CHECK_MODE_ORIGINAL;
506 return CHECK_MODE_UNKNOWN;
509 /* Compatible function to allow reuse of old codes */
510 static u64 first_extent_gap(struct rb_root *holes)
512 struct file_extent_hole *hole;
514 if (RB_EMPTY_ROOT(holes))
517 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
521 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
523 struct file_extent_hole *hole1;
524 struct file_extent_hole *hole2;
526 hole1 = rb_entry(node1, struct file_extent_hole, node);
527 hole2 = rb_entry(node2, struct file_extent_hole, node);
529 if (hole1->start > hole2->start)
531 if (hole1->start < hole2->start)
533 /* Now hole1->start == hole2->start */
534 if (hole1->len >= hole2->len)
536 * Hole 1 will be merge center
537 * Same hole will be merged later
540 /* Hole 2 will be merge center */
545 * Add a hole to the record
547 * This will do hole merge for copy_file_extent_holes(),
548 * which will ensure there won't be continuous holes.
550 static int add_file_extent_hole(struct rb_root *holes,
553 struct file_extent_hole *hole;
554 struct file_extent_hole *prev = NULL;
555 struct file_extent_hole *next = NULL;
557 hole = malloc(sizeof(*hole));
562 /* Since compare will not return 0, no -EEXIST will happen */
563 rb_insert(holes, &hole->node, compare_hole);
565 /* simple merge with previous hole */
566 if (rb_prev(&hole->node))
567 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
569 if (prev && prev->start + prev->len >= hole->start) {
570 hole->len = hole->start + hole->len - prev->start;
571 hole->start = prev->start;
572 rb_erase(&prev->node, holes);
577 /* iterate merge with next holes */
579 if (!rb_next(&hole->node))
581 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
583 if (hole->start + hole->len >= next->start) {
584 if (hole->start + hole->len <= next->start + next->len)
585 hole->len = next->start + next->len -
587 rb_erase(&next->node, holes);
596 static int compare_hole_range(struct rb_node *node, void *data)
598 struct file_extent_hole *hole;
601 hole = (struct file_extent_hole *)data;
604 hole = rb_entry(node, struct file_extent_hole, node);
605 if (start < hole->start)
607 if (start >= hole->start && start < hole->start + hole->len)
613 * Delete a hole in the record
615 * This will do the hole split and is much restrict than add.
617 static int del_file_extent_hole(struct rb_root *holes,
620 struct file_extent_hole *hole;
621 struct file_extent_hole tmp;
626 struct rb_node *node;
633 node = rb_search(holes, &tmp, compare_hole_range, NULL);
636 hole = rb_entry(node, struct file_extent_hole, node);
637 if (start + len > hole->start + hole->len)
641 * Now there will be no overlap, delete the hole and re-add the
642 * split(s) if they exists.
644 if (start > hole->start) {
645 prev_start = hole->start;
646 prev_len = start - hole->start;
649 if (hole->start + hole->len > start + len) {
650 next_start = start + len;
651 next_len = hole->start + hole->len - start - len;
654 rb_erase(node, holes);
657 ret = add_file_extent_hole(holes, prev_start, prev_len);
662 ret = add_file_extent_hole(holes, next_start, next_len);
669 static int copy_file_extent_holes(struct rb_root *dst,
672 struct file_extent_hole *hole;
673 struct rb_node *node;
676 node = rb_first(src);
678 hole = rb_entry(node, struct file_extent_hole, node);
679 ret = add_file_extent_hole(dst, hole->start, hole->len);
682 node = rb_next(node);
687 static void free_file_extent_holes(struct rb_root *holes)
689 struct rb_node *node;
690 struct file_extent_hole *hole;
692 node = rb_first(holes);
694 hole = rb_entry(node, struct file_extent_hole, node);
695 rb_erase(node, holes);
697 node = rb_first(holes);
701 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
703 static void record_root_in_trans(struct btrfs_trans_handle *trans,
704 struct btrfs_root *root)
706 if (root->last_trans != trans->transid) {
707 root->track_dirty = 1;
708 root->last_trans = trans->transid;
709 root->commit_root = root->node;
710 extent_buffer_get(root->node);
714 static u8 imode_to_type(u32 imode)
717 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
718 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
719 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
720 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
721 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
722 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
723 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
724 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
727 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
731 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
733 struct device_record *rec1;
734 struct device_record *rec2;
736 rec1 = rb_entry(node1, struct device_record, node);
737 rec2 = rb_entry(node2, struct device_record, node);
738 if (rec1->devid > rec2->devid)
740 else if (rec1->devid < rec2->devid)
746 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
748 struct inode_record *rec;
749 struct inode_backref *backref;
750 struct inode_backref *orig;
751 struct inode_backref *tmp;
752 struct orphan_data_extent *src_orphan;
753 struct orphan_data_extent *dst_orphan;
757 rec = malloc(sizeof(*rec));
759 return ERR_PTR(-ENOMEM);
760 memcpy(rec, orig_rec, sizeof(*rec));
762 INIT_LIST_HEAD(&rec->backrefs);
763 INIT_LIST_HEAD(&rec->orphan_extents);
764 rec->holes = RB_ROOT;
766 list_for_each_entry(orig, &orig_rec->backrefs, list) {
767 size = sizeof(*orig) + orig->namelen + 1;
768 backref = malloc(size);
773 memcpy(backref, orig, size);
774 list_add_tail(&backref->list, &rec->backrefs);
776 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
777 dst_orphan = malloc(sizeof(*dst_orphan));
782 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
783 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
785 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
791 if (!list_empty(&rec->backrefs))
792 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
793 list_del(&orig->list);
797 if (!list_empty(&rec->orphan_extents))
798 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
799 list_del(&orig->list);
808 static void print_orphan_data_extents(struct list_head *orphan_extents,
811 struct orphan_data_extent *orphan;
813 if (list_empty(orphan_extents))
815 printf("The following data extent is lost in tree %llu:\n",
817 list_for_each_entry(orphan, orphan_extents, list) {
818 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
819 orphan->objectid, orphan->offset, orphan->disk_bytenr,
824 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
826 u64 root_objectid = root->root_key.objectid;
827 int errors = rec->errors;
831 /* reloc root errors, we print its corresponding fs root objectid*/
832 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
833 root_objectid = root->root_key.offset;
834 fprintf(stderr, "reloc");
836 fprintf(stderr, "root %llu inode %llu errors %x",
837 (unsigned long long) root_objectid,
838 (unsigned long long) rec->ino, rec->errors);
840 if (errors & I_ERR_NO_INODE_ITEM)
841 fprintf(stderr, ", no inode item");
842 if (errors & I_ERR_NO_ORPHAN_ITEM)
843 fprintf(stderr, ", no orphan item");
844 if (errors & I_ERR_DUP_INODE_ITEM)
845 fprintf(stderr, ", dup inode item");
846 if (errors & I_ERR_DUP_DIR_INDEX)
847 fprintf(stderr, ", dup dir index");
848 if (errors & I_ERR_ODD_DIR_ITEM)
849 fprintf(stderr, ", odd dir item");
850 if (errors & I_ERR_ODD_FILE_EXTENT)
851 fprintf(stderr, ", odd file extent");
852 if (errors & I_ERR_BAD_FILE_EXTENT)
853 fprintf(stderr, ", bad file extent");
854 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
855 fprintf(stderr, ", file extent overlap");
856 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
857 fprintf(stderr, ", file extent discount");
858 if (errors & I_ERR_DIR_ISIZE_WRONG)
859 fprintf(stderr, ", dir isize wrong");
860 if (errors & I_ERR_FILE_NBYTES_WRONG)
861 fprintf(stderr, ", nbytes wrong");
862 if (errors & I_ERR_ODD_CSUM_ITEM)
863 fprintf(stderr, ", odd csum item");
864 if (errors & I_ERR_SOME_CSUM_MISSING)
865 fprintf(stderr, ", some csum missing");
866 if (errors & I_ERR_LINK_COUNT_WRONG)
867 fprintf(stderr, ", link count wrong");
868 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
869 fprintf(stderr, ", orphan file extent");
870 fprintf(stderr, "\n");
871 /* Print the orphan extents if needed */
872 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
873 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
875 /* Print the holes if needed */
876 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
877 struct file_extent_hole *hole;
878 struct rb_node *node;
881 node = rb_first(&rec->holes);
882 fprintf(stderr, "Found file extent holes:\n");
885 hole = rb_entry(node, struct file_extent_hole, node);
886 fprintf(stderr, "\tstart: %llu, len: %llu\n",
887 hole->start, hole->len);
888 node = rb_next(node);
891 fprintf(stderr, "\tstart: 0, len: %llu\n",
892 round_up(rec->isize, root->sectorsize));
896 static void print_ref_error(int errors)
898 if (errors & REF_ERR_NO_DIR_ITEM)
899 fprintf(stderr, ", no dir item");
900 if (errors & REF_ERR_NO_DIR_INDEX)
901 fprintf(stderr, ", no dir index");
902 if (errors & REF_ERR_NO_INODE_REF)
903 fprintf(stderr, ", no inode ref");
904 if (errors & REF_ERR_DUP_DIR_ITEM)
905 fprintf(stderr, ", dup dir item");
906 if (errors & REF_ERR_DUP_DIR_INDEX)
907 fprintf(stderr, ", dup dir index");
908 if (errors & REF_ERR_DUP_INODE_REF)
909 fprintf(stderr, ", dup inode ref");
910 if (errors & REF_ERR_INDEX_UNMATCH)
911 fprintf(stderr, ", index mismatch");
912 if (errors & REF_ERR_FILETYPE_UNMATCH)
913 fprintf(stderr, ", filetype mismatch");
914 if (errors & REF_ERR_NAME_TOO_LONG)
915 fprintf(stderr, ", name too long");
916 if (errors & REF_ERR_NO_ROOT_REF)
917 fprintf(stderr, ", no root ref");
918 if (errors & REF_ERR_NO_ROOT_BACKREF)
919 fprintf(stderr, ", no root backref");
920 if (errors & REF_ERR_DUP_ROOT_REF)
921 fprintf(stderr, ", dup root ref");
922 if (errors & REF_ERR_DUP_ROOT_BACKREF)
923 fprintf(stderr, ", dup root backref");
924 fprintf(stderr, "\n");
927 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
930 struct ptr_node *node;
931 struct cache_extent *cache;
932 struct inode_record *rec = NULL;
935 cache = lookup_cache_extent(inode_cache, ino, 1);
937 node = container_of(cache, struct ptr_node, cache);
939 if (mod && rec->refs > 1) {
940 node->data = clone_inode_rec(rec);
941 if (IS_ERR(node->data))
947 rec = calloc(1, sizeof(*rec));
949 return ERR_PTR(-ENOMEM);
951 rec->extent_start = (u64)-1;
953 INIT_LIST_HEAD(&rec->backrefs);
954 INIT_LIST_HEAD(&rec->orphan_extents);
955 rec->holes = RB_ROOT;
957 node = malloc(sizeof(*node));
960 return ERR_PTR(-ENOMEM);
962 node->cache.start = ino;
963 node->cache.size = 1;
966 if (ino == BTRFS_FREE_INO_OBJECTID)
969 ret = insert_cache_extent(inode_cache, &node->cache);
971 return ERR_PTR(-EEXIST);
976 static void free_orphan_data_extents(struct list_head *orphan_extents)
978 struct orphan_data_extent *orphan;
980 while (!list_empty(orphan_extents)) {
981 orphan = list_entry(orphan_extents->next,
982 struct orphan_data_extent, list);
983 list_del(&orphan->list);
988 static void free_inode_rec(struct inode_record *rec)
990 struct inode_backref *backref;
995 while (!list_empty(&rec->backrefs)) {
996 backref = to_inode_backref(rec->backrefs.next);
997 list_del(&backref->list);
1000 free_orphan_data_extents(&rec->orphan_extents);
1001 free_file_extent_holes(&rec->holes);
1005 static int can_free_inode_rec(struct inode_record *rec)
1007 if (!rec->errors && rec->checked && rec->found_inode_item &&
1008 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1013 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1014 struct inode_record *rec)
1016 struct cache_extent *cache;
1017 struct inode_backref *tmp, *backref;
1018 struct ptr_node *node;
1019 unsigned char filetype;
1021 if (!rec->found_inode_item)
1024 filetype = imode_to_type(rec->imode);
1025 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1026 if (backref->found_dir_item && backref->found_dir_index) {
1027 if (backref->filetype != filetype)
1028 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1029 if (!backref->errors && backref->found_inode_ref &&
1030 rec->nlink == rec->found_link) {
1031 list_del(&backref->list);
1037 if (!rec->checked || rec->merging)
1040 if (S_ISDIR(rec->imode)) {
1041 if (rec->found_size != rec->isize)
1042 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1043 if (rec->found_file_extent)
1044 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1045 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1046 if (rec->found_dir_item)
1047 rec->errors |= I_ERR_ODD_DIR_ITEM;
1048 if (rec->found_size != rec->nbytes)
1049 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1050 if (rec->nlink > 0 && !no_holes &&
1051 (rec->extent_end < rec->isize ||
1052 first_extent_gap(&rec->holes) < rec->isize))
1053 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1056 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1057 if (rec->found_csum_item && rec->nodatasum)
1058 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1059 if (rec->some_csum_missing && !rec->nodatasum)
1060 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1063 BUG_ON(rec->refs != 1);
1064 if (can_free_inode_rec(rec)) {
1065 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1066 node = container_of(cache, struct ptr_node, cache);
1067 BUG_ON(node->data != rec);
1068 remove_cache_extent(inode_cache, &node->cache);
1070 free_inode_rec(rec);
1074 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1076 struct btrfs_path path;
1077 struct btrfs_key key;
1080 key.objectid = BTRFS_ORPHAN_OBJECTID;
1081 key.type = BTRFS_ORPHAN_ITEM_KEY;
1084 btrfs_init_path(&path);
1085 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1086 btrfs_release_path(&path);
1092 static int process_inode_item(struct extent_buffer *eb,
1093 int slot, struct btrfs_key *key,
1094 struct shared_node *active_node)
1096 struct inode_record *rec;
1097 struct btrfs_inode_item *item;
1099 rec = active_node->current;
1100 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1101 if (rec->found_inode_item) {
1102 rec->errors |= I_ERR_DUP_INODE_ITEM;
1105 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1106 rec->nlink = btrfs_inode_nlink(eb, item);
1107 rec->isize = btrfs_inode_size(eb, item);
1108 rec->nbytes = btrfs_inode_nbytes(eb, item);
1109 rec->imode = btrfs_inode_mode(eb, item);
1110 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1112 rec->found_inode_item = 1;
1113 if (rec->nlink == 0)
1114 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1115 maybe_free_inode_rec(&active_node->inode_cache, rec);
1119 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1121 int namelen, u64 dir)
1123 struct inode_backref *backref;
1125 list_for_each_entry(backref, &rec->backrefs, list) {
1126 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1128 if (backref->dir != dir || backref->namelen != namelen)
1130 if (memcmp(name, backref->name, namelen))
1135 backref = malloc(sizeof(*backref) + namelen + 1);
1138 memset(backref, 0, sizeof(*backref));
1140 backref->namelen = namelen;
1141 memcpy(backref->name, name, namelen);
1142 backref->name[namelen] = '\0';
1143 list_add_tail(&backref->list, &rec->backrefs);
1147 static int add_inode_backref(struct cache_tree *inode_cache,
1148 u64 ino, u64 dir, u64 index,
1149 const char *name, int namelen,
1150 int filetype, int itemtype, int errors)
1152 struct inode_record *rec;
1153 struct inode_backref *backref;
1155 rec = get_inode_rec(inode_cache, ino, 1);
1156 BUG_ON(IS_ERR(rec));
1157 backref = get_inode_backref(rec, name, namelen, dir);
1160 backref->errors |= errors;
1161 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1162 if (backref->found_dir_index)
1163 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1164 if (backref->found_inode_ref && backref->index != index)
1165 backref->errors |= REF_ERR_INDEX_UNMATCH;
1166 if (backref->found_dir_item && backref->filetype != filetype)
1167 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1169 backref->index = index;
1170 backref->filetype = filetype;
1171 backref->found_dir_index = 1;
1172 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1174 if (backref->found_dir_item)
1175 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1176 if (backref->found_dir_index && backref->filetype != filetype)
1177 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1179 backref->filetype = filetype;
1180 backref->found_dir_item = 1;
1181 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1182 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1183 if (backref->found_inode_ref)
1184 backref->errors |= REF_ERR_DUP_INODE_REF;
1185 if (backref->found_dir_index && backref->index != index)
1186 backref->errors |= REF_ERR_INDEX_UNMATCH;
1188 backref->index = index;
1190 backref->ref_type = itemtype;
1191 backref->found_inode_ref = 1;
1196 maybe_free_inode_rec(inode_cache, rec);
1200 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1201 struct cache_tree *dst_cache)
1203 struct inode_backref *backref;
1208 list_for_each_entry(backref, &src->backrefs, list) {
1209 if (backref->found_dir_index) {
1210 add_inode_backref(dst_cache, dst->ino, backref->dir,
1211 backref->index, backref->name,
1212 backref->namelen, backref->filetype,
1213 BTRFS_DIR_INDEX_KEY, backref->errors);
1215 if (backref->found_dir_item) {
1217 add_inode_backref(dst_cache, dst->ino,
1218 backref->dir, 0, backref->name,
1219 backref->namelen, backref->filetype,
1220 BTRFS_DIR_ITEM_KEY, backref->errors);
1222 if (backref->found_inode_ref) {
1223 add_inode_backref(dst_cache, dst->ino,
1224 backref->dir, backref->index,
1225 backref->name, backref->namelen, 0,
1226 backref->ref_type, backref->errors);
1230 if (src->found_dir_item)
1231 dst->found_dir_item = 1;
1232 if (src->found_file_extent)
1233 dst->found_file_extent = 1;
1234 if (src->found_csum_item)
1235 dst->found_csum_item = 1;
1236 if (src->some_csum_missing)
1237 dst->some_csum_missing = 1;
1238 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1239 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1244 BUG_ON(src->found_link < dir_count);
1245 dst->found_link += src->found_link - dir_count;
1246 dst->found_size += src->found_size;
1247 if (src->extent_start != (u64)-1) {
1248 if (dst->extent_start == (u64)-1) {
1249 dst->extent_start = src->extent_start;
1250 dst->extent_end = src->extent_end;
1252 if (dst->extent_end > src->extent_start)
1253 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1254 else if (dst->extent_end < src->extent_start) {
1255 ret = add_file_extent_hole(&dst->holes,
1257 src->extent_start - dst->extent_end);
1259 if (dst->extent_end < src->extent_end)
1260 dst->extent_end = src->extent_end;
1264 dst->errors |= src->errors;
1265 if (src->found_inode_item) {
1266 if (!dst->found_inode_item) {
1267 dst->nlink = src->nlink;
1268 dst->isize = src->isize;
1269 dst->nbytes = src->nbytes;
1270 dst->imode = src->imode;
1271 dst->nodatasum = src->nodatasum;
1272 dst->found_inode_item = 1;
1274 dst->errors |= I_ERR_DUP_INODE_ITEM;
1282 static int splice_shared_node(struct shared_node *src_node,
1283 struct shared_node *dst_node)
1285 struct cache_extent *cache;
1286 struct ptr_node *node, *ins;
1287 struct cache_tree *src, *dst;
1288 struct inode_record *rec, *conflict;
1289 u64 current_ino = 0;
1293 if (--src_node->refs == 0)
1295 if (src_node->current)
1296 current_ino = src_node->current->ino;
1298 src = &src_node->root_cache;
1299 dst = &dst_node->root_cache;
1301 cache = search_cache_extent(src, 0);
1303 node = container_of(cache, struct ptr_node, cache);
1305 cache = next_cache_extent(cache);
1308 remove_cache_extent(src, &node->cache);
1311 ins = malloc(sizeof(*ins));
1313 ins->cache.start = node->cache.start;
1314 ins->cache.size = node->cache.size;
1318 ret = insert_cache_extent(dst, &ins->cache);
1319 if (ret == -EEXIST) {
1320 conflict = get_inode_rec(dst, rec->ino, 1);
1321 BUG_ON(IS_ERR(conflict));
1322 merge_inode_recs(rec, conflict, dst);
1324 conflict->checked = 1;
1325 if (dst_node->current == conflict)
1326 dst_node->current = NULL;
1328 maybe_free_inode_rec(dst, conflict);
1329 free_inode_rec(rec);
1336 if (src == &src_node->root_cache) {
1337 src = &src_node->inode_cache;
1338 dst = &dst_node->inode_cache;
1342 if (current_ino > 0 && (!dst_node->current ||
1343 current_ino > dst_node->current->ino)) {
1344 if (dst_node->current) {
1345 dst_node->current->checked = 1;
1346 maybe_free_inode_rec(dst, dst_node->current);
1348 dst_node->current = get_inode_rec(dst, current_ino, 1);
1349 BUG_ON(IS_ERR(dst_node->current));
1354 static void free_inode_ptr(struct cache_extent *cache)
1356 struct ptr_node *node;
1357 struct inode_record *rec;
1359 node = container_of(cache, struct ptr_node, cache);
1361 free_inode_rec(rec);
1365 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1367 static struct shared_node *find_shared_node(struct cache_tree *shared,
1370 struct cache_extent *cache;
1371 struct shared_node *node;
1373 cache = lookup_cache_extent(shared, bytenr, 1);
1375 node = container_of(cache, struct shared_node, cache);
1381 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1384 struct shared_node *node;
1386 node = calloc(1, sizeof(*node));
1389 node->cache.start = bytenr;
1390 node->cache.size = 1;
1391 cache_tree_init(&node->root_cache);
1392 cache_tree_init(&node->inode_cache);
1395 ret = insert_cache_extent(shared, &node->cache);
/*
 * Called when the tree walk descends into a block with multiple refs.
 * Registers (or re-finds) a shared_node for @bytenr and makes it the
 * active node at @level, so inode records found below are accumulated
 * per shared subtree.  If the walk re-enters an already-known shared
 * node, its accumulated records are spliced into the current active
 * node instead.
 */
1400 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1401 struct walk_control *wc, int level)
1403 struct shared_node *node;
1404 struct shared_node *dest;
1407 if (level == wc->active_node)
/* a shared node must always sit below the currently active one */
1410 BUG_ON(wc->active_node <= level);
1411 node = find_shared_node(&wc->shared, bytenr);
1413 ret = add_shared_node(&wc->shared, bytenr, refs);
1415 node = find_shared_node(&wc->shared, bytenr);
1416 wc->nodes[level] = node;
1417 wc->active_node = level;
/*
 * Dead root (refs == 0 on the root item): drop one ref and, when it was
 * the last, throw away the accumulated record caches.
 */
1421 if (wc->root_level == wc->active_node &&
1422 btrfs_root_refs(&root->root_item) == 0) {
1423 if (--node->refs == 0) {
1424 free_inode_recs_tree(&node->root_cache);
1425 free_inode_recs_tree(&node->inode_cache);
1426 remove_cache_extent(&wc->shared, &node->cache);
/* otherwise merge this node's records into the active node */
1432 dest = wc->nodes[wc->active_node];
1433 splice_shared_node(node, dest);
1434 if (node->refs == 0) {
1435 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): when the walk climbs back above a
 * shared node at @level, find the next populated level above it, make
 * that the active node, and splice the finished shared node's records
 * into it (unless the root is dead / this was the last reference).
 */
1441 static int leave_shared_node(struct btrfs_root *root,
1442 struct walk_control *wc, int level)
1444 struct shared_node *node;
1445 struct shared_node *dest;
1448 if (level == wc->root_level)
/* find the nearest tracked node above @level to become active */
1451 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1455 BUG_ON(i >= BTRFS_MAX_LEVEL);
1457 node = wc->nodes[wc->active_node];
1458 wc->nodes[wc->active_node] = NULL;
1459 wc->active_node = i;
1461 dest = wc->nodes[wc->active_node];
1462 if (wc->active_node < wc->root_level ||
1463 btrfs_root_refs(&root->root_item) > 0) {
/* a shared node left this way must still be referenced elsewhere */
1464 BUG_ON(node->refs <= 1);
1465 splice_shared_node(node, dest);
1467 BUG_ON(node->refs < 2);
1476 * 1 - if the root with id child_root_id is a child of root parent_root_id
1477 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1478 * has other root(s) as parent(s)
1479 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Decide the parent/child relation of two subvolume roots by first
 * probing for an exact ROOT_REF item (parent -> child) in the tree
 * root, then scanning the child's ROOT_BACKREF items for any parent.
 */
1481 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1484 struct btrfs_path path;
1485 struct btrfs_key key;
1486 struct extent_buffer *leaf;
1490 btrfs_init_path(&path);
/* fast path: direct ROOT_REF (parent_root_id, REF, child_root_id) */
1492 key.objectid = parent_root_id;
1493 key.type = BTRFS_ROOT_REF_KEY;
1494 key.offset = child_root_id;
1495 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1499 btrfs_release_path(&path);
/* slow path: walk all ROOT_BACKREF items of the child */
1503 key.objectid = child_root_id;
1504 key.type = BTRFS_ROOT_BACKREF_KEY;
1506 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1512 leaf = path.nodes[0];
1513 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1514 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1517 leaf = path.nodes[0];
1520 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* stop once we run past this child's backref items */
1521 if (key.objectid != child_root_id ||
1522 key.type != BTRFS_ROOT_BACKREF_KEY)
1527 if (key.offset == parent_root_id) {
1528 btrfs_release_path(&path);
1535 btrfs_release_path(&path);
/* saw backrefs but none to the asked parent -> 0; none at all -> 2 */
1538 return has_parent ? 0 : 2;
/*
 * Record all directory entries packed into one DIR_ITEM/DIR_INDEX leaf
 * item.  For each sub-entry, a backref is added to the inode cache
 * (INODE_ITEM locations), the root cache (ROOT_ITEM locations, i.e.
 * subvolume links), or — for any other location type — to the special
 * BTRFS_MULTIPLE_OBJECTIDS record with an error flagged.
 */
1541 static int process_dir_item(struct btrfs_root *root,
1542 struct extent_buffer *eb,
1543 int slot, struct btrfs_key *key,
1544 struct shared_node *active_node)
1554 struct btrfs_dir_item *di;
1555 struct inode_record *rec;
1556 struct cache_tree *root_cache;
1557 struct cache_tree *inode_cache;
1558 struct btrfs_key location;
1559 char namebuf[BTRFS_NAME_LEN];
1561 root_cache = &active_node->root_cache;
1562 inode_cache = &active_node->inode_cache;
1563 rec = active_node->current;
1564 rec->found_dir_item = 1;
1566 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1567 total = btrfs_item_size_nr(eb, slot);
/* a single item may hold several dir entries back to back */
1568 while (cur < total) {
1570 btrfs_dir_item_key_to_cpu(eb, di, &location);
1571 name_len = btrfs_dir_name_len(eb, di);
1572 data_len = btrfs_dir_data_len(eb, di);
1573 filetype = btrfs_dir_type(eb, di);
/* dir isize accounting: sum of entry name lengths */
1575 rec->found_size += name_len;
1576 if (name_len <= BTRFS_NAME_LEN) {
/* clamp over-long names and remember the error */
1580 len = BTRFS_NAME_LEN;
1581 error = REF_ERR_NAME_TOO_LONG;
1583 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1585 if (location.type == BTRFS_INODE_ITEM_KEY) {
1586 add_inode_backref(inode_cache, location.objectid,
1587 key->objectid, key->offset, namebuf,
1588 len, filetype, key->type, error);
1589 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1590 add_inode_backref(root_cache, location.objectid,
1591 key->objectid, key->offset,
1592 namebuf, len, filetype,
1595 fprintf(stderr, "invalid location in dir item %u\n",
1597 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1598 key->objectid, key->offset, namebuf,
1599 len, filetype, key->type, error);
/* advance to the next packed entry */
1602 len = sizeof(*di) + name_len + data_len;
1603 di = (struct btrfs_dir_item *)((char *)di + len);
/* DIR_INDEX items must contain exactly one entry */
1606 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1607 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every INODE_REF packed into one leaf item: key->objectid is
 * the inode, key->offset its parent directory.  Each ref adds a backref
 * (with the entry name and dir index) to the inode cache.
 */
1612 static int process_inode_ref(struct extent_buffer *eb,
1613 int slot, struct btrfs_key *key,
1614 struct shared_node *active_node)
1622 struct cache_tree *inode_cache;
1623 struct btrfs_inode_ref *ref;
1624 char namebuf[BTRFS_NAME_LEN];
1626 inode_cache = &active_node->inode_cache;
1628 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1629 total = btrfs_item_size_nr(eb, slot);
/* one item can hold multiple refs back to back */
1630 while (cur < total) {
1631 name_len = btrfs_inode_ref_name_len(eb, ref);
1632 index = btrfs_inode_ref_index(eb, ref);
1633 if (name_len <= BTRFS_NAME_LEN) {
/* clamp over-long names and flag the error */
1637 len = BTRFS_NAME_LEN;
1638 error = REF_ERR_NAME_TOO_LONG;
/* the name follows the fixed-size ref struct in the item */
1640 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1641 add_inode_backref(inode_cache, key->objectid, key->offset,
1642 index, namebuf, len, 0, key->type, error);
1644 len = sizeof(*ref) + name_len;
1645 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Same as process_inode_ref() but for extended refs (INODE_EXTREF),
 * used when a name/parent pair hashes outside the regular ref item.
 * The parent dir is stored inside each extref rather than in the key.
 */
1651 static int process_inode_extref(struct extent_buffer *eb,
1652 int slot, struct btrfs_key *key,
1653 struct shared_node *active_node)
1662 struct cache_tree *inode_cache;
1663 struct btrfs_inode_extref *extref;
1664 char namebuf[BTRFS_NAME_LEN];
1666 inode_cache = &active_node->inode_cache;
1668 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1669 total = btrfs_item_size_nr(eb, slot);
1670 while (cur < total) {
1671 name_len = btrfs_inode_extref_name_len(eb, extref);
1672 index = btrfs_inode_extref_index(eb, extref);
/* parent dir comes from the extref payload, not key->offset */
1673 parent = btrfs_inode_extref_parent(eb, extref);
1674 if (name_len <= BTRFS_NAME_LEN) {
1678 len = BTRFS_NAME_LEN;
1679 error = REF_ERR_NAME_TOO_LONG;
1681 read_extent_buffer(eb, namebuf,
1682 (unsigned long)(extref + 1), len);
1683 add_inode_backref(inode_cache, key->objectid, parent,
1684 index, namebuf, len, 0, key->type, error);
1686 len = sizeof(*extref) + name_len;
1687 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of [start, start+len) are covered by checksum
 * items in the csum tree; the covered byte count is returned in *found.
 * Used to decide whether a file extent has full/partial/no csums.
 */
1694 static int count_csum_range(struct btrfs_root *root, u64 start,
1695 u64 len, u64 *found)
1697 struct btrfs_key key;
1698 struct btrfs_path path;
1699 struct extent_buffer *leaf;
1704 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1706 btrfs_init_path(&path);
1708 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1710 key.type = BTRFS_EXTENT_CSUM_KEY;
1712 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* the previous csum item may still cover part of our range */
1716 if (ret > 0 && path.slots[0] > 0) {
1717 leaf = path.nodes[0];
1718 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1719 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1720 key.type == BTRFS_EXTENT_CSUM_KEY)
1725 leaf = path.nodes[0];
1726 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1727 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1732 leaf = path.nodes[0];
1735 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1736 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1737 key.type != BTRFS_EXTENT_CSUM_KEY)
1740 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* items are sorted by offset; past the end of our range -> done */
1741 if (key.offset >= start + len)
1744 if (key.offset > start)
/* each csum_size entry covers one sector of data */
1747 size = btrfs_item_size_nr(leaf, path.slots[0]);
1748 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1749 if (csum_end > start) {
1750 size = min(csum_end - start, len);
1759 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item of the current inode record: track the
 * covered file range (detecting overlaps and holes), sanity-check the
 * extent fields, accumulate found_size, and verify csum coverage for
 * regular/prealloc extents.
 */
1765 static int process_file_extent(struct btrfs_root *root,
1766 struct extent_buffer *eb,
1767 int slot, struct btrfs_key *key,
1768 struct shared_node *active_node)
1770 struct inode_record *rec;
1771 struct btrfs_file_extent_item *fi;
1773 u64 disk_bytenr = 0;
1774 u64 extent_offset = 0;
/* sector-alignment mask for rounding/validating sizes */
1775 u64 mask = root->sectorsize - 1;
1779 rec = active_node->current;
1780 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1781 rec->found_file_extent = 1;
1783 if (rec->extent_start == (u64)-1) {
1784 rec->extent_start = key->offset;
1785 rec->extent_end = key->offset;
/* overlap if the previous extent reaches past this one's start */
1788 if (rec->extent_end > key->offset)
1789 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1790 else if (rec->extent_end < key->offset) {
/* gap between extents: remember it as a hole */
1791 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1792 key->offset - rec->extent_end);
1797 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1798 extent_type = btrfs_file_extent_type(eb, fi);
1800 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1801 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1803 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1804 rec->found_size += num_bytes;
/* round inline length up to a full sector for range tracking */
1805 num_bytes = (num_bytes + mask) & ~mask;
1806 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1807 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1808 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1809 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1810 extent_offset = btrfs_file_extent_offset(eb, fi);
/* zero-length or unaligned extents are invalid */
1811 if (num_bytes == 0 || (num_bytes & mask))
1812 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1813 if (num_bytes + extent_offset >
1814 btrfs_file_extent_ram_bytes(eb, fi))
1815 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* prealloc extents must not be compressed/encrypted/encoded */
1816 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1817 (btrfs_file_extent_compression(eb, fi) ||
1818 btrfs_file_extent_encryption(eb, fi) ||
1819 btrfs_file_extent_other_encoding(eb, fi)))
1820 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 marks a hole extent; don't count it */
1821 if (disk_bytenr > 0)
1822 rec->found_size += num_bytes;
1824 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1826 rec->extent_end = key->offset + num_bytes;
1829 * The data reloc tree will copy full extents into its inode and then
1830 * copy the corresponding csums. Because the extent it copied could be
1831 * a preallocated extent that hasn't been written to yet there may be no
1832 * csums to copy, ergo we won't have csums for our file extent. This is
1833 * ok so just don't bother checking csums if the inode belongs to the
1836 if (disk_bytenr > 0 &&
1837 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* compressed extents are csummed over on-disk bytes */
1839 if (btrfs_file_extent_compression(eb, fi))
1840 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1842 disk_bytenr += extent_offset;
1844 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1847 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1849 rec->found_csum_item = 1;
1850 if (found < num_bytes)
1851 rec->some_csum_missing = 1;
/* prealloc extents must have no csums at all */
1852 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1854 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Scan every item in a fs-tree leaf, advancing the active node's
 * "current" inode record as the item objectid increases, and dispatch
 * each item type to its process_*() handler.
 */
1860 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1861 struct walk_control *wc)
1863 struct btrfs_key key;
1867 struct cache_tree *inode_cache;
1868 struct shared_node *active_node;
/* skip dead roots (see enter_shared_node()) */
1870 if (wc->root_level == wc->active_node &&
1871 btrfs_root_refs(&root->root_item) == 0)
1874 active_node = wc->nodes[wc->active_node];
1875 inode_cache = &active_node->inode_cache;
1876 nritems = btrfs_header_nritems(eb);
1877 for (i = 0; i < nritems; i++) {
1878 btrfs_item_key_to_cpu(eb, &key, i);
/* free-space cache and orphan items are not inode metadata */
1880 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1882 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* moving on to a new inode: finish (mark checked) the previous one */
1885 if (active_node->current == NULL ||
1886 active_node->current->ino < key.objectid) {
1887 if (active_node->current) {
1888 active_node->current->checked = 1;
1889 maybe_free_inode_rec(inode_cache,
1890 active_node->current);
1892 active_node->current = get_inode_rec(inode_cache,
1894 BUG_ON(IS_ERR(active_node->current));
1897 case BTRFS_DIR_ITEM_KEY:
1898 case BTRFS_DIR_INDEX_KEY:
1899 ret = process_dir_item(root, eb, i, &key, active_node);
1901 case BTRFS_INODE_REF_KEY:
1902 ret = process_inode_ref(eb, i, &key, active_node);
1904 case BTRFS_INODE_EXTREF_KEY:
1905 ret = process_inode_extref(eb, i, &key, active_node);
1907 case BTRFS_INODE_ITEM_KEY:
1908 ret = process_inode_item(eb, i, &key, active_node);
1910 case BTRFS_EXTENT_DATA_KEY:
1911 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for all child blocks of @node starting at @slot, so
 * the subsequent walk_down_tree() reads hit cached buffers.
 */
1921 static void reada_walk_down(struct btrfs_root *root,
1922 struct extent_buffer *node, int slot)
1931 level = btrfs_header_level(node);
1935 nritems = btrfs_header_nritems(node);
1936 blocksize = root->nodesize;
1937 for (i = slot; i < nritems; i++) {
1938 bytenr = btrfs_node_blockptr(node, i);
1939 ptr_gen = btrfs_node_ptr_generation(node, i);
/* fire-and-forget: readahead errors are not checked here */
1940 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1945 * Check the child node/leaf by the following condition:
1946 * 1. the first item key of the node/leaf should be the same with the one
1948 * 2. block in parent node should match the child node/leaf.
1949 * 3. generation of parent node and child's header should be consistent.
1951 * Or the child node/leaf pointed by the key in parent is not valid.
1953 * We hope to check leaf owner too, but since subvol may share leaves,
1954 * which makes leaf owner check not so strong, key check should be
1955 * sufficient enough for that case.
1957 static int check_child_node(struct btrfs_root *root,
1958 struct extent_buffer *parent, int slot,
1959 struct extent_buffer *child)
1961 struct btrfs_key parent_key;
1962 struct btrfs_key child_key;
1965 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* leaves store item keys, internal nodes store node keys */
1966 if (btrfs_header_level(child) == 0)
1967 btrfs_item_key_to_cpu(child, &child_key, 0);
1969 btrfs_node_key_to_cpu(child, &child_key, 0);
/* condition 1: parent's key for this slot == child's first key */
1971 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1974 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1975 parent_key.objectid, parent_key.type, parent_key.offset,
1976 child_key.objectid, child_key.type, child_key.offset);
/* condition 2: blockptr must match the child's own bytenr */
1978 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1980 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1981 btrfs_node_blockptr(parent, slot),
1982 btrfs_header_bytenr(child));
/* condition 3: pointer generation must match child generation */
1984 if (btrfs_node_ptr_generation(parent, slot) !=
1985 btrfs_header_generation(child)) {
1987 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1988 btrfs_header_generation(child),
1989 btrfs_node_ptr_generation(parent, slot));
1995 u64 bytenr[BTRFS_MAX_LEVEL];
1996 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend one step of the fs-tree walk.  Looks up (and caches in
 * @nrefs) the extent refcount of each block to detect shared subtrees,
 * registers them via enter_shared_node(), processes leaves via
 * process_one_leaf(), and validates each child block against its parent
 * (check_child_node) and against tree-checker sanity rules.
 */
1999 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2000 struct walk_control *wc, int *level,
2001 struct node_refs *nrefs)
2003 enum btrfs_tree_block_status status;
2006 struct extent_buffer *next;
2007 struct extent_buffer *cur;
2012 WARN_ON(*level < 0);
2013 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* reuse the cached refcount if we already looked this block up */
2015 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2016 refs = nrefs->refs[*level];
2019 ret = btrfs_lookup_extent_info(NULL, root,
2020 path->nodes[*level]->start,
2021 *level, 1, &refs, NULL);
2026 nrefs->bytenr[*level] = path->nodes[*level]->start;
2027 nrefs->refs[*level] = refs;
/* refs > 1 means a shared subtree: track it */
2031 ret = enter_shared_node(root, path->nodes[*level]->start,
2039 while (*level >= 0) {
2040 WARN_ON(*level < 0);
2041 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2042 cur = path->nodes[*level];
2044 if (btrfs_header_level(cur) != *level)
/* done with this node: climb back up */
2047 if (path->slots[*level] >= btrfs_header_nritems(cur))
2050 ret = process_one_leaf(root, cur, wc);
2055 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2056 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2057 blocksize = root->nodesize;
/* same refcount caching for the child we are about to enter */
2059 if (bytenr == nrefs->bytenr[*level - 1]) {
2060 refs = nrefs->refs[*level - 1];
2062 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2063 *level - 1, 1, &refs, NULL);
2067 nrefs->bytenr[*level - 1] = bytenr;
2068 nrefs->refs[*level - 1] = refs;
2073 ret = enter_shared_node(root, bytenr, refs,
/* shared child already visited: just skip past it */
2076 path->slots[*level]++;
2081 next = btrfs_find_tree_block(root, bytenr, blocksize);
2082 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2083 free_extent_buffer(next);
/* prefetch siblings before the synchronous read */
2084 reada_walk_down(root, cur, path->slots[*level]);
2085 next = read_tree_block(root, bytenr, blocksize,
2087 if (!extent_buffer_uptodate(next)) {
2088 struct btrfs_key node_key;
2090 btrfs_node_key_to_cpu(path->nodes[*level],
2092 path->slots[*level]);
/* unreadable child: record parent block as corrupted */
2093 btrfs_add_corrupt_extent_record(root->fs_info,
2095 path->nodes[*level]->start,
2096 root->nodesize, *level);
2102 ret = check_child_node(root, cur, path->slots[*level], next);
2108 if (btrfs_is_leaf(next))
2109 status = btrfs_check_leaf(root, NULL, next);
2111 status = btrfs_check_node(root, NULL, next);
2112 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2113 free_extent_buffer(next);
/* descend: the child becomes the path node one level down */
2118 *level = *level - 1;
2119 free_extent_buffer(path->nodes[*level]);
2120 path->nodes[*level] = next;
2121 path->slots[*level] = 0;
2124 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Climb back up the path until a node with remaining unvisited slots is
 * found, releasing exhausted levels and leaving shared nodes
 * (leave_shared_node) as the active level passes them.
 */
2128 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2129 struct walk_control *wc, int *level)
2132 struct extent_buffer *leaf;
2134 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2135 leaf = path->nodes[i];
/* more slots left at this level: resume the walk here */
2136 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2141 free_extent_buffer(path->nodes[*level]);
2142 path->nodes[*level] = NULL;
2143 BUG_ON(*level > wc->active_node);
2144 if (*level == wc->active_node)
2145 leave_shared_node(root, wc, *level);
/*
 * Verify that @rec looks like a valid subvolume root directory: a
 * clean inode item, nlink 1 with no counted links, and a single ".."
 * inode-ref backref at index 0 with no dir item/index entries.
 */
2152 static int check_root_dir(struct inode_record *rec)
2154 struct inode_backref *backref;
2157 if (!rec->found_inode_item || rec->errors)
2159 if (rec->nlink != 1 || rec->found_link != 0)
2161 if (list_empty(&rec->backrefs))
2163 backref = to_inode_backref(rec->backrefs.next)
2164 if (!backref->found_inode_ref)
/* root dir's only backref must be ".." at index 0 */
2166 if (backref->index != 0 || backref->namelen != 2 ||
2167 memcmp(backref->name, "..", 2))
2169 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair helper: rewrite a directory inode's size field to the
 * found_size computed from its entries, clearing I_ERR_DIR_ISIZE_WRONG.
 */
2176 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2177 struct btrfs_root *root, struct btrfs_path *path,
2178 struct inode_record *rec)
2180 struct btrfs_inode_item *ei;
2181 struct btrfs_key key;
2184 key.objectid = rec->ino;
2185 key.type = BTRFS_INODE_ITEM_KEY;
/* offset (u64)-1 + search positions us just after the inode item */
2186 key.offset = (u64)-1;
2188 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2192 if (!path->slots[0]) {
/* step back one slot to land on the INODE_ITEM itself */
2199 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2200 if (key.objectid != rec->ino) {
2205 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2206 struct btrfs_inode_item);
2207 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2208 btrfs_mark_buffer_dirty(path->nodes[0]);
2209 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2210 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2211 root->root_key.objectid);
2213 btrfs_release_path(path);
/*
 * Repair helper: insert the missing orphan item for @rec->ino and
 * clear I_ERR_NO_ORPHAN_ITEM on success.
 */
2217 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2218 struct btrfs_root *root,
2219 struct btrfs_path *path,
2220 struct inode_record *rec)
2224 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2225 btrfs_release_path(path);
2227 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair helper: rewrite the inode's nbytes field to the found_size
 * accumulated from its file extents, clearing I_ERR_FILE_NBYTES_WRONG.
 */
2231 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2232 struct btrfs_root *root,
2233 struct btrfs_path *path,
2234 struct inode_record *rec)
2236 struct btrfs_inode_item *ei;
2237 struct btrfs_key key;
2240 key.objectid = rec->ino;
2241 key.type = BTRFS_INODE_ITEM_KEY;
2244 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2251 /* Since ret == 0, no need to check anything */
2252 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2253 struct btrfs_inode_item);
2254 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2255 btrfs_mark_buffer_dirty(path->nodes[0]);
2256 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2257 printf("reset nbytes for ino %llu root %llu\n",
2258 rec->ino, root->root_key.objectid);
2260 btrfs_release_path(path);
/*
 * Repair helper: a backref has a dir item + inode ref but no DIR_INDEX
 * item.  Insert the missing DIR_INDEX into the parent directory, then
 * re-account the parent's isize in its cached inode record.
 */
2264 static int add_missing_dir_index(struct btrfs_root *root,
2265 struct cache_tree *inode_cache,
2266 struct inode_record *rec,
2267 struct inode_backref *backref)
2269 struct btrfs_path *path;
2270 struct btrfs_trans_handle *trans;
2271 struct btrfs_dir_item *dir_item;
2272 struct extent_buffer *leaf;
2273 struct btrfs_key key;
2274 struct btrfs_disk_key disk_key;
2275 struct inode_record *dir_rec;
2276 unsigned long name_ptr;
/* item payload = dir_item header followed by the entry name */
2277 u32 data_size = sizeof(*dir_item) + backref->namelen;
2280 path = btrfs_alloc_path();
2284 trans = btrfs_start_transaction(root, 1);
2285 if (IS_ERR(trans)) {
2286 btrfs_free_path(path);
2287 return PTR_ERR(trans);
2290 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2291 (unsigned long long)rec->ino);
2292 key.objectid = backref->dir;
2293 key.type = BTRFS_DIR_INDEX_KEY;
2294 key.offset = backref->index;
2296 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2299 leaf = path->nodes[0];
2300 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* the dir entry points at the child inode's INODE_ITEM */
2302 disk_key.objectid = cpu_to_le64(rec->ino);
2303 disk_key.type = BTRFS_INODE_ITEM_KEY;
2304 disk_key.offset = 0;
2306 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2307 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2308 btrfs_set_dir_data_len(leaf, dir_item, 0);
2309 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2310 name_ptr = (unsigned long)(dir_item + 1);
2311 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2312 btrfs_mark_buffer_dirty(leaf);
2313 btrfs_free_path(path);
2314 btrfs_commit_transaction(trans, root);
2316 backref->found_dir_index = 1;
2317 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2318 BUG_ON(IS_ERR(dir_rec));
/* parent isize grows by the new entry's name length */
2321 dir_rec->found_size += backref->namelen;
2322 if (dir_rec->found_size == dir_rec->isize &&
2323 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2324 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2325 if (dir_rec->found_size != dir_rec->isize)
2326 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Repair helper: delete a bad DIR_INDEX entry identified by @backref
 * from the parent directory.  Removes the whole item when it holds a
 * single entry, otherwise only that entry's name.
 */
2331 static int delete_dir_index(struct btrfs_root *root,
2332 struct cache_tree *inode_cache,
2333 struct inode_record *rec,
2334 struct inode_backref *backref)
2336 struct btrfs_trans_handle *trans;
2337 struct btrfs_dir_item *di;
2338 struct btrfs_path *path;
2341 path = btrfs_alloc_path();
2345 trans = btrfs_start_transaction(root, 1);
2346 if (IS_ERR(trans)) {
2347 btrfs_free_path(path);
2348 return PTR_ERR(trans);
2352 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2353 (unsigned long long)backref->dir,
2354 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2355 (unsigned long long)root->objectid);
/* mod -1: locate the entry for deletion */
2357 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2358 backref->name, backref->namelen,
2359 backref->index, -1);
2362 btrfs_free_path(path);
2363 btrfs_commit_transaction(trans, root);
/* whole item vs. one name inside a multi-entry item */
2370 ret = btrfs_del_item(trans, root, path);
2372 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2374 btrfs_free_path(path);
2375 btrfs_commit_transaction(trans, root);
/*
 * Repair helper: recreate a lost INODE_ITEM for @rec from whatever was
 * observed while scanning (links, sizes, whether dir items exist).
 * Mode/permissions are guessed (0755), so the result is explicitly
 * reported as possibly incomplete.
 */
2379 static int create_inode_item(struct btrfs_root *root,
2380 struct inode_record *rec,
2381 struct inode_backref *backref, int root_dir)
2383 struct btrfs_trans_handle *trans;
2384 struct btrfs_inode_item inode_item;
2385 time_t now = time(NULL);
2388 trans = btrfs_start_transaction(root, 1);
2389 if (IS_ERR(trans)) {
2390 ret = PTR_ERR(trans);
2394 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2395 "be incomplete, please check permissions and content after "
2396 "the fsck completes.\n", (unsigned long long)root->objectid,
2397 (unsigned long long)rec->ino);
2399 memset(&inode_item, 0, sizeof(inode_item));
2400 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2402 btrfs_set_stack_inode_nlink(&inode_item, 1);
2404 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2405 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
/* dir item seen -> treat as a directory, even if extents also exist */
2406 if (rec->found_dir_item) {
2407 if (rec->found_file_extent)
2408 fprintf(stderr, "root %llu inode %llu has both a dir "
2409 "item and extents, unsure if it is a dir or a "
2410 "regular file so setting it as a directory\n",
2411 (unsigned long long)root->objectid,
2412 (unsigned long long)rec->ino);
2413 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2414 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2415 } else if (!rec->found_dir_item) {
2416 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2417 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* stamp a/c/mtime with "now"; otime unknown, left at 0 */
2419 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2420 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2421 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2422 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2423 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2424 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2425 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2426 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2428 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2430 btrfs_commit_transaction(trans, root);
/*
 * Walk all backrefs of @rec and repair whatever combination of missing
 * or stale dir_item/dir_index/inode_ref each one shows: delete bad
 * indexes, insert missing indexes or index/item pairs, and recreate
 * the inode item when only references survive.
 * Returns a negative errno, or (when ret == 0) the count of repairs.
 */
2434 static int repair_inode_backrefs(struct btrfs_root *root,
2435 struct inode_record *rec,
2436 struct cache_tree *inode_cache,
2439 struct inode_backref *tmp, *backref;
2440 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2444 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* the root dir itself only ever needs its inode item recreated */
2445 if (!delete && rec->ino == root_dirid) {
2446 if (!rec->found_inode_item) {
2447 ret = create_inode_item(root, rec, backref, 1);
2454 /* Index 0 for root dir's are special, don't mess with it */
2455 if (rec->ino == root_dirid && backref->index == 0)
/* dir index without (or contradicting) an inode ref -> delete it */
2459 ((backref->found_dir_index && !backref->found_inode_ref) ||
2460 (backref->found_dir_index && backref->found_inode_ref &&
2461 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2462 ret = delete_dir_index(root, inode_cache, rec, backref);
2466 list_del(&backref->list);
/* dir item + inode ref but no dir index -> add the index */
2470 if (!delete && !backref->found_dir_index &&
2471 backref->found_dir_item && backref->found_inode_ref) {
2472 ret = add_missing_dir_index(root, inode_cache, rec,
2477 if (backref->found_dir_item &&
2478 backref->found_dir_index &&
2479 backref->found_dir_index) {
/* fully consistent backref: nothing left to repair, drop it */
2480 if (!backref->errors &&
2481 backref->found_inode_ref) {
2482 list_del(&backref->list);
/* inode ref only -> re-insert the dir index/item pair */
2488 if (!delete && (!backref->found_dir_index &&
2489 !backref->found_dir_item &&
2490 backref->found_inode_ref)) {
2491 struct btrfs_trans_handle *trans;
2492 struct btrfs_key location;
2494 ret = check_dir_conflict(root, backref->name,
2500 * let nlink fixing routine to handle it,
2501 * which can do it better.
2506 location.objectid = rec->ino;
2507 location.type = BTRFS_INODE_ITEM_KEY;
2508 location.offset = 0;
2510 trans = btrfs_start_transaction(root, 1);
2511 if (IS_ERR(trans)) {
2512 ret = PTR_ERR(trans);
2515 fprintf(stderr, "adding missing dir index/item pair "
2517 (unsigned long long)rec->ino);
2518 ret = btrfs_insert_dir_item(trans, root, backref->name,
2520 backref->dir, &location,
2521 imode_to_type(rec->imode),
2524 btrfs_commit_transaction(trans, root);
/* complete, error-free refs but no inode item -> recreate it */
2528 if (!delete && (backref->found_inode_ref &&
2529 backref->found_dir_index &&
2530 backref->found_dir_item &&
2531 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2532 !rec->found_inode_item)) {
2533 ret = create_inode_item(root, rec, backref, 0);
2540 return ret ? ret : repaired;
2544 * To determine the file type for nlink/inode_item repair
2546 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2547 * Return -ENOENT if file type is not found.
2549 static int find_file_type(struct inode_record *rec, u8 *type)
2551 struct inode_backref *backref;
2553 /* For inode item recovered case */
2554 if (rec->found_inode_item) {
/* the inode item's mode is authoritative when present */
2555 *type = imode_to_type(rec->imode);
/* otherwise fall back to the type stored in any dir entry */
2559 list_for_each_entry(backref, &rec->backrefs, list) {
2560 if (backref->found_dir_index || backref->found_dir_item) {
2561 *type = backref->filetype;
2569 * To determine the file name for nlink repair
2571 * Return 0 if file name is found, set name and namelen.
2572 * Return -ENOENT if file name is not found.
2574 static int find_file_name(struct inode_record *rec,
2575 char *name, int *namelen)
2577 struct inode_backref *backref;
/* any backref carrying a name (dir entry or inode ref) will do */
2579 list_for_each_entry(backref, &rec->backrefs, list) {
2580 if (backref->found_dir_index || backref->found_dir_item ||
2581 backref->found_inode_ref) {
2582 memcpy(name, backref->name, backref->namelen);
2583 *namelen = backref->namelen;
2590 /* Reset the nlink of the inode to the correct one */
2591 static int reset_nlink(struct btrfs_trans_handle *trans,
2592 struct btrfs_root *root,
2593 struct btrfs_path *path,
2594 struct inode_record *rec)
2596 struct inode_backref *backref;
2597 struct inode_backref *tmp;
2598 struct btrfs_key key;
2599 struct btrfs_inode_item *inode_item;
2602 /* We don't believe this either, reset it and iterate backref */
2603 rec->found_link = 0;
2605 /* Remove all backref including the valid ones */
2606 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2607 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2608 backref->index, backref->name,
2609 backref->namelen, 0);
2613 /* remove invalid backref, so it won't be added back */
2614 if (!(backref->found_dir_index &&
2615 backref->found_dir_item &&
2616 backref->found_inode_ref)) {
2617 list_del(&backref->list);
2624 /* Set nlink to 0 */
2625 key.objectid = rec->ino;
2626 key.type = BTRFS_INODE_ITEM_KEY;
2628 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2635 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2636 struct btrfs_inode_item);
2637 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2638 btrfs_mark_buffer_dirty(path->nodes[0]);
/* release before re-adding links; add_link searches the tree again */
2639 btrfs_release_path(path);
2642 * Add back valid inode_ref/dir_item/dir_index,
2643 * add_link() will handle the nlink inc, so new nlink must be correct
2645 list_for_each_entry(backref, &rec->backrefs, list) {
2646 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2647 backref->name, backref->namelen,
2648 backref->filetype, &backref->index, 1);
2653 btrfs_release_path(path);
/*
 * Repair a wrong link count: recover a name/type for the inode, reset
 * nlink by relinking all valid backrefs, and — if no valid link
 * remains — reattach the inode under a "lost+found" directory,
 * retrying with an ".INO" suffix on name collisions.
 */
2657 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2658 struct btrfs_root *root,
2659 struct btrfs_path *path,
2660 struct inode_record *rec)
2662 char *dir_name = "lost+found";
2663 char namebuf[BTRFS_NAME_LEN] = {0};
2668 int name_recovered = 0;
2669 int type_recovered = 0;
2673 * Get file name and type first before these invalid inode ref
2674 * are deleted by remove_all_invalid_backref()
2676 name_recovered = !find_file_name(rec, namebuf, &namelen);
2677 type_recovered = !find_file_type(rec, &type);
2679 if (!name_recovered) {
/* fall back to the inode number as the file name */
2680 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2681 rec->ino, rec->ino);
2682 namelen = count_digits(rec->ino);
2683 sprintf(namebuf, "%llu", rec->ino);
2686 if (!type_recovered) {
2687 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2689 type = BTRFS_FT_REG_FILE;
2693 ret = reset_nlink(trans, root, path, rec);
2696 "Failed to reset nlink for inode %llu: %s\n",
2697 rec->ino, strerror(-ret));
/* no valid backref survived: move the inode into lost+found */
2701 if (rec->found_link == 0) {
2702 lost_found_ino = root->highest_inode;
2703 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2708 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2709 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2712 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2713 dir_name, strerror(-ret));
2716 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2717 namebuf, namelen, type, NULL, 1);
2719 * Add ".INO" suffix several times to handle case where
2720 * "FILENAME.INO" is already taken by another file.
2722 while (ret == -EEXIST) {
2724 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2726 if (namelen + count_digits(rec->ino) + 1 >
2731 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2733 namelen += count_digits(rec->ino) + 1;
2734 ret = btrfs_add_link(trans, root, rec->ino,
2735 lost_found_ino, namebuf,
2736 namelen, type, NULL, 1);
2740 "Failed to link the inode %llu to %s dir: %s\n",
2741 rec->ino, dir_name, strerror(-ret));
2745 * Just increase the found_link, don't actually add the
2746 * backref. This will make things easier and this inode
2747 * record will be freed after the repair is done.
2748 * So fsck will not report problem about this inode.
2751 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2752 namelen, namebuf, dir_name);
2754 printf("Fixed the nlink of inode %llu\n", rec->ino);
2757 * Clear the flag anyway, or we will loop forever for the same inode
2758 * as it will not be removed from the bad inode list and the dead loop
2761 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2762 btrfs_release_path(path);
2767 * Check if there is any normal(reg or prealloc) file extent for given
2769 * This is used to determine the file type when neither its dir_index/item or
2770 * inode_item exists.
2772 * This will *NOT* report error, if any error happens, just consider it does
2773 * not have any normal file extent.
/*
 * Returns non-zero when inode @ino in @root owns at least one non-inline
 * EXTENT_DATA item, 0 otherwise.
 * NOTE(review): this extract is missing several lines (key setup, loop
 * head, return path) -- confirm against the full source.
 */
2775 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2777 struct btrfs_path *path;
2778 struct btrfs_key key;
2779 struct btrfs_key found_key;
2780 struct btrfs_file_extent_item *fi;
2784 path = btrfs_alloc_path();
2788 key.type = BTRFS_EXTENT_DATA_KEY;
/* Read-only lookup: no transaction, ins_len == 0, cow == 0. */
2791 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* Not an exact hit and slot ran past the leaf: step to the next leaf. */
2796 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2797 ret = btrfs_next_leaf(root, path);
2804 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
/* Stop once we walk past this inode's EXTENT_DATA range. */
2806 if (found_key.objectid != ino ||
2807 found_key.type != BTRFS_EXTENT_DATA_KEY)
2809 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2810 struct btrfs_file_extent_item);
2811 type = btrfs_file_extent_type(path->nodes[0], fi);
/* Inline extents don't qualify; only REG/PREALLOC extents count. */
2812 if (type != BTRFS_FILE_EXTENT_INLINE) {
2818 btrfs_free_path(path);
/*
 * Map a BTRFS_FT_* directory-entry type to its S_IF* inode-mode bits.
 * NOTE(review): the table is indexed without a bounds check, so callers
 * must pass a valid BTRFS_FT_* value.
 */
2822 static u32 btrfs_type_to_imode(u8 type)
2824 static u32 imode_by_btrfs_type[] = {
2825 [BTRFS_FT_REG_FILE] = S_IFREG,
2826 [BTRFS_FT_DIR] = S_IFDIR,
2827 [BTRFS_FT_CHRDEV] = S_IFCHR,
2828 [BTRFS_FT_BLKDEV] = S_IFBLK,
2829 [BTRFS_FT_FIFO] = S_IFIFO,
2830 [BTRFS_FT_SOCK] = S_IFSOCK,
2831 [BTRFS_FT_SYMLINK] = S_IFLNK,
2834 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for @rec.  The file type is recovered from
 * backrefs when possible, otherwise guessed from the record's state, with
 * a regular file as the last-resort fallback.
 */
2837 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2838 struct btrfs_root *root,
2839 struct btrfs_path *path,
2840 struct inode_record *rec)
2844 int type_recovered = 0;
2847 printf("Trying to rebuild inode:%llu\n", rec->ino);
2849 type_recovered = !find_file_type(rec, &filetype);
2852 * Try to determine inode type if type not found.
2854 * For found regular file extent, it must be FILE.
2855 * For found dir_item/index, it must be DIR.
2857 * For undetermined one, use FILE as fallback.
2860 * 1. If found backref(inode_index/item is already handled) to it,
2862 * Need new inode-inode ref structure to allow search for that.
2864 if (!type_recovered) {
/* A real (non-inline) file extent proves this is a regular file. */
2865 if (rec->found_file_extent &&
2866 find_normal_file_extent(root, rec->ino)) {
2868 filetype = BTRFS_FT_REG_FILE;
2869 } else if (rec->found_dir_item) {
2871 filetype = BTRFS_FT_DIR;
/* Orphan data extents imply file data, hence a regular file. */
2872 } else if (!list_empty(&rec->orphan_extents)) {
2874 filetype = BTRFS_FT_REG_FILE;
2876 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2879 filetype = BTRFS_FT_REG_FILE;
/* Insert a fresh inode item with the recovered/assumed mode. */
2883 ret = btrfs_new_inode(trans, root, rec->ino,
2884 mode | btrfs_type_to_imode(filetype));
2889 * Here inode rebuild is done, we only rebuild the inode item,
2890 * don't repair the nlink(like move to lost+found).
2891 * That is the job of nlink repair.
2893 * We just fill the record and return
2895 rec->found_dir_item = 1;
2896 rec->imode = mode | btrfs_type_to_imode(filetype);
2898 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2899 /* Ensure the inode_nlinks repair function will be called */
2900 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach each orphan data extent of @rec as a real file extent.
 * Conflicting extents are freed instead of inserted; size, nbytes and
 * hole bookkeeping in @rec are updated as extents are restored.
 */
2905 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2906 struct btrfs_root *root,
2907 struct btrfs_path *path,
2908 struct inode_record *rec)
2910 struct orphan_data_extent *orphan;
2911 struct orphan_data_extent *tmp;
/* _safe iteration: entries are deleted from the list inside the loop. */
2914 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2916 * Check for conflicting file extents
2918 * Here we don't know whether the extents is compressed or not,
2919 * so we can only assume it not compressed nor data offset,
2920 * and use its disk_len as extent length.
2922 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2923 orphan->offset, orphan->disk_len, 0)
2924 btrfs_release_path(path);
2929 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2930 orphan->disk_bytenr, orphan->disk_len);
/* Drop the conflicting orphan's extent allocation instead of linking it. */
2931 ret = btrfs_free_extent(trans,
2932 root->fs_info->extent_root,
2933 orphan->disk_bytenr, orphan->disk_len,
2934 0, root->objectid, orphan->objectid,
/* No conflict: insert it back as an uncompressed file extent. */
2939 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2940 orphan->offset, orphan->disk_bytenr,
2941 orphan->disk_len, orphan->disk_len);
2945 /* Update file size info */
2946 rec->found_size += orphan->disk_len;
2947 if (rec->found_size == rec->nbytes)
2948 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2950 /* Update the file extent hole info too */
2951 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2955 if (RB_EMPTY_ROOT(&rec->holes))
2956 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2958 list_del(&orphan->list);
/* All orphans handled: clear the error flag on the record. */
2961 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill every recorded file-extent hole of @rec by punching explicit hole
 * extents, then clear I_ERR_FILE_EXTENT_DISCOUNT once the hole tree is
 * empty.  Also handles the case of a file that lost all its extents.
 */
2966 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2967 struct btrfs_root *root,
2968 struct btrfs_path *path,
2969 struct inode_record *rec)
2971 struct rb_node *node;
2972 struct file_extent_hole *hole;
2976 node = rb_first(&rec->holes);
2980 hole = rb_entry(node, struct file_extent_hole, node);
/* Punch an on-disk hole extent covering this gap. */
2981 ret = btrfs_punch_hole(trans, root, rec->ino,
2982 hole->start, hole->len);
2985 ret = del_file_extent_hole(&rec->holes, hole->start,
2989 if (RB_EMPTY_ROOT(&rec->holes))
2990 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
/* Re-fetch the first hole; deletion restructures the rb-tree. */
2991 node = rb_first(&rec->holes);
2993 /* special case for a file losing all its file extent */
2995 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2996 round_up(rec->isize, root->sectorsize));
3000 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3001 rec->ino, root->objectid);
/*
 * Dispatch the per-inode repair helpers for every repairable error bit on
 * @rec, inside a single transaction.  Repairs are ordered so that each
 * later step sees the fixes of earlier ones (e.g. the inode item must be
 * rebuilt before nlink repair).  Stops at the first helper that fails.
 */
3006 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3008 struct btrfs_trans_handle *trans;
3009 struct btrfs_path *path;
/* Bail out early when none of the error bits we know how to fix is set. */
3012 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3013 I_ERR_NO_ORPHAN_ITEM |
3014 I_ERR_LINK_COUNT_WRONG |
3015 I_ERR_NO_INODE_ITEM |
3016 I_ERR_FILE_EXTENT_ORPHAN |
3017 I_ERR_FILE_EXTENT_DISCOUNT|
3018 I_ERR_FILE_NBYTES_WRONG)))
3021 path = btrfs_alloc_path();
3026 * For nlink repair, it may create a dir and add link, so
3027 * 2 for parent(256)'s dir_index and dir_item
3028 * 2 for lost+found dir's inode_item and inode_ref
3029 * 1 for the new inode_ref of the file
3030 * 2 for lost+found dir's dir_index and dir_item for the file
3032 trans = btrfs_start_transaction(root, 7);
3033 if (IS_ERR(trans)) {
3034 btrfs_free_path(path);
3035 return PTR_ERR(trans);
/* Each step only runs when the previous ones succeeded (ret == 0). */
3038 if (rec->errors & I_ERR_NO_INODE_ITEM)
3039 ret = repair_inode_no_item(trans, root, path, rec);
3040 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3041 ret = repair_inode_orphan_extent(trans, root, path, rec);
3042 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3043 ret = repair_inode_discount_extent(trans, root, path, rec);
3044 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3045 ret = repair_inode_isize(trans, root, path, rec);
3046 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3047 ret = repair_inode_orphan_item(trans, root, path, rec);
3048 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3049 ret = repair_inode_nlinks(trans, root, path, rec);
3050 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3051 ret = repair_inode_nbytes(trans, root, path, rec);
/* NOTE(review): commit happens even when ret != 0 -- partial repairs land. */
3052 btrfs_commit_transaction(trans, root);
3053 btrfs_free_path(path);
/*
 * Validate (and optionally repair) every inode record collected for
 * @root.  Handles the root directory specially, runs backref repair in
 * stages when repair mode is on, then walks the whole cache reporting
 * or fixing per-inode errors.  Returns -1 when unrecoverable errors
 * remain, 0 otherwise.
 */
3057 static int check_inode_recs(struct btrfs_root *root,
3058 struct cache_tree *inode_cache)
3060 struct cache_extent *cache;
3061 struct ptr_node *node;
3062 struct inode_record *rec;
3063 struct inode_backref *backref;
3068 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A root with zero refs is being deleted; its cache should be empty. */
3070 if (btrfs_root_refs(&root->root_item) == 0) {
3071 if (!cache_tree_empty(inode_cache))
3072 fprintf(stderr, "warning line %d\n", __LINE__);
3077 * We need to record the highest inode number for later 'lost+found'
3079 * We must select an ino not used/referred by any existing inode, or
3080 * 'lost+found' ino may be a missing ino in a corrupted leaf,
3081 * this may cause 'lost+found' dir has wrong nlinks.
3083 cache = last_cache_extent(inode_cache);
3085 node = container_of(cache, struct ptr_node, cache);
3087 if (rec->ino > root->highest_inode)
3088 root->highest_inode = rec->ino;
3092 * We need to repair backrefs first because we could change some of the
3093 * errors in the inode recs.
3095 * We also need to go through and delete invalid backrefs first and then
3096 * add the correct ones second. We do this because we may get EEXIST
3097 * when adding back the correct index because we hadn't yet deleted the
3100 * For example, if we were missing a dir index then the directories
3101 * isize would be wrong, so if we fixed the isize to what we thought it
3102 * would be and then fixed the backref we'd still have a invalid fs, so
3103 * we need to add back the dir index and then check to see if the isize
3108 if (stage == 3 && !err)
3111 cache = search_cache_extent(inode_cache, 0);
3112 while (repair && cache) {
3113 node = container_of(cache, struct ptr_node, cache);
3115 cache = next_cache_extent(cache);
3117 /* Need to free everything up and rescan */
3119 remove_cache_extent(inode_cache, &node->cache);
3121 free_inode_rec(rec);
3125 if (list_empty(&rec->backrefs))
3128 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root directory (objectid == root_dirid) is checked separately. */
3142 rec = get_inode_rec(inode_cache, root_dirid, 0);
3143 BUG_ON(IS_ERR(rec));
3145 ret = check_root_dir(rec);
3147 fprintf(stderr, "root %llu root dir %llu error\n",
3148 (unsigned long long)root->root_key.objectid,
3149 (unsigned long long)root_dirid);
3150 print_inode_error(root, rec);
3155 struct btrfs_trans_handle *trans;
3157 trans = btrfs_start_transaction(root, 1);
3158 if (IS_ERR(trans)) {
3159 err = PTR_ERR(trans);
3164 "root %llu missing its root dir, recreating\n",
3165 (unsigned long long)root->objectid);
/* Repair path: recreate the missing root directory item. */
3167 ret = btrfs_make_root_dir(trans, root, root_dirid);
3170 btrfs_commit_transaction(trans, root);
3174 fprintf(stderr, "root %llu root dir %llu not found\n",
3175 (unsigned long long)root->root_key.objectid,
3176 (unsigned long long)root_dirid);
/* Main sweep: drain the cache, validating each remaining inode record. */
3180 cache = search_cache_extent(inode_cache, 0);
3183 node = container_of(cache, struct ptr_node, cache);
3185 remove_cache_extent(inode_cache, &node->cache);
3187 if (rec->ino == root_dirid ||
3188 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3189 free_inode_rec(rec);
/* An orphan item excuses a missing-orphan error; re-check on disk. */
3193 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3194 ret = check_orphan_item(root, rec->ino);
3196 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3197 if (can_free_inode_rec(rec)) {
3198 free_inode_rec(rec);
3203 if (!rec->found_inode_item)
3204 rec->errors |= I_ERR_NO_INODE_ITEM;
3205 if (rec->found_link != rec->nlink)
3206 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3208 ret = try_repair_inode(root, rec);
3209 if (ret == 0 && can_free_inode_rec(rec)) {
3210 free_inode_rec(rec);
3216 if (!(repair && ret == 0))
3218 print_inode_error(root, rec);
/* Report every unresolved backref attached to this bad inode. */
3219 list_for_each_entry(backref, &rec->backrefs, list) {
3220 if (!backref->found_dir_item)
3221 backref->errors |= REF_ERR_NO_DIR_ITEM;
3222 if (!backref->found_dir_index)
3223 backref->errors |= REF_ERR_NO_DIR_INDEX;
3224 if (!backref->found_inode_ref)
3225 backref->errors |= REF_ERR_NO_INODE_REF;
3226 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3227 " namelen %u name %s filetype %d errors %x",
3228 (unsigned long long)backref->dir,
3229 (unsigned long long)backref->index,
3230 backref->namelen, backref->name,
3231 backref->filetype, backref->errors);
3232 print_ref_error(backref->errors);
3234 free_inode_rec(rec);
3236 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a fresh one when absent.  Returns ERR_PTR(-ENOMEM) on
 * allocation failure or ERR_PTR(-EEXIST) on insert collision.
 */
3239 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3242 struct cache_extent *cache;
3243 struct root_record *rec = NULL;
3246 cache = lookup_cache_extent(root_cache, objectid, 1);
3248 rec = container_of(cache, struct root_record, cache);
/* Cache miss: build a new record keyed by the root objectid. */
3250 rec = calloc(1, sizeof(*rec));
3252 return ERR_PTR(-ENOMEM);
3253 rec->objectid = objectid;
3254 INIT_LIST_HEAD(&rec->backrefs);
3255 rec->cache.start = objectid;
3256 rec->cache.size = 1;
3258 ret = insert_cache_extent(root_cache, &rec->cache);
/* NOTE(review): the newly calloc'd rec leaks on this error path. */
3260 return ERR_PTR(-EEXIST);
/*
 * Find the backref of @rec matching (ref_root, dir, name); allocate and
 * append a new one when no match exists.  The name is stored inline
 * after the struct with a NUL terminator.
 */
3265 static struct root_backref *get_root_backref(struct root_record *rec,
3266 u64 ref_root, u64 dir, u64 index,
3267 const char *name, int namelen)
3269 struct root_backref *backref;
3271 list_for_each_entry(backref, &rec->backrefs, list) {
3272 if (backref->ref_root != ref_root || backref->dir != dir ||
3273 backref->namelen != namelen)
3275 if (memcmp(name, backref->name, namelen))
/* No existing entry matched: allocate struct + name + NUL in one go. */
3280 backref = calloc(1, sizeof(*backref) + namelen + 1);
3283 backref->ref_root = ref_root;
3285 backref->index = index;
3286 backref->namelen = namelen;
3287 memcpy(backref->name, name, namelen);
3288 backref->name[namelen] = '\0';
3289 list_add_tail(&backref->list, &rec->backrefs);
/*
 * cache_tree destructor callback: free a root_record and every backref
 * still hanging off it.
 */
3293 static void free_root_record(struct cache_extent *cache)
3295 struct root_record *rec;
3296 struct root_backref *backref;
3298 rec = container_of(cache, struct root_record, cache);
3299 while (!list_empty(&rec->backrefs)) {
3300 backref = to_root_backref(rec->backrefs.next);
3301 list_del(&backref->list);
/* Generates free_root_recs_tree() over a cache_tree of root_records. */
3308 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence (DIR_ITEM, DIR_INDEX, ROOT_REF or
 * ROOT_BACKREF key) about the link from @ref_root's dir to subvolume
 * @root_id.  Cross-checks the index against previously seen evidence and
 * marks the backref reachable once both a forward ref and a dir item
 * have been found.
 */
3310 static int add_root_backref(struct cache_tree *root_cache,
3311 u64 root_id, u64 ref_root, u64 dir, u64 index,
3312 const char *name, int namelen,
3313 int item_type, int errors)
3315 struct root_record *rec;
3316 struct root_backref *backref;
3318 rec = get_root_rec(root_cache, root_id);
3319 BUG_ON(IS_ERR(rec));
3320 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3323 backref->errors |= errors;
/* DIR_ITEM carries no index, so only non-DIR_ITEM evidence checks it. */
3325 if (item_type != BTRFS_DIR_ITEM_KEY) {
3326 if (backref->found_dir_index || backref->found_back_ref ||
3327 backref->found_forward_ref) {
3328 if (backref->index != index)
3329 backref->errors |= REF_ERR_INDEX_UNMATCH;
3331 backref->index = index;
/* Flag which kind of on-disk item vouched for this backref. */
3335 if (item_type == BTRFS_DIR_ITEM_KEY) {
3336 if (backref->found_forward_ref)
3338 backref->found_dir_item = 1;
3339 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3340 backref->found_dir_index = 1;
3341 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3342 if (backref->found_forward_ref)
3343 backref->errors |= REF_ERR_DUP_ROOT_REF;
3344 else if (backref->found_dir_item)
3346 backref->found_forward_ref = 1;
3347 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3348 if (backref->found_back_ref)
3349 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3350 backref->found_back_ref = 1;
/* Both directions seen: the subvolume is reachable from a directory. */
3355 if (backref->found_forward_ref && backref->found_dir_item)
3356 backref->reachable = 1;
/*
 * Move per-subvolume results from @src_cache into the global
 * @dst_cache: inode records that turn out to be child roots are
 * converted into root backrefs.  The reloc tree is skipped entirely.
 */
3360 static int merge_root_recs(struct btrfs_root *root,
3361 struct cache_tree *src_cache,
3362 struct cache_tree *dst_cache)
3364 struct cache_extent *cache;
3365 struct ptr_node *node;
3366 struct inode_record *rec;
3367 struct inode_backref *backref;
/* Relocation tree records carry no subvolume info -- drop them. */
3370 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3371 free_inode_recs_tree(src_cache);
3376 cache = search_cache_extent(src_cache, 0);
3379 node = container_of(cache, struct ptr_node, cache);
3381 remove_cache_extent(src_cache, &node->cache);
/* Determine whether this "inode" is actually a child subvolume root. */
3384 ret = is_child_root(root, root->objectid, rec->ino);
3390 list_for_each_entry(backref, &rec->backrefs, list) {
3391 BUG_ON(backref->found_inode_ref);
3392 if (backref->found_dir_item)
3393 add_root_backref(dst_cache, rec->ino,
3394 root->root_key.objectid, backref->dir,
3395 backref->index, backref->name,
3396 backref->namelen, BTRFS_DIR_ITEM_KEY,
3398 if (backref->found_dir_index)
3399 add_root_backref(dst_cache, rec->ino,
3400 root->root_key.objectid, backref->dir,
3401 backref->index, backref->name,
3402 backref->namelen, BTRFS_DIR_INDEX_KEY,
3406 free_inode_rec(rec);
/*
 * Verify subvolume reachability: starting from the FS tree, iteratively
 * drop backrefs whose referencing root is itself unreachable, then report
 * unreferenced fs trees and backref inconsistencies.  Returns 1 when any
 * error was found, 0 otherwise.
 */
3413 static int check_root_refs(struct btrfs_root *root,
3414 struct cache_tree *root_cache)
3416 struct root_record *rec;
3417 struct root_record *ref_root;
3418 struct root_backref *backref;
3419 struct cache_extent *cache;
/* The top-level FS tree is always considered referenced. */
3425 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3426 BUG_ON(IS_ERR(rec));
3429 /* fixme: this can not detect circular references */
/* Fixed-point pass: keep clearing refs that come from dead roots. */
3432 cache = search_cache_extent(root_cache, 0);
3436 rec = container_of(cache, struct root_record, cache);
3437 cache = next_cache_extent(cache);
3439 if (rec->found_ref == 0)
3442 list_for_each_entry(backref, &rec->backrefs, list) {
3443 if (!backref->reachable)
3446 ref_root = get_root_rec(root_cache,
3448 BUG_ON(IS_ERR(ref_root));
3449 if (ref_root->found_ref > 0)
/* Referencing root is dead: this backref no longer counts. */
3452 backref->reachable = 0;
3454 if (rec->found_ref == 0)
/* Reporting pass over the (now settled) root cache. */
3460 cache = search_cache_extent(root_cache, 0);
3464 rec = container_of(cache, struct root_record, cache);
3465 cache = next_cache_extent(cache);
3467 if (rec->found_ref == 0 &&
3468 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3469 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
/* An orphan item legitimises an unreferenced subvolume. */
3470 ret = check_orphan_item(root->fs_info->tree_root,
3476 * If we don't have a root item then we likely just have
3477 * a dir item in a snapshot for this root but no actual
3478 * ref key or anything so it's meaningless.
3480 if (!rec->found_root_item)
3483 fprintf(stderr, "fs tree %llu not referenced\n",
3484 (unsigned long long)rec->objectid);
3488 if (rec->found_ref > 0 && !rec->found_root_item)
3490 list_for_each_entry(backref, &rec->backrefs, list) {
3491 if (!backref->found_dir_item)
3492 backref->errors |= REF_ERR_NO_DIR_ITEM;
3493 if (!backref->found_dir_index)
3494 backref->errors |= REF_ERR_NO_DIR_INDEX;
3495 if (!backref->found_back_ref)
3496 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3497 if (!backref->found_forward_ref)
3498 backref->errors |= REF_ERR_NO_ROOT_REF;
3499 if (backref->reachable && backref->errors)
3506 fprintf(stderr, "fs tree %llu refs %u %s\n",
3507 (unsigned long long)rec->objectid, rec->found_ref,
3508 rec->found_root_item ? "" : "not found");
3510 list_for_each_entry(backref, &rec->backrefs, list) {
3511 if (!backref->reachable)
3513 if (!backref->errors && rec->found_root_item)
3515 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3516 " index %llu namelen %u name %s errors %x\n",
3517 (unsigned long long)backref->ref_root,
3518 (unsigned long long)backref->dir,
3519 (unsigned long long)backref->index,
3520 backref->namelen, backref->name,
3522 print_ref_error(backref->errors);
3525 return errors > 0 ? 1 : 0;
/*
 * Decode a ROOT_REF/ROOT_BACKREF item at @slot of @eb and feed it into
 * the root cache.  For ROOT_REF the key is (parent, REF, child); for
 * ROOT_BACKREF it is (child, BACKREF, parent) -- hence the swapped
 * objectid/offset arguments below.  Over-long names are truncated and
 * flagged with REF_ERR_NAME_TOO_LONG.
 */
3528 static int process_root_ref(struct extent_buffer *eb, int slot,
3529 struct btrfs_key *key,
3530 struct cache_tree *root_cache)
3536 struct btrfs_root_ref *ref;
3537 char namebuf[BTRFS_NAME_LEN];
3540 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3542 dirid = btrfs_root_ref_dirid(eb, ref);
3543 index = btrfs_root_ref_sequence(eb, ref);
3544 name_len = btrfs_root_ref_name_len(eb, ref);
3546 if (name_len <= BTRFS_NAME_LEN) {
/* Clamp corrupted name length to the buffer size and record the error. */
3550 len = BTRFS_NAME_LEN;
3551 error = REF_ERR_NAME_TOO_LONG;
/* The name bytes follow the root_ref struct inline in the item. */
3553 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3555 if (key->type == BTRFS_ROOT_REF_KEY) {
3556 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3557 index, namebuf, len, key->type, error);
3559 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3560 index, namebuf, len, key->type, error);
/* cache_tree destructor callback: free one btrfs_corrupt_block entry. */
3565 static void free_corrupt_block(struct cache_extent *cache)
3567 struct btrfs_corrupt_block *corrupt;
3569 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* Generates free_corrupt_blocks_tree() over a corrupt-block cache_tree. */
3573 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3576 * Repair the btree of the given root.
3578 * The fix is to remove the node key in corrupt_blocks cache_tree.
3579 * and rebalance the tree.
3580 * After the fix, the btree should be writeable.
3582 static int repair_btree(struct btrfs_root *root,
3583 struct cache_tree *corrupt_blocks)
3585 struct btrfs_trans_handle *trans;
3586 struct btrfs_path *path;
3587 struct btrfs_corrupt_block *corrupt;
3588 struct cache_extent *cache;
3589 struct btrfs_key key;
/* Nothing recorded as corrupt -- nothing to repair. */
3594 if (cache_tree_empty(corrupt_blocks))
3597 path = btrfs_alloc_path();
3601 trans = btrfs_start_transaction(root, 1);
3602 if (IS_ERR(trans)) {
3603 ret = PTR_ERR(trans);
3604 fprintf(stderr, "Error starting transaction: %s\n",
/* Pass 1: detach every corrupted block from its parent node. */
3608 cache = first_cache_extent(corrupt_blocks);
3610 corrupt = container_of(cache, struct btrfs_corrupt_block,
3612 level = corrupt->level;
/* Search only down to the parent of the corrupt block. */
3613 path->lowest_level = level;
3614 key.objectid = corrupt->key.objectid;
3615 key.type = corrupt->key.type;
3616 key.offset = corrupt->key.offset;
3619 * Here we don't want to do any tree balance, since it may
3620 * cause a balance with corrupted brother leaf/node,
3621 * so ins_len set to 0 here.
3622 * Balance will be done after all corrupt node/leaf is deleted.
3624 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3627 offset = btrfs_node_blockptr(path->nodes[level],
3628 path->slots[level]);
3630 /* Remove the ptr */
3631 ret = btrfs_del_ptr(trans, root, path, level,
3632 path->slots[level]);
3636 * Remove the corresponding extent
3637 * return value is not concerned.
3639 btrfs_release_path(path);
3640 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3641 0, root->root_key.objectid,
3643 cache = next_cache_extent(cache);
3646 /* Balance the btree using btrfs_search_slot() */
3647 cache = first_cache_extent(corrupt_blocks);
3649 corrupt = container_of(cache, struct btrfs_corrupt_block,
3651 memcpy(&key, &corrupt->key, sizeof(key));
/* ins_len == -1 triggers node merging/balancing along the search path. */
3652 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3655 /* return will always >0 since it won't find the item */
3657 btrfs_release_path(path);
3658 cache = next_cache_extent(cache);
3661 btrfs_commit_transaction(trans, root);
3663 btrfs_free_path(path);
/*
 * Check one fs/subvolume tree: walk it with walk_down/up_tree, collect
 * inode and root records, attempt btree repair for corrupted blocks when
 * repair mode is on, then merge and validate the collected records.
 */
3667 static int check_fs_root(struct btrfs_root *root,
3668 struct cache_tree *root_cache,
3669 struct walk_control *wc)
3675 struct btrfs_path path;
3676 struct shared_node root_node;
3677 struct root_record *rec;
3678 struct btrfs_root_item *root_item = &root->root_item;
3679 struct cache_tree corrupt_blocks;
3680 struct orphan_data_extent *orphan;
3681 struct orphan_data_extent *tmp;
3682 enum btrfs_tree_block_status status;
3683 struct node_refs nrefs;
3686 * Reuse the corrupt_block cache tree to record corrupted tree block
3688 * Unlike the usage in extent tree check, here we do it in a per
3689 * fs/subvol tree base.
3691 cache_tree_init(&corrupt_blocks);
3692 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* Reloc trees are not real subvolumes; don't create root records. */
3694 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3695 rec = get_root_rec(root_cache, root->root_key.objectid);
3696 BUG_ON(IS_ERR(rec));
3697 if (btrfs_root_refs(root_item) > 0)
3698 rec->found_root_item = 1;
3701 btrfs_init_path(&path);
3702 memset(&root_node, 0, sizeof(root_node));
3703 cache_tree_init(&root_node.root_cache);
3704 cache_tree_init(&root_node.inode_cache);
3705 memset(&nrefs, 0, sizeof(nrefs));
3707 /* Move the orphan extent record to corresponding inode_record */
3708 list_for_each_entry_safe(orphan, tmp,
3709 &root->orphan_data_extents, list) {
3710 struct inode_record *inode;
3712 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3714 BUG_ON(IS_ERR(inode));
3715 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3716 list_move(&orphan->list, &inode->orphan_extents);
/* Seed the walk_control with the tree's root node. */
3719 level = btrfs_header_level(root->node);
3720 memset(wc->nodes, 0, sizeof(wc->nodes));
3721 wc->nodes[level] = &root_node;
3722 wc->active_node = level;
3723 wc->root_level = level;
3725 /* We may not have checked the root block, lets do that now */
3726 if (btrfs_is_leaf(root->node))
3727 status = btrfs_check_leaf(root, NULL, root->node);
3729 status = btrfs_check_node(root, NULL, root->node);
3730 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Drop-in-progress roots resume the walk from drop_progress instead. */
3733 if (btrfs_root_refs(root_item) > 0 ||
3734 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3735 path.nodes[level] = root->node;
3736 extent_buffer_get(root->node);
3737 path.slots[level] = 0;
3739 struct btrfs_key key;
3740 struct btrfs_disk_key found_key;
3742 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3743 level = root_item->drop_level;
3744 path.lowest_level = level;
3745 if (level > btrfs_header_level(root->node) ||
3746 level >= BTRFS_MAX_LEVEL) {
3747 error("ignoring invalid drop level: %u", level);
3750 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3753 btrfs_node_key(path.nodes[level], &found_key,
3755 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3756 sizeof(found_key)));
/* Main traversal: descend then ascend until the tree is exhausted. */
3760 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3766 wret = walk_up_tree(root, &path, wc, &level);
3773 btrfs_release_path(&path);
3775 if (!cache_tree_empty(&corrupt_blocks)) {
3776 struct cache_extent *cache;
3777 struct btrfs_corrupt_block *corrupt;
3779 printf("The following tree block(s) is corrupted in tree %llu:\n",
3780 root->root_key.objectid);
3781 cache = first_cache_extent(&corrupt_blocks);
3783 corrupt = container_of(cache,
3784 struct btrfs_corrupt_block,
3786 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3787 cache->start, corrupt->level,
3788 corrupt->key.objectid, corrupt->key.type,
3789 corrupt->key.offset);
3790 cache = next_cache_extent(cache);
3793 printf("Try to repair the btree for root %llu\n",
3794 root->root_key.objectid);
3795 ret = repair_btree(root, &corrupt_blocks);
3797 fprintf(stderr, "Failed to repair btree: %s\n",
3800 printf("Btree for root %llu is fixed\n",
3801 root->root_key.objectid);
/* Promote child-root inode records into global root backrefs. */
3805 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3809 if (root_node.current) {
3810 root_node.current->checked = 1;
3811 maybe_free_inode_rec(&root_node.inode_cache,
3815 err = check_inode_recs(root, &root_node.inode_cache);
3819 free_corrupt_blocks_tree(&corrupt_blocks);
3820 root->fs_info->corrupt_blocks = NULL;
3821 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Return non-zero when @objectid denotes a tree whose contents are laid
 * out like an fs tree (real fs trees plus the two reloc trees).
 */
3825 static int fs_root_objectid(u64 objectid)
3827 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3828 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3830 return is_fstree(objectid);
/*
 * Iterate every ROOT_ITEM in the tree root and run check_fs_root() on
 * each fs/subvolume tree; ROOT_REF/ROOT_BACKREF items are fed to
 * process_root_ref().  Restarts from scratch whenever the tree root
 * node changes under us (a repair committed a transaction).
 */
3833 static int check_fs_roots(struct btrfs_root *root,
3834 struct cache_tree *root_cache)
3836 struct btrfs_path path;
3837 struct btrfs_key key;
3838 struct walk_control wc;
3839 struct extent_buffer *leaf, *tree_node;
3840 struct btrfs_root *tmp_root;
3841 struct btrfs_root *tree_root = root->fs_info->tree_root;
3845 if (ctx.progress_enabled) {
3846 ctx.tp = TASK_FS_ROOTS;
3847 task_start(ctx.info);
3851 * Just in case we made any changes to the extent tree that weren't
3852 * reflected into the free space cache yet.
3855 reset_cached_block_groups(root->fs_info);
3856 memset(&wc, 0, sizeof(wc));
3857 cache_tree_init(&wc.shared);
3858 btrfs_init_path(&path);
3863 key.type = BTRFS_ROOT_ITEM_KEY;
3864 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the root node so we can detect concurrent tree changes. */
3869 tree_node = tree_root->node;
3871 if (tree_node != tree_root->node) {
/* Tree root changed (repair committed): drop everything and rescan. */
3872 free_root_recs_tree(root_cache);
3873 btrfs_release_path(&path);
3876 leaf = path.nodes[0];
3877 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3878 ret = btrfs_next_leaf(tree_root, &path);
3884 leaf = path.nodes[0];
3886 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3887 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3888 fs_root_objectid(key.objectid)) {
/* Reloc trees are read uncached and freed right after checking. */
3889 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3890 tmp_root = btrfs_read_fs_root_no_cache(
3891 root->fs_info, &key);
3893 key.offset = (u64)-1;
3894 tmp_root = btrfs_read_fs_root(
3895 root->fs_info, &key);
3897 if (IS_ERR(tmp_root)) {
3901 ret = check_fs_root(tmp_root, root_cache, &wc);
3902 if (ret == -EAGAIN) {
3903 free_root_recs_tree(root_cache);
3904 btrfs_release_path(&path);
3909 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3910 btrfs_free_fs_root(tmp_root);
3911 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3912 key.type == BTRFS_ROOT_BACKREF_KEY) {
3913 process_root_ref(leaf, path.slots[0], &key,
3920 btrfs_release_path(&path);
3922 free_extent_cache_tree(&wc.shared);
3923 if (!cache_tree_empty(&wc.shared))
3924 fprintf(stderr, "warning line %d\n", __LINE__);
3926 task_stop(ctx.info);
/*
 * Verify that every backref attached to extent record @rec is fully
 * accounted for: present in the extent tree, referenced back, with
 * matching bytenr/size/refcounts.  When @print_errs is set, each
 * mismatch is reported to stderr.  Returns non-zero on any mismatch.
 */
3931 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3934 struct extent_backref *back;
3935 struct tree_backref *tback;
3936 struct data_backref *dback;
3940 for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3941 back = rb_node_to_extent_backref(n);
/* Backref known from tree walking but absent from the extent tree. */
3942 if (!back->found_extent_tree) {
3946 if (back->is_data) {
3947 dback = to_data_backref(back);
3948 fprintf(stderr, "Backref %llu %s %llu"
3949 " owner %llu offset %llu num_refs %lu"
3950 " not found in extent tree\n",
3951 (unsigned long long)rec->start,
3952 back->full_backref ?
3954 back->full_backref ?
3955 (unsigned long long)dback->parent:
3956 (unsigned long long)dback->root,
3957 (unsigned long long)dback->owner,
3958 (unsigned long long)dback->offset,
3959 (unsigned long)dback->num_refs);
3961 tback = to_tree_backref(back);
3962 fprintf(stderr, "Backref %llu parent %llu"
3963 " root %llu not found in extent tree\n",
3964 (unsigned long long)rec->start,
3965 (unsigned long long)tback->parent,
3966 (unsigned long long)tback->root);
/* Tree backref present in the extent tree but never seen in a tree. */
3969 if (!back->is_data && !back->found_ref) {
3973 tback = to_tree_backref(back);
3974 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3975 (unsigned long long)rec->start,
3976 back->full_backref ? "parent" : "root",
3977 back->full_backref ?
3978 (unsigned long long)tback->parent :
3979 (unsigned long long)tback->root, back);
3981 if (back->is_data) {
3982 dback = to_data_backref(back);
/* Local count: refs actually found must equal refs claimed. */
3983 if (dback->found_ref != dback->num_refs) {
3987 fprintf(stderr, "Incorrect local backref count"
3988 " on %llu %s %llu owner %llu"
3989 " offset %llu found %u wanted %u back %p\n",
3990 (unsigned long long)rec->start,
3991 back->full_backref ?
3993 back->full_backref ?
3994 (unsigned long long)dback->parent:
3995 (unsigned long long)dback->root,
3996 (unsigned long long)dback->owner,
3997 (unsigned long long)dback->offset,
3998 dback->found_ref, dback->num_refs, back);
4000 if (dback->disk_bytenr != rec->start) {
4004 fprintf(stderr, "Backref disk bytenr does not"
4005 " match extent record, bytenr=%llu, "
4006 "ref bytenr=%llu\n",
4007 (unsigned long long)rec->start,
4008 (unsigned long long)dback->disk_bytenr);
4011 if (dback->bytes != rec->nr) {
4015 fprintf(stderr, "Backref bytes do not match "
4016 "extent backref, bytenr=%llu, ref "
4017 "bytes=%llu, backref bytes=%llu\n",
4018 (unsigned long long)rec->start,
4019 (unsigned long long)rec->nr,
4020 (unsigned long long)dback->bytes);
/* Accumulate a global refcount across all backrefs for this extent. */
4023 if (!back->is_data) {
4026 dback = to_data_backref(back);
4027 found += dback->found_ref;
4030 if (found != rec->refs) {
4034 fprintf(stderr, "Incorrect global backref count "
4035 "on %llu found %llu wanted %llu\n",
4036 (unsigned long long)rec->start,
4037 (unsigned long long)found,
4038 (unsigned long long)rec->refs);
/* rb_free_nodes() callback: free one extent_backref rb-tree node. */
4044 static void __free_one_backref(struct rb_node *node)
4046 struct extent_backref *back = rb_node_to_extent_backref(node);
/* Free every backref hanging off @rec's backref rb-tree. */
4051 static void free_all_extent_backrefs(struct extent_record *rec)
4053 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Drain @extent_cache, freeing every extent_record and its backrefs.
 * NOTE(review): the fs_info parameter is not used in the visible body.
 */
4056 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4057 struct cache_tree *extent_cache)
4059 struct cache_extent *cache;
4060 struct extent_record *rec;
4063 cache = first_cache_extent(extent_cache);
4066 rec = container_of(cache, struct extent_record, cache);
4067 remove_cache_extent(extent_cache, cache);
4068 free_all_extent_backrefs(rec);
/*
 * Release @rec from @extent_cache once it is fully verified: content and
 * owner checked, refcounts consistent, no duplicates, and none of the
 * "bad" flags set.  Otherwise the record is kept for later reporting.
 */
4073 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4074 struct extent_record *rec)
4076 if (rec->content_checked && rec->owner_ref_checked &&
4077 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4078 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4079 rec->bad_full_backref && !rec->crossing_stripes &&
4080 !rec->wrong_chunk_type) {
4081 remove_cache_extent(extent_cache, &rec->cache);
4082 free_all_extent_backrefs(rec);
4083 list_del_init(&rec->list);
/*
 * Check whether the header owner of @buf is backed by a known backref;
 * if not, search the owner's fs tree to see whether the block really
 * belongs there.  Returns 0 when the owner is confirmed, non-zero when
 * it could not be verified.
 */
4089 static int check_owner_ref(struct btrfs_root *root,
4090 struct extent_record *rec,
4091 struct extent_buffer *buf)
4093 struct extent_backref *node, *tmp;
4094 struct tree_backref *back;
4095 struct btrfs_root *ref_root;
4096 struct btrfs_key key;
4097 struct btrfs_path path;
4098 struct extent_buffer *parent;
/* Fast path: a non-full tree backref already names this owner. */
4103 rbtree_postorder_for_each_entry_safe(node, tmp,
4104 &rec->backref_tree, node) {
4107 if (!node->found_ref)
4109 if (node->full_backref)
4111 back = to_tree_backref(node);
4112 if (btrfs_header_owner(buf) == back->root)
4115 BUG_ON(rec->is_root);
4117 /* try to find the block by search corresponding fs tree */
4118 key.objectid = btrfs_header_owner(buf);
4119 key.type = BTRFS_ROOT_ITEM_KEY;
4120 key.offset = (u64)-1;
4122 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4123 if (IS_ERR(ref_root))
4126 level = btrfs_header_level(buf);
/* Use the block's first key to locate its slot in the owner tree. */
4128 btrfs_item_key_to_cpu(buf, &key, 0);
4130 btrfs_node_key_to_cpu(buf, &key, 0);
4132 btrfs_init_path(&path);
/* Descend only to the level above @buf, i.e. its would-be parent. */
4133 path.lowest_level = level + 1;
4134 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4138 parent = path.nodes[level + 1];
4139 if (parent && buf->start == btrfs_node_blockptr(parent,
4140 path.slots[level + 1]))
4143 btrfs_release_path(&path);
4144 return found ? 0 : 1;
/*
 * Return whether @rec has a (non-full) tree backref owned by the extent
 * tree, i.e. the block belongs to the extent tree itself.  Walks the
 * backref rbtree; data backrefs and full backrefs are skipped.
 */
4147 static int is_extent_tree_record(struct extent_record *rec)
4149 struct extent_backref *ref, *tmp;
4150 struct tree_backref *back;
4153 rbtree_postorder_for_each_entry_safe(ref, tmp,
4154 &rec->backref_tree, node) {
4157 back = to_tree_backref(ref);
4158 if (ref->full_backref)
4160 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an I/O failure on [start, len): look the range up in
 * @extent_cache and, if it belongs to the extent tree, add a corrupt
 * extent record keyed by the block's parent key so repair can act on it.
 */
4167 static int record_bad_block_io(struct btrfs_fs_info *info,
4168 struct cache_tree *extent_cache,
4171 struct extent_record *rec;
4172 struct cache_extent *cache;
4173 struct btrfs_key key;
4175 cache = lookup_cache_extent(extent_cache, start, len);
4179 rec = container_of(cache, struct extent_record, cache);
4180 if (!is_extent_tree_record(rec))
4183 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4184 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot+1 in @buf to restore key order.
 *
 * Node case: exchange the two key pointers wholesale; if slot 0 changed,
 * propagate the new first key upward via btrfs_fixup_low_keys().
 * Leaf case: the item data stays where it is — only the item headers'
 * offset/size fields and the keys are exchanged, then
 * btrfs_set_item_key_unsafe() rewrites each key in place.
 */
4187 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4188 struct extent_buffer *buf, int slot)
4190 if (btrfs_header_level(buf)) {
4191 struct btrfs_key_ptr ptr1, ptr2;
4193 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4194 sizeof(struct btrfs_key_ptr));
4195 read_extent_buffer(buf, &ptr2,
4196 btrfs_node_key_ptr_offset(slot + 1),
4197 sizeof(struct btrfs_key_ptr));
/* write each saved key ptr into the other slot */
4198 write_extent_buffer(buf, &ptr1,
4199 btrfs_node_key_ptr_offset(slot + 1),
4200 sizeof(struct btrfs_key_ptr));
4201 write_extent_buffer(buf, &ptr2,
4202 btrfs_node_key_ptr_offset(slot),
4203 sizeof(struct btrfs_key_ptr));
4205 struct btrfs_disk_key key;
4206 btrfs_node_key(buf, &key, 0);
4207 btrfs_fixup_low_keys(root, path, &key,
4208 btrfs_header_level(buf) + 1);
4211 struct btrfs_item *item1, *item2;
4212 struct btrfs_key k1, k2;
4213 char *item1_data, *item2_data;
4214 u32 item1_offset, item2_offset, item1_size, item2_size;
4216 item1 = btrfs_item_nr(slot);
4217 item2 = btrfs_item_nr(slot + 1);
4218 btrfs_item_key_to_cpu(buf, &k1, slot);
4219 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4220 item1_offset = btrfs_item_offset(buf, item1);
4221 item2_offset = btrfs_item_offset(buf, item2);
4222 item1_size = btrfs_item_size(buf, item1);
4223 item2_size = btrfs_item_size(buf, item2);
/* NOTE(review): malloc results appear to be checked on lines elided here */
4225 item1_data = malloc(item1_size);
4228 item2_data = malloc(item2_size);
4234 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4235 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/* cross-write the payloads into each other's data area */
4237 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4238 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4242 btrfs_set_item_offset(buf, item1, item2_offset);
4243 btrfs_set_item_offset(buf, item2, item1_offset);
4244 btrfs_set_item_size(buf, item1, item2_size);
4245 btrfs_set_item_size(buf, item2, item1_size);
/* finally swap the keys; path->slots[0] selects which item is updated */
4247 path->slots[0] = slot;
4248 btrfs_set_item_key_unsafe(root, path, &k2);
4249 path->slots[0] = slot + 1;
4250 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair out-of-order keys in the block at path->lowest_level: compare
 * each adjacent key pair (node or leaf accessors as appropriate) and swap
 * misordered pairs via swap_values(), then mark the buffer dirty.
 * Note: a single pass fixes one inversion at a time; callers re-run the
 * check after a fix.
 */
4255 static int fix_key_order(struct btrfs_trans_handle *trans,
4256 struct btrfs_root *root,
4257 struct btrfs_path *path)
4259 struct extent_buffer *buf;
4260 struct btrfs_key k1, k2;
4262 int level = path->lowest_level;
4265 buf = path->nodes[level];
4266 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4268 btrfs_node_key_to_cpu(buf, &k1, i);
4269 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4271 btrfs_item_key_to_cpu(buf, &k1, i);
4272 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* pair already in order — nothing to do for this slot */
4274 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4276 ret = swap_values(root, path, buf, i);
4279 btrfs_mark_buffer_dirty(buf);
/*
 * Delete the item at @slot from leaf @buf when it is of a type fsck knows
 * how to regenerate (dir index, extent items, backref items).  Only the
 * item headers are shifted down; the stale data area is simply abandoned.
 * If slot 0 was removed, the leaf's new first key must be propagated to
 * the parent via btrfs_fixup_low_keys().
 */
4285 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4286 struct btrfs_root *root,
4287 struct btrfs_path *path,
4288 struct extent_buffer *buf, int slot)
4290 struct btrfs_key key;
4291 int nritems = btrfs_header_nritems(buf);
4293 btrfs_item_key_to_cpu(buf, &key, slot);
4295 /* These are all the keys we can deal with missing. */
4296 if (key.type != BTRFS_DIR_INDEX_KEY &&
4297 key.type != BTRFS_EXTENT_ITEM_KEY &&
4298 key.type != BTRFS_METADATA_ITEM_KEY &&
4299 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4300 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4303 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4304 (unsigned long long)key.objectid, key.type,
4305 (unsigned long long)key.offset, slot, buf->start);
/* close the gap in the item header array */
4306 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4307 btrfs_item_nr_offset(slot + 1),
4308 sizeof(struct btrfs_item) *
4309 (nritems - slot - 1));
4310 btrfs_set_header_nritems(buf, nritems - 1);
4312 struct btrfs_disk_key disk_key;
4314 btrfs_item_key(buf, &disk_key, 0);
4315 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4317 btrfs_mark_buffer_dirty(buf);
/*
 * Repair invalid item offsets in a leaf.  Leaf item data is packed from
 * the end of the block downward, so item 0 must end exactly at
 * BTRFS_LEAF_DATA_SIZE and each item i must end where item i-1 starts.
 * Items that overhang their bound are deleted (delete_bogus_item) when
 * possible; gaps are closed by shifting the item's data up by @shift and
 * updating its offset.  Leaves only — BUG_ON(path->lowest_level).
 */
4321 static int fix_item_offset(struct btrfs_trans_handle *trans,
4322 struct btrfs_root *root,
4323 struct btrfs_path *path)
4325 struct extent_buffer *buf;
4329 /* We should only get this for leaves */
4330 BUG_ON(path->lowest_level);
4331 buf = path->nodes[0];
4333 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4334 unsigned int shift = 0, offset;
/* first item: must end flush against the end of the leaf data area */
4336 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4337 BTRFS_LEAF_DATA_SIZE(root)) {
4338 if (btrfs_item_end_nr(buf, i) >
4339 BTRFS_LEAF_DATA_SIZE(root)) {
4340 ret = delete_bogus_item(trans, root, path,
4344 fprintf(stderr, "item is off the end of the "
4345 "leaf, can't fix\n");
4349 shift = BTRFS_LEAF_DATA_SIZE(root) -
4350 btrfs_item_end_nr(buf, i);
/* later items: must end exactly where the previous item's data starts */
4351 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4352 btrfs_item_offset_nr(buf, i - 1)) {
4353 if (btrfs_item_end_nr(buf, i) >
4354 btrfs_item_offset_nr(buf, i - 1)) {
4355 ret = delete_bogus_item(trans, root, path,
4359 fprintf(stderr, "items overlap, can't fix\n");
4363 shift = btrfs_item_offset_nr(buf, i - 1) -
4364 btrfs_item_end_nr(buf, i);
4369 printf("Shifting item nr %d by %u bytes in block %llu\n",
4370 i, shift, (unsigned long long)buf->start);
4371 offset = btrfs_item_offset_nr(buf, i);
4372 memmove_extent_buffer(buf,
4373 btrfs_leaf_data(buf) + offset + shift,
4374 btrfs_leaf_data(buf) + offset,
4375 btrfs_item_size_nr(buf, i));
4376 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4378 btrfs_mark_buffer_dirty(buf);
4382 * We may have moved things, in which case we want to exit so we don't
4383 * write those changes out. Once we have proper abort functionality in
4384 * progs this can be changed to something nicer.
4391 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4392 * then just return -EIO.
/*
 * Only bad-key-order and invalid-offsets failures are repairable here.
 * For each root that references @buf (btrfs_find_all_roots), start a
 * transaction, CoW down to the block via btrfs_search_slot (with
 * skip_check_block so the known-bad block is readable), run the matching
 * fixer, and commit.  Error/cleanup paths are partially elided in this
 * truncated listing.
 */
4394 static int try_to_fix_bad_block(struct btrfs_root *root,
4395 struct extent_buffer *buf,
4396 enum btrfs_tree_block_status status)
4398 struct btrfs_trans_handle *trans;
4399 struct ulist *roots;
4400 struct ulist_node *node;
4401 struct btrfs_root *search_root;
4402 struct btrfs_path *path;
4403 struct ulist_iterator iter;
4404 struct btrfs_key root_key, key;
4407 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4408 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4411 path = btrfs_alloc_path();
4415 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4418 btrfs_free_path(path);
4422 ULIST_ITER_INIT(&iter);
4423 while ((node = ulist_next(roots, &iter))) {
/* node->val is a root objectid; resolve it to an fs root */
4424 root_key.objectid = node->val;
4425 root_key.type = BTRFS_ROOT_ITEM_KEY;
4426 root_key.offset = (u64)-1;
4428 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4435 trans = btrfs_start_transaction(search_root, 0);
4436 if (IS_ERR(trans)) {
4437 ret = PTR_ERR(trans);
/* search down to @buf's own level so path.nodes[level] is the bad block */
4441 path->lowest_level = btrfs_header_level(buf);
4442 path->skip_check_block = 1;
4443 if (path->lowest_level)
4444 btrfs_node_key_to_cpu(buf, &key, 0);
4446 btrfs_item_key_to_cpu(buf, &key, 0);
4447 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4450 btrfs_commit_transaction(trans, search_root);
4453 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4454 ret = fix_key_order(trans, search_root, path);
4455 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4456 ret = fix_item_offset(trans, search_root, path);
4458 btrfs_commit_transaction(trans, search_root);
4461 btrfs_release_path(path);
4462 btrfs_commit_transaction(trans, search_root);
4465 btrfs_free_path(path);
/*
 * Validate a tree block that was just read: record its generation and
 * first key in the matching extent_record, run btrfs_check_leaf/node, and
 * in repair mode attempt try_to_fix_bad_block().  Full-backref blocks skip
 * the owner check; otherwise check_owner_ref() decides owner_ref_checked.
 * Finishes by letting maybe_free_extent_rec() drop the record if fully
 * verified.
 */
4469 static int check_block(struct btrfs_root *root,
4470 struct cache_tree *extent_cache,
4471 struct extent_buffer *buf, u64 flags)
4473 struct extent_record *rec;
4474 struct cache_extent *cache;
4475 struct btrfs_key key;
4476 enum btrfs_tree_block_status status;
4480 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4483 rec = container_of(cache, struct extent_record, cache);
4484 rec->generation = btrfs_header_generation(buf);
4486 level = btrfs_header_level(buf);
4487 if (btrfs_header_nritems(buf) > 0) {
/* stash the block's first key (leaf vs node accessor) for later reports */
4490 btrfs_item_key_to_cpu(buf, &key, 0);
4492 btrfs_node_key_to_cpu(buf, &key, 0);
4494 rec->info_objectid = key.objectid;
4496 rec->info_level = level;
4498 if (btrfs_is_leaf(buf))
4499 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4501 status = btrfs_check_node(root, &rec->parent_key, buf);
4503 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4505 status = try_to_fix_bad_block(root, buf, status);
4506 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4508 fprintf(stderr, "bad block %llu\n",
4509 (unsigned long long)buf->start);
4512 * Signal to callers we need to start the scan over
4513 * again since we'll have cowed blocks.
4518 rec->content_checked = 1;
4519 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4520 rec->owner_ref_checked = 1;
4522 ret = check_owner_ref(root, rec, buf);
/* check_owner_ref() returns 0 on success — a zero ret marks the owner OK */
4524 rec->owner_ref_checked = 1;
4528 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look up a tree backref in @rec's backref rbtree.  A non-zero @parent
 * selects the full-backref (shared) form; otherwise the match is keyed by
 * @root.  Returns the backref or NULL (initial value) if absent.
 */
4533 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4534 u64 parent, u64 root)
4536 struct rb_node *node;
4537 struct tree_backref *back = NULL;
4538 struct tree_backref match = {
4545 match.parent = parent;
4546 match.node.full_backref = 1;
4551 node = rb_search(&rec->backref_tree, &match.node.node,
4552 (rb_compare_keys)compare_extent_backref, NULL);
4554 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Allocate a zero-initialized tree backref and insert it into @rec's
 * backref rbtree.  Non-zero @parent makes it a full (shared) backref;
 * otherwise it is keyed by @root with full_backref cleared.
 */
4559 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4560 u64 parent, u64 root)
4562 struct tree_backref *ref = malloc(sizeof(*ref));
4566 memset(&ref->node, 0, sizeof(ref->node));
4568 ref->parent = parent;
4569 ref->node.full_backref = 1;
4572 ref->node.full_backref = 0;
4574 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
/*
 * Look up a data backref in @rec's backref rbtree.  The match template
 * carries owner/offset/found_ref/disk_bytenr/bytes; a non-zero @parent
 * selects the full-backref (shared) form instead of the root-keyed one.
 * Returns the backref or NULL (initial value) if absent.
 */
4579 static struct data_backref *find_data_backref(struct extent_record *rec,
4580 u64 parent, u64 root,
4581 u64 owner, u64 offset,
4583 u64 disk_bytenr, u64 bytes)
4585 struct rb_node *node;
4586 struct data_backref *back = NULL;
4587 struct data_backref match = {
4594 .found_ref = found_ref,
4595 .disk_bytenr = disk_bytenr,
4599 match.parent = parent;
4600 match.node.full_backref = 1;
4605 node = rb_search(&rec->backref_tree, &match.node.node,
4606 (rb_compare_keys)compare_extent_backref, NULL);
4608 back = to_data_backref(rb_node_to_extent_backref(node));
/*
 * Allocate a data backref (is_data set), keyed either by @parent (full
 * backref) or by root/owner/offset, record its byte length, and insert it
 * into @rec's backref rbtree.  Also grows rec->max_size if this ref spans
 * more bytes than previously seen for the extent.
 */
4613 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4614 u64 parent, u64 root,
4615 u64 owner, u64 offset,
4618 struct data_backref *ref = malloc(sizeof(*ref));
4622 memset(&ref->node, 0, sizeof(ref->node));
4623 ref->node.is_data = 1;
4626 ref->parent = parent;
4629 ref->node.full_backref = 1;
4633 ref->offset = offset;
4634 ref->node.full_backref = 0;
4636 ref->bytes = max_size;
4639 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4640 if (max_size > rec->max_size)
4641 rec->max_size = max_size;
4645 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent's data/metadata/system nature
 * disagrees with the flags of the block group containing rec->start.
 * SYSTEM vs METADATA can only be told apart by the backref's root
 * (chunk tree => SYSTEM), so the first backref is consulted when present.
 */
4646 static void check_extent_type(struct extent_record *rec)
4648 struct btrfs_block_group_cache *bg_cache;
4650 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4654 /* data extent, check chunk directly*/
4655 if (!rec->metadata) {
4656 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4657 rec->wrong_chunk_type = 1;
4661 /* metadata extent, check the obvious case first */
4662 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4663 BTRFS_BLOCK_GROUP_METADATA))) {
4664 rec->wrong_chunk_type = 1;
4669 * Check SYSTEM extent, as it's also marked as metadata, we can only
4670 * make sure it's a SYSTEM extent by its backref
4672 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4673 struct extent_backref *node;
4674 struct tree_backref *tback;
4677 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4678 if (node->is_data) {
4679 /* tree block shouldn't have data backref */
4680 rec->wrong_chunk_type = 1;
4683 tback = container_of(node, struct tree_backref, node);
4685 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4686 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4688 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4689 if (!(bg_cache->flags & bg_type))
4690 rec->wrong_chunk_type = 1;
4695 * Allocate a new extent record, fill default values from @tmpl and insert int
4696 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4697 * the cache, otherwise it fails.
4699 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4700 struct extent_record *tmpl)
4702 struct extent_record *rec;
4705 rec = malloc(sizeof(*rec));
4708 rec->start = tmpl->start;
/* nr may be 0 in templates built from backrefs; clamp it up to max_size */
4709 rec->max_size = tmpl->max_size;
4710 rec->nr = max(tmpl->nr, tmpl->max_size);
4711 rec->found_rec = tmpl->found_rec;
4712 rec->content_checked = tmpl->content_checked;
4713 rec->owner_ref_checked = tmpl->owner_ref_checked;
4714 rec->num_duplicates = 0;
4715 rec->metadata = tmpl->metadata;
4716 rec->flag_block_full_backref = FLAG_UNSET;
4717 rec->bad_full_backref = 0;
4718 rec->crossing_stripes = 0;
4719 rec->wrong_chunk_type = 0;
4720 rec->is_root = tmpl->is_root;
4721 rec->refs = tmpl->refs;
4722 rec->extent_item_refs = tmpl->extent_item_refs;
4723 rec->parent_generation = tmpl->parent_generation;
4724 INIT_LIST_HEAD(&rec->backrefs);
4725 INIT_LIST_HEAD(&rec->dups);
4726 INIT_LIST_HEAD(&rec->list);
4727 rec->backref_tree = RB_ROOT;
4728 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4729 rec->cache.start = tmpl->start;
4730 rec->cache.size = tmpl->nr;
4731 ret = insert_cache_extent(extent_cache, &rec->cache);
4733 bytes_used += rec->nr;
/* metadata must not straddle a stripe boundary or kernel scrub breaks */
4736 rec->crossing_stripes = check_crossing_stripes(rec->start,
4737 global_info->tree_root->nodesize);
4738 check_extent_type(rec);
4743 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4745 * - refs - if found, increase refs
4746 * - is_root - if found, set
4747 * - content_checked - if found, set
4748 * - owner_ref_checked - if found, set
4750 * If not found, create a new one, initialize and insert.
4752 static int add_extent_rec(struct cache_tree *extent_cache,
4753 struct extent_record *tmpl)
4755 struct extent_record *rec;
4756 struct cache_extent *cache;
4760 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4762 rec = container_of(cache, struct extent_record, cache);
4766 rec->nr = max(tmpl->nr, tmpl->max_size);
4769 * We need to make sure to reset nr to whatever the extent
4770 * record says was the real size, this way we can compare it to
4773 if (tmpl->found_rec) {
/* a second EXTENT_ITEM for the same record => duplicate extent */
4774 if (tmpl->start != rec->start || rec->found_rec) {
4775 struct extent_record *tmp;
4778 if (list_empty(&rec->list))
4779 list_add_tail(&rec->list,
4780 &duplicate_extents);
4783 * We have to do this song and dance in case we
4784 * find an extent record that falls inside of
4785 * our current extent record but does not have
4786 * the same objectid.
4788 tmp = malloc(sizeof(*tmp));
4791 tmp->start = tmpl->start;
4792 tmp->max_size = tmpl->max_size;
4795 tmp->metadata = tmpl->metadata;
4796 tmp->extent_item_refs = tmpl->extent_item_refs;
4797 INIT_LIST_HEAD(&tmp->list);
4798 list_add_tail(&tmp->list, &rec->dups);
4799 rec->num_duplicates++;
/* conflicting EXTENT_ITEM ref counts are reported, last one wins */
4806 if (tmpl->extent_item_refs && !dup) {
4807 if (rec->extent_item_refs) {
4808 fprintf(stderr, "block %llu rec "
4809 "extent_item_refs %llu, passed %llu\n",
4810 (unsigned long long)tmpl->start,
4811 (unsigned long long)
4812 rec->extent_item_refs,
4813 (unsigned long long)tmpl->extent_item_refs);
4815 rec->extent_item_refs = tmpl->extent_item_refs;
4819 if (tmpl->content_checked)
4820 rec->content_checked = 1;
4821 if (tmpl->owner_ref_checked)
4822 rec->owner_ref_checked = 1;
4823 memcpy(&rec->parent_key, &tmpl->parent_key,
4824 sizeof(tmpl->parent_key));
4825 if (tmpl->parent_generation)
4826 rec->parent_generation = tmpl->parent_generation;
4827 if (rec->max_size < tmpl->max_size)
4828 rec->max_size = tmpl->max_size;
4831 * A metadata extent can't cross stripe_len boundary, otherwise
4832 * kernel scrub won't be able to handle it.
4833 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4837 rec->crossing_stripes = check_crossing_stripes(
4838 rec->start, global_info->tree_root->nodesize);
4839 check_extent_type(rec);
4840 maybe_free_extent_rec(extent_cache, rec);
/* not found in the cache: insert a fresh record from the template */
4844 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree-block backref for @bytenr, creating a placeholder extent
 * record first if none covers the byte.  @found_ref distinguishes a ref
 * seen while walking the trees from one read out of the extent tree; each
 * side warns if it was already recorded (a duplicate backref).
 */
4849 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4850 u64 parent, u64 root, int found_ref)
4852 struct extent_record *rec;
4853 struct tree_backref *back;
4854 struct cache_extent *cache;
4856 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4858 struct extent_record tmpl;
/* no record yet: insert a minimal placeholder, then re-lookup */
4860 memset(&tmpl, 0, sizeof(tmpl));
4861 tmpl.start = bytenr;
4865 add_extent_rec_nolookup(extent_cache, &tmpl);
4867 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4872 rec = container_of(cache, struct extent_record, cache);
4873 if (rec->start != bytenr) {
4877 back = find_tree_backref(rec, parent, root);
4879 back = alloc_tree_backref(rec, parent, root);
4884 if (back->node.found_ref) {
4885 fprintf(stderr, "Extent back ref already exists "
4886 "for %llu parent %llu root %llu \n",
4887 (unsigned long long)bytenr,
4888 (unsigned long long)parent,
4889 (unsigned long long)root);
4891 back->node.found_ref = 1;
4893 if (back->node.found_extent_tree) {
4894 fprintf(stderr, "Extent back ref already exists "
4895 "for %llu parent %llu root %llu \n",
4896 (unsigned long long)bytenr,
4897 (unsigned long long)parent,
4898 (unsigned long long)root);
4900 back->node.found_extent_tree = 1;
4902 check_extent_type(rec);
4903 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for @bytenr, creating a placeholder extent record
 * if needed.  @found_ref means the ref came from a file extent item (so
 * max_size is the real length and num_refs must be 1); otherwise the ref
 * came from the extent tree and carries num_refs.
 */
4907 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4908 u64 parent, u64 root, u64 owner, u64 offset,
4909 u32 num_refs, int found_ref, u64 max_size)
4911 struct extent_record *rec;
4912 struct data_backref *back;
4913 struct cache_extent *cache;
4915 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4917 struct extent_record tmpl;
/* no record yet: insert a minimal placeholder, then re-lookup */
4919 memset(&tmpl, 0, sizeof(tmpl));
4920 tmpl.start = bytenr;
4922 tmpl.max_size = max_size;
4924 add_extent_rec_nolookup(extent_cache, &tmpl);
4926 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4931 rec = container_of(cache, struct extent_record, cache);
4932 if (rec->max_size < max_size)
4933 rec->max_size = max_size;
4936 * If found_ref is set then max_size is the real size and must match the
4937 * existing refs. So if we have already found a ref then we need to
4938 * make sure that this ref matches the existing one, otherwise we need
4939 * to add a new backref so we can notice that the backrefs don't match
4940 * and we need to figure out who is telling the truth. This is to
4941 * account for that awful fsync bug I introduced where we'd end up with
4942 * a btrfs_file_extent_item that would have its length include multiple
4943 * prealloc extents or point inside of a prealloc extent.
4945 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4948 back = alloc_data_backref(rec, parent, root, owner, offset,
4954 BUG_ON(num_refs != 1);
4955 if (back->node.found_ref)
4956 BUG_ON(back->bytes != max_size);
4957 back->node.found_ref = 1;
4958 back->found_ref += 1;
4959 back->bytes = max_size;
4960 back->disk_bytenr = bytenr;
/* a ref found in an fs tree implies the block content/owner were walked */
4962 rec->content_checked = 1;
4963 rec->owner_ref_checked = 1;
4965 if (back->node.found_extent_tree) {
4966 fprintf(stderr, "Extent back ref already exists "
4967 "for %llu parent %llu root %llu "
4968 "owner %llu offset %llu num_refs %lu\n",
4969 (unsigned long long)bytenr,
4970 (unsigned long long)parent,
4971 (unsigned long long)root,
4972 (unsigned long long)owner,
4973 (unsigned long long)offset,
4974 (unsigned long)num_refs);
4976 back->num_refs = num_refs;
4977 back->node.found_extent_tree = 1;
4979 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, size) for later processing: insert into @seen first to
 * deduplicate (its return gates the second insert), then into @pending.
 */
4983 static int add_pending(struct cache_tree *pending,
4984 struct cache_tree *seen, u64 bytenr, u32 size)
4987 ret = add_cache_extent(seen, bytenr, size);
4990 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (up to @bits_nr entries) with the next block ranges to read,
 * preferring locality: readahead entries first, then node entries near
 * @last (backed off by 32K to catch neighbours), then plain pending
 * entries.  When room remains, pulls additional pending extents that lie
 * within 32K of the previous pick.  Returns the number of entries filled.
 */
4994 static int pick_next_pending(struct cache_tree *pending,
4995 struct cache_tree *reada,
4996 struct cache_tree *nodes,
4997 u64 last, struct block_info *bits, int bits_nr,
5000 unsigned long node_start = last;
5001 struct cache_extent *cache;
5004 cache = search_cache_extent(reada, 0);
5006 bits[0].start = cache->start;
5007 bits[0].size = cache->size;
/* step back 32K so blocks just before @last are also considered */
5012 if (node_start > 32768)
5013 node_start -= 32768;
5015 cache = search_cache_extent(nodes, node_start);
5017 cache = search_cache_extent(nodes, 0);
5020 cache = search_cache_extent(pending, 0);
5025 bits[ret].start = cache->start;
5026 bits[ret].size = cache->size;
5027 cache = next_cache_extent(cache);
5029 } while (cache && ret < bits_nr);
5035 bits[ret].start = cache->start;
5036 bits[ret].size = cache->size;
5037 cache = next_cache_extent(cache);
5039 } while (cache && ret < bits_nr);
/* if plenty of room is left, opportunistically batch nearby pendings */
5041 if (bits_nr - ret > 8) {
5042 u64 lookup = bits[0].start + bits[0].size;
5043 struct cache_extent *next;
5044 next = search_cache_extent(pending, lookup);
5046 if (next->start - lookup > 32768)
5048 bits[ret].start = next->start;
5049 bits[ret].size = next->size;
5050 lookup = next->start + next->size;
5054 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback: detach a chunk_record from its
 * lists before it is released.  (The free of @rec is elided here.)
 */
5062 static void free_chunk_record(struct cache_extent *cache)
5064 struct chunk_record *rec;
5066 rec = container_of(cache, struct chunk_record, cache);
5067 list_del_init(&rec->list);
5068 list_del_init(&rec->dextents);
/* Free every chunk_record held in @chunk_cache. */
5072 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5074 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * rb-tree free callback: recover and release the device_record that
 * embeds @node.  (The free of @rec is elided in this listing.)
 */
5077 static void free_device_record(struct rb_node *node)
5079 struct device_record *rec;
5081 rec = container_of(node, struct device_record, node);
/*
 * NOTE(review): this line is the visible body of free_device_cache()
 * (function header elided in this listing) — tears down the device cache
 * rb-tree using free_device_record().
 */
5085 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into @tree's extent cache; on success also append it to
 * the tree's block_groups list for ordered iteration.
 */
5087 int insert_block_group_record(struct block_group_tree *tree,
5088 struct block_group_record *bg_rec)
5092 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5096 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback: unlink a block_group_record before
 * release.  (The free of @rec is elided in this listing.)
 */
5100 static void free_block_group_record(struct cache_extent *cache)
5102 struct block_group_record *rec;
5104 rec = container_of(cache, struct block_group_record, cache);
5105 list_del_init(&rec->list);
/* Free every block_group_record held in @tree. */
5109 void free_block_group_tree(struct block_group_tree *tree)
5111 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into @tree and put it on both "orphan" lists; it is
 * removed from them later when its owning chunk/device is matched up.
 */
5114 int insert_device_extent_record(struct device_extent_tree *tree,
5115 struct device_extent_record *de_rec)
5120 * Device extent is a bit different from the other extents, because
5121 * the extents which belong to the different devices may have the
5122 * same start and size, so we need use the special extent cache
5123 * search/insert functions.
5125 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5129 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5130 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback: unlink a device_extent_record from
 * whichever orphan lists it still sits on, then release it.  (The free of
 * @rec is elided in this listing.)
 */
5134 static void free_device_extent_record(struct cache_extent *cache)
5136 struct device_extent_record *rec;
5138 rec = container_of(cache, struct device_extent_record, cache);
5139 if (!list_empty(&rec->chunk_list))
5140 list_del_init(&rec->chunk_list);
5141 if (!list_empty(&rec->device_list))
5142 list_del_init(&rec->device_list);
/* Free every device_extent_record held in @tree. */
5146 void free_device_extent_tree(struct device_extent_tree *tree)
5148 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5151 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Translate a v0 extent ref item into the modern backref bookkeeping.
 * A v0 owner below BTRFS_FIRST_FREE_OBJECTID denotes a tree block
 * (tree backref); anything else is treated as a data backref with the
 * v0 ref count.  key.offset carries the parent bytenr in both cases.
 */
5152 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5153 struct extent_buffer *leaf, int slot)
5155 struct btrfs_extent_ref_v0 *ref0;
5156 struct btrfs_key key;
5158 btrfs_item_key_to_cpu(leaf, &key, slot);
5159 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5160 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5161 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5163 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5164 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the CHUNK_ITEM at @slot of @leaf.
 * Allocation is sized for the item's stripe count
 * (btrfs_chunk_record_size); on allocation failure the error is printed
 * and the process exits (memory allocation failed path).  Copies all
 * geometry fields plus per-stripe devid/offset/uuid.
 */
5170 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5171 struct btrfs_key *key,
5174 struct btrfs_chunk *ptr;
5175 struct chunk_record *rec;
5178 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5179 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* calloc zero-fills, so untouched fields start out 0 */
5181 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5183 fprintf(stderr, "memory allocation failed\n");
5187 INIT_LIST_HEAD(&rec->list);
5188 INIT_LIST_HEAD(&rec->dextents);
/* chunk items are keyed by logical start in key->offset */
5191 rec->cache.start = key->offset;
5192 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5194 rec->generation = btrfs_header_generation(leaf);
5196 rec->objectid = key->objectid;
5197 rec->type = key->type;
5198 rec->offset = key->offset;
5200 rec->length = rec->cache.size;
5201 rec->owner = btrfs_chunk_owner(leaf, ptr);
5202 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5203 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5204 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5205 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5206 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5207 rec->num_stripes = num_stripes;
5208 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5210 for (i = 0; i < rec->num_stripes; ++i) {
5211 rec->stripes[i].devid =
5212 btrfs_stripe_devid_nr(leaf, ptr, i);
5213 rec->stripes[i].offset =
5214 btrfs_stripe_offset_nr(leaf, ptr, i);
5215 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5216 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate a CHUNK_ITEM (btrfs_check_chunk_valid guards against crafted
 * leaves from outside the chunk tree) and, if sane, record it in
 * @chunk_cache; a clash on insert means the chunk range already existed.
 */
5223 static int process_chunk_item(struct cache_tree *chunk_cache,
5224 struct btrfs_key *key, struct extent_buffer *eb,
5227 struct chunk_record *rec;
5228 struct btrfs_chunk *chunk;
5231 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5233 * Do extra check for this chunk item,
5235 * It's still possible one can craft a leaf with CHUNK_ITEM, with
5236 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5237 * and owner<->key_type check.
5239 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5242 error("chunk(%llu, %llu) is not valid, ignore it",
5243 key->offset, btrfs_chunk_length(eb, chunk));
5246 rec = btrfs_new_chunk_record(eb, key, slot);
5247 ret = insert_cache_extent(chunk_cache, &rec->cache);
5249 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5250 rec->offset, rec->length);
/*
 * Build a device_record from the DEV_ITEM at @slot and insert it into
 * @dev_cache keyed by devid; a duplicate devid is reported.  On malloc
 * failure the error is printed (exit path elided in this listing).
 */
5257 static int process_device_item(struct rb_root *dev_cache,
5258 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5260 struct btrfs_dev_item *ptr;
5261 struct device_record *rec;
5264 ptr = btrfs_item_ptr(eb,
5265 slot, struct btrfs_dev_item);
5267 rec = malloc(sizeof(*rec));
5269 fprintf(stderr, "memory allocation failed\n");
/* devid comes from the key first, then is overwritten by the item field */
5273 rec->devid = key->offset;
5274 rec->generation = btrfs_header_generation(eb);
5276 rec->objectid = key->objectid;
5277 rec->type = key->type;
5278 rec->offset = key->offset;
5280 rec->devid = btrfs_device_id(eb, ptr);
5281 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5282 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5284 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5286 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build a block_group_record from the BLOCK_GROUP_ITEM at @slot.  The key
 * encodes start (objectid) and length (offset); flags come from the item.
 * On calloc failure the error is printed (exit path elided here).
 */
5293 struct block_group_record *
5294 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5297 struct btrfs_block_group_item *ptr;
5298 struct block_group_record *rec;
5300 rec = calloc(1, sizeof(*rec));
5302 fprintf(stderr, "memory allocation failed\n");
5306 rec->cache.start = key->objectid;
5307 rec->cache.size = key->offset;
5309 rec->generation = btrfs_header_generation(leaf);
5311 rec->objectid = key->objectid;
5312 rec->type = key->type;
5313 rec->offset = key->offset;
5315 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5316 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5318 INIT_LIST_HEAD(&rec->list);
/*
 * Record the BLOCK_GROUP_ITEM at @slot in @block_group_cache; an insert
 * clash means the [objectid, offset) range was already recorded.
 */
5323 static int process_block_group_item(struct block_group_tree *block_group_cache,
5324 struct btrfs_key *key,
5325 struct extent_buffer *eb, int slot)
5327 struct block_group_record *rec;
5330 rec = btrfs_new_block_group_record(eb, key, slot);
5331 ret = insert_block_group_record(block_group_cache, rec);
5333 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5334 rec->objectid, rec->offset);
/*
 * Build a device_extent_record from the DEV_EXTENT item at @slot.  The
 * key encodes devid (objectid) and the on-device start (offset); chunk
 * linkage and length come from the item.  On calloc failure the error is
 * printed (exit path elided here).
 */
5341 struct device_extent_record *
5342 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5343 struct btrfs_key *key, int slot)
5345 struct device_extent_record *rec;
5346 struct btrfs_dev_extent *ptr;
5348 rec = calloc(1, sizeof(*rec));
5350 fprintf(stderr, "memory allocation failed\n");
/* cache.objectid carries the devid — dev extents need the 2-key cache */
5354 rec->cache.objectid = key->objectid;
5355 rec->cache.start = key->offset;
5357 rec->generation = btrfs_header_generation(leaf);
5359 rec->objectid = key->objectid;
5360 rec->type = key->type;
5361 rec->offset = key->offset;
5363 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5364 rec->chunk_objecteid =
5365 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5367 btrfs_dev_extent_chunk_offset(leaf, ptr);
5368 rec->length = btrfs_dev_extent_length(leaf, ptr);
5369 rec->cache.size = rec->length;
5371 INIT_LIST_HEAD(&rec->chunk_list);
5372 INIT_LIST_HEAD(&rec->device_list);
/*
 * Record the DEV_EXTENT item at @slot in @dev_extent_cache; an insert
 * clash means this (devid, start, length) extent was already recorded.
 */
5378 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5379 struct btrfs_key *key, struct extent_buffer *eb,
5382 struct device_extent_record *rec;
5385 rec = btrfs_new_device_extent_record(eb, key, slot);
5386 ret = insert_device_extent_record(dev_extent_cache, rec);
5389 "Device extent[%llu, %llu, %llu] existed.\n",
5390 rec->objectid, rec->offset, rec->length);
/*
 * Parse an EXTENT_ITEM / METADATA_ITEM and feed its contents into the
 * extent cache: the item itself becomes/updates an extent_record, and
 * each inline reference becomes a tree or data backref.  METADATA_ITEM
 * length is the nodesize (key.offset holds the level, not bytes); v0
 * items (smaller than btrfs_extent_item) are handled under the compat
 * ifdef and return early.
 */
5397 static int process_extent_item(struct btrfs_root *root,
5398 struct cache_tree *extent_cache,
5399 struct extent_buffer *eb, int slot)
5401 struct btrfs_extent_item *ei;
5402 struct btrfs_extent_inline_ref *iref;
5403 struct btrfs_extent_data_ref *dref;
5404 struct btrfs_shared_data_ref *sref;
5405 struct btrfs_key key;
5406 struct extent_record tmpl;
5410 u32 item_size = btrfs_item_size_nr(eb, slot);
5416 btrfs_item_key_to_cpu(eb, &key, slot);
5418 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5420 num_bytes = root->nodesize;
5422 num_bytes = key.offset;
5425 if (item_size < sizeof(*ei)) {
5426 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5427 struct btrfs_extent_item_v0 *ei0;
5428 BUG_ON(item_size != sizeof(*ei0));
5429 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5430 refs = btrfs_extent_refs_v0(eb, ei0);
/* v0 item: only the refcount is available; record and return */
5434 memset(&tmpl, 0, sizeof(tmpl));
5435 tmpl.start = key.objectid;
5436 tmpl.nr = num_bytes;
5437 tmpl.extent_item_refs = refs;
5438 tmpl.metadata = metadata;
5440 tmpl.max_size = num_bytes;
5442 return add_extent_rec(extent_cache, &tmpl);
5445 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5446 refs = btrfs_extent_refs(eb, ei);
5447 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5452 memset(&tmpl, 0, sizeof(tmpl));
5453 tmpl.start = key.objectid;
5454 tmpl.nr = num_bytes;
5455 tmpl.extent_item_refs = refs;
5456 tmpl.metadata = metadata;
5458 tmpl.max_size = num_bytes;
5459 add_extent_rec(extent_cache, &tmpl);
/* walk the inline refs packed after the extent item */
5461 ptr = (unsigned long)(ei + 1);
5462 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5463 key.type == BTRFS_EXTENT_ITEM_KEY)
5464 ptr += sizeof(struct btrfs_tree_block_info);
5466 end = (unsigned long)ei + item_size;
5468 iref = (struct btrfs_extent_inline_ref *)ptr;
5469 type = btrfs_extent_inline_ref_type(eb, iref);
5470 offset = btrfs_extent_inline_ref_offset(eb, iref);
5472 case BTRFS_TREE_BLOCK_REF_KEY:
5473 add_tree_backref(extent_cache, key.objectid,
5476 case BTRFS_SHARED_BLOCK_REF_KEY:
5477 add_tree_backref(extent_cache, key.objectid,
5480 case BTRFS_EXTENT_DATA_REF_KEY:
5481 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5482 add_data_backref(extent_cache, key.objectid, 0,
5483 btrfs_extent_data_ref_root(eb, dref),
5484 btrfs_extent_data_ref_objectid(eb,
5486 btrfs_extent_data_ref_offset(eb, dref),
5487 btrfs_extent_data_ref_count(eb, dref),
5490 case BTRFS_SHARED_DATA_REF_KEY:
5491 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5492 add_data_backref(extent_cache, key.objectid, offset,
5494 btrfs_shared_data_ref_count(eb, sref),
5498 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5499 key.objectid, key.type, num_bytes);
/* advance to the next inline ref; size depends on the ref type */
5502 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that the range [@offset, @offset + @bytes) of block group @cache
 * is covered by exactly one free space entry.  Superblock mirror stripes
 * are carved out of the range first (they are never in the free space
 * cache), recursing for the case where a stripe splits the range in two.
 *
 * NOTE(review): gappy listing — several closing braces, `continue`/return
 * lines and local declarations are missing from view.
 */
5509 static int check_cache_range(struct btrfs_root *root,
5510 struct btrfs_block_group_cache *cache,
5511 u64 offset, u64 bytes)
5513 struct btrfs_free_space *entry;
/* Map each superblock mirror into this block group's logical space. */
5519 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5520 bytenr = btrfs_sb_offset(i);
5521 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5522 cache->key.objectid, bytenr, 0,
5523 &logical, &nr, &stripe_len);
/* Stripe entirely before or after our range: nothing to trim. */
5528 if (logical[nr] + stripe_len <= offset)
5530 if (offset + bytes <= logical[nr])
/* Stripe starts exactly at our range: trim it off the front. */
5532 if (logical[nr] == offset) {
5533 if (stripe_len >= bytes) {
5537 bytes -= stripe_len;
5538 offset += stripe_len;
/* Stripe starts before our range: trim off the overlapping head. */
5539 } else if (logical[nr] < offset) {
5540 if (logical[nr] + stripe_len >=
5545 bytes = (offset + bytes) -
5546 (logical[nr] + stripe_len);
5547 offset = logical[nr] + stripe_len;
5550 * Could be tricky, the super may land in the
5551 * middle of the area we're checking. First
5552 * check the easiest case, it's at the end.
5554 if (logical[nr] + stripe_len >=
5556 bytes = logical[nr] - offset;
5560 /* Check the left side */
5561 ret = check_cache_range(root, cache,
5563 logical[nr] - offset);
5569 /* Now we continue with the right side */
5570 bytes = (offset + bytes) -
5571 (logical[nr] + stripe_len);
5572 offset = logical[nr] + stripe_len;
/* After carving out supers, the remainder must match one cache entry. */
5579 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5581 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5582 offset, offset+bytes);
5586 if (entry->offset != offset) {
5587 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5592 if (entry->bytes != bytes) {
5593 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5594 bytes, entry->bytes, offset);
/* Consume the entry so verify_space_cache() can detect leftovers. */
5598 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Walk the extent tree across block group @cache and check that every gap
 * between allocated extents is present in the loaded free space cache
 * (via check_cache_range()).  Entries are unlinked as they are verified;
 * anything left in the free space ctl afterwards is an error.
 *
 * NOTE(review): gappy listing — the allocation-failure check, loop
 * braces and several break/return lines are missing from view.
 */
5603 static int verify_space_cache(struct btrfs_root *root,
5604 struct btrfs_block_group_cache *cache)
5606 struct btrfs_path *path;
5607 struct extent_buffer *leaf;
5608 struct btrfs_key key;
5612 path = btrfs_alloc_path();
/* All lookups below are against the extent tree, not the passed root. */
5616 root = root->fs_info->extent_root;
/* Never scan below the first superblock offset. */
5618 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5620 key.objectid = last;
5622 key.type = BTRFS_EXTENT_ITEM_KEY;
5624 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5629 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5630 ret = btrfs_next_leaf(root, path);
5638 leaf = path->nodes[0];
5639 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop once we leave this block group. */
5640 if (key.objectid >= cache->key.offset + cache->key.objectid)
5642 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5643 key.type != BTRFS_METADATA_ITEM_KEY) {
/*
 * Extent is adjacent to the previous one: just advance `last`.
 * METADATA_ITEM length is implicit (nodesize); EXTENT_ITEM stores it
 * in key.offset.
 */
5648 if (last == key.objectid) {
5649 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5650 last = key.objectid + key.offset;
5652 last = key.objectid + root->nodesize;
/* Gap between `last` and this extent must be in the free space cache. */
5657 ret = check_cache_range(root, cache, last,
5658 key.objectid - last);
5661 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5662 last = key.objectid + key.offset;
5664 last = key.objectid + root->nodesize;
/* Trailing free space at the end of the block group. */
5668 if (last < cache->key.objectid + cache->key.offset)
5669 ret = check_cache_range(root, cache, last,
5670 cache->key.objectid +
5671 cache->key.offset - last);
5674 btrfs_free_path(path);
/* Every cache entry should have been unlinked by check_cache_range(). */
5677 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5678 fprintf(stderr, "There are still entries left in the space "
/*
 * Top-level free space verification: iterate all block groups, load each
 * one's free space cache (v1 file or v2 free-space-tree, depending on the
 * RO compat bit) and verify it against the extent tree.
 *
 * Returns 0 on success, -EINVAL if any block group failed verification.
 *
 * NOTE(review): gappy listing — the main `while (1)` loop header, error
 * accumulation into `error` and several braces are missing from view.
 */
5686 static int check_space_cache(struct btrfs_root *root)
5688 struct btrfs_block_group_cache *cache;
5689 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/* A cache generation behind the super means the cache is stale. */
5693 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5694 btrfs_super_generation(root->fs_info->super_copy) !=
5695 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5696 printf("cache and super generation don't match, space cache "
5697 "will be invalidated\n");
5701 if (ctx.progress_enabled) {
5702 ctx.tp = TASK_FREE_SPACE;
5703 task_start(ctx.info);
/* Iterate block groups in address order starting past the first super. */
5707 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5711 start = cache->key.objectid + cache->key.offset;
5712 if (!cache->free_space_ctl) {
5713 if (btrfs_init_free_space_ctl(cache,
5714 root->sectorsize)) {
/* Drop any previously-loaded entries before reloading. */
5719 btrfs_remove_free_space_cache(cache);
/* Free-space-tree (v2) path: supers must be excluded before loading. */
5722 if (btrfs_fs_compat_ro(root->fs_info,
5723 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5724 ret = exclude_super_stripes(root, cache);
5726 fprintf(stderr, "could not exclude super stripes: %s\n",
5731 ret = load_free_space_tree(root->fs_info, cache);
5732 free_excluded_extents(root, cache);
5734 fprintf(stderr, "could not load free space tree: %s\n",
/* v1 space cache path. */
5741 ret = load_free_space_cache(root->fs_info, cache);
5746 ret = verify_space_cache(root, cache);
5748 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5749 cache->key.objectid);
5754 task_stop(ctx.info);
5756 return error ? -EINVAL : 0;
/*
 * Read @num_bytes of file data at @bytenr and verify each sectorsize
 * chunk against the checksums stored at @leaf_offset inside csum leaf
 * @eb.  On a mismatch the next mirror is tried until all copies are
 * exhausted.
 *
 * NOTE(review): gappy listing — local declarations (csum, data, mirror,
 * num_copies...), the malloc failure check, loop-retry logic and the
 * function tail (free/return) are missing from view.
 */
5759 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5760 u64 num_bytes, unsigned long leaf_offset,
5761 struct extent_buffer *eb) {
5764 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5766 unsigned long csum_offset;
5770 u64 data_checked = 0;
/* Checksums cover whole sectors only. */
5776 if (num_bytes % root->sectorsize)
5779 data = malloc(num_bytes);
5783 while (offset < num_bytes) {
5786 read_len = num_bytes - offset;
5787 /* read as much space once a time */
5788 ret = read_extent_data(root, data + offset,
5789 bytenr + offset, &read_len, mirror);
5793 /* verify every 4k data's checksum */
5794 while (data_checked < read_len) {
5796 tmp = offset + data_checked;
5798 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5799 csum, root->sectorsize);
5800 btrfs_csum_final(csum, (char *)&csum);
/* Locate the on-disk csum for this sector within the csum item. */
5802 csum_offset = leaf_offset +
5803 tmp / root->sectorsize * csum_size;
5804 read_extent_buffer(eb, (char *)&csum_expected,
5805 csum_offset, csum_size);
5806 /* try another mirror */
5807 if (csum != csum_expected) {
5808 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5809 mirror, bytenr + tmp,
5810 csum, csum_expected);
5811 num_copies = btrfs_num_copies(
5812 &root->fs_info->mapping_tree,
5814 if (mirror < num_copies - 1) {
5819 data_checked += root->sectorsize;
/*
 * Check that the byte range [@bytenr, @bytenr + @num_bytes) is fully
 * covered by EXTENT_ITEMs in the extent tree.  The range is trimmed as
 * covering extents are found; if an extent splits the range, the right
 * half is checked recursively.  Any uncovered remainder is reported.
 *
 * NOTE(review): gappy listing — the num_bytes parameter line, loop
 * braces, path->slots adjustments and several goto/break lines are
 * missing from view.
 */
5828 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5831 struct btrfs_path *path;
5832 struct extent_buffer *leaf;
5833 struct btrfs_key key;
5836 path = btrfs_alloc_path();
5838 fprintf(stderr, "Error allocating path\n");
/* Search for the last possible extent item at this bytenr. */
5842 key.objectid = bytenr;
5843 key.type = BTRFS_EXTENT_ITEM_KEY;
5844 key.offset = (u64)-1;
5847 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5850 fprintf(stderr, "Error looking up extent record %d\n", ret);
5851 btrfs_free_path(path);
5854 if (path->slots[0] > 0) {
5857 ret = btrfs_prev_leaf(root, path);
5860 } else if (ret > 0) {
5867 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5870 * Block group items come before extent items if they have the same
5871 * bytenr, so walk back one more just in case. Dear future traveller,
5872 * first congrats on mastering time travel. Now if it's not too much
5873 * trouble could you go back to 2006 and tell Chris to make the
5874 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5875 * EXTENT_ITEM_KEY please?
5877 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5878 if (path->slots[0] > 0) {
5881 ret = btrfs_prev_leaf(root, path);
5884 } else if (ret > 0) {
5889 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* Main scan loop over candidate extent items. */
5893 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5894 ret = btrfs_next_leaf(root, path);
5896 fprintf(stderr, "Error going to next leaf "
5898 btrfs_free_path(path);
5904 leaf = path->nodes[0];
5905 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5906 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before our range: skip forward. */
5910 if (key.objectid + key.offset < bytenr) {
/* Extent starts past our range: no more coverage possible. */
5914 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at our range: trim the front. */
5917 if (key.objectid == bytenr) {
5918 if (key.offset >= num_bytes) {
5922 num_bytes -= key.offset;
5923 bytenr += key.offset;
/* Extent overlaps the front of our range. */
5924 } else if (key.objectid < bytenr) {
5925 if (key.objectid + key.offset >= bytenr + num_bytes) {
5929 num_bytes = (bytenr + num_bytes) -
5930 (key.objectid + key.offset);
5931 bytenr = key.objectid + key.offset;
/* Extent lands in the middle: recurse for the right side. */
5933 if (key.objectid + key.offset < bytenr + num_bytes) {
5934 u64 new_start = key.objectid + key.offset;
5935 u64 new_bytes = bytenr + num_bytes - new_start;
5938 * Weird case, the extent is in the middle of
5939 * our range, we'll have to search one side
5940 * and then the other. Not sure if this happens
5941 * in real life, but no harm in coding it up
5942 * anyway just in case.
5944 btrfs_release_path(path);
5945 ret = check_extent_exists(root, new_start,
5948 fprintf(stderr, "Right section didn't "
5952 num_bytes = key.objectid - bytenr;
5955 num_bytes = key.objectid - bytenr;
/* Anything left over was not covered by any extent item. */
5962 if (num_bytes && !ret) {
5963 fprintf(stderr, "There are no extents for csum range "
5964 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5968 btrfs_free_path(path);
/*
 * Walk the whole csum tree and verify that (a) every checksummed range
 * is backed by an extent record (check_extent_exists()) and, when
 * --check-data-csum is enabled, (b) the data actually matches its
 * checksums (check_extent_csums()).  Contiguous csum items are coalesced
 * into [offset, offset + num_bytes) runs before the extent lookup.
 *
 * NOTE(review): gappy listing — some local declarations, the allocation
 * check after btrfs_alloc_path(), loop braces and the skip_csum_check
 * label line are missing from view.
 */
5972 static int check_csums(struct btrfs_root *root)
5974 struct btrfs_path *path;
5975 struct extent_buffer *leaf;
5976 struct btrfs_key key;
5977 u64 offset = 0, num_bytes = 0;
5978 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5982 unsigned long leaf_offset;
/* All csums live in the csum tree under one well-known objectid. */
5984 root = root->fs_info->csum_root;
5985 if (!extent_buffer_uptodate(root->node)) {
5986 fprintf(stderr, "No valid csum tree found\n");
5990 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5991 key.type = BTRFS_EXTENT_CSUM_KEY;
5994 path = btrfs_alloc_path();
5998 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6000 fprintf(stderr, "Error searching csum tree %d\n", ret);
6001 btrfs_free_path(path);
6005 if (ret > 0 && path->slots[0])
6010 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6011 ret = btrfs_next_leaf(root, path);
6013 fprintf(stderr, "Error going to next leaf "
6020 leaf = path->nodes[0];
6022 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6023 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum entry covers one sector; derive the covered data length. */
6028 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
6029 csum_size) * root->sectorsize;
6030 if (!check_data_csum)
6031 goto skip_csum_check;
6032 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
6033 ret = check_extent_csums(root, key.offset, data_len,
/* Start of a new run, or a discontinuity: verify the previous run. */
6039 offset = key.offset;
6040 } else if (key.offset != offset + num_bytes) {
6041 ret = check_extent_exists(root, offset, num_bytes);
6043 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6044 "there is no extent record\n",
6045 offset, offset+num_bytes);
6048 offset = key.offset;
6051 num_bytes += data_len;
6055 btrfs_free_path(path);
6059 static int is_dropped_key(struct btrfs_key *key,
6060 struct btrfs_key *drop_key) {
6061 if (key->objectid < drop_key->objectid)
6063 else if (key->objectid == drop_key->objectid) {
6064 if (key->type < drop_key->type)
6066 else if (key->type == drop_key->type) {
6067 if (key->offset < drop_key->offset)
6075 * Here are the rules for FULL_BACKREF.
6077 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6078 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6080 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6081 * if it happened after the relocation occurred since we'll have dropped the
6082 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6083 * have no real way to know for sure.
6085 * We process the blocks one root at a time, and we start from the lowest root
6086 * objectid and go to the highest. So we can just lookup the owner backref for
6087 * the record and if we don't find it then we know it doesn't exist and we have
6090 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6091 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6092 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether tree block @buf should have FULL_BACKREF set when the
 * extent tree cannot tell us (see the rules in the comment block above
 * this function).  Sets BTRFS_BLOCK_FLAG_FULL_BACKREF in *@flags when
 * appropriate and marks the cached extent record bad if the previously
 * recorded full-backref state disagrees.
 *
 * NOTE(review): gappy listing — several goto/label lines and braces are
 * missing from view (e.g. the `normal:`/`full_backref:` style labels the
 * two flag_block_full_backref blocks belong to).
 */
6094 static int calc_extent_flag(struct btrfs_root *root,
6095 struct cache_tree *extent_cache,
6096 struct extent_buffer *buf,
6097 struct root_item_record *ri,
6100 struct extent_record *rec;
6101 struct cache_extent *cache;
6102 struct tree_backref *tback;
6105 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6106 /* we have added this extent before */
6108 rec = container_of(cache, struct extent_record, cache);
6111 * Except file/reloc tree, we can not have
6114 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* Root node of the tree we are walking: never full backref. */
6119 if (buf->start == ri->bytenr)
/* RELOC header flag implies FULL_BACKREF (rule 1). */
6122 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6125 owner = btrfs_header_owner(buf);
6126 if (owner == ri->objectid)
/* Owner no longer points at this block: look for its owner backref. */
6129 tback = find_tree_backref(rec, 0, owner);
/* Normal (non-full-backref) path: flag disagreement with the record. */
6134 if (rec->flag_block_full_backref != FLAG_UNSET &&
6135 rec->flag_block_full_backref != 0)
6136 rec->bad_full_backref = 1;
/* Full-backref path: set the flag, again checking for disagreement. */
6139 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6140 if (rec->flag_block_full_backref != FLAG_UNSET &&
6141 rec->flag_block_full_backref != 1)
6142 rec->bad_full_backref = 1;
6146 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6148 fprintf(stderr, "Invalid key type(");
6149 print_key_type(stderr, 0, key_type);
6150 fprintf(stderr, ") found in root(");
6151 print_objectid(stderr, rootid, 0);
6152 fprintf(stderr, ")\n");
6156 * Check if the key is valid with its extent buffer.
6158 * This is an early check in case an invalid key exists in an extent buffer
6159 * This is not comprehensive yet, but should prevent wrong key/item passed
/*
 * Early sanity check: is key type @key_type plausible inside a leaf owned
 * by tree @rootid?  Returns 0 if plausible, otherwise reports the
 * mismatch and fails (error path below the visible lines).
 *
 * BUGFIX: the csum case label used BTRFS_CSUM_TREE_OBJECTID — a *tree
 * objectid* constant — in a switch over the u8 key type, so EXTENT_CSUM
 * items were never matched and went unchecked.  The intended key type is
 * BTRFS_EXTENT_CSUM_KEY.
 *
 * NOTE(review): gappy listing — the switch header, goto err/break lines,
 * the second half of the csum condition (original line 6174) and the
 * err: label are missing from view; only the visible case label changed.
 */
6162 static int check_type_with_root(u64 rootid, u8 key_type)
6165 /* Only valid in chunk tree */
6166 case BTRFS_DEV_ITEM_KEY:
6167 case BTRFS_CHUNK_ITEM_KEY:
6168 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6171 /* valid in csum and log tree */
6172 case BTRFS_EXTENT_CSUM_KEY:
6173 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6177 case BTRFS_EXTENT_ITEM_KEY:
6178 case BTRFS_METADATA_ITEM_KEY:
6179 case BTRFS_BLOCK_GROUP_ITEM_KEY:
6180 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6183 case BTRFS_ROOT_ITEM_KEY:
6184 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6187 case BTRFS_DEV_EXTENT_KEY:
6188 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6194 report_mismatch_key_root(key_type, rootid);
/*
 * Core of the extent-tree scan: pick the next pending tree block, read
 * it, work out its FULL_BACKREF state, sanity-check it, and then
 * process its contents — for leaves, dispatch every item to the proper
 * process_*_item()/add_*_backref() helper; for nodes, queue each child
 * pointer as a new pending block.  Also accumulates the global
 * bytes/space-waste statistics.
 *
 * NOTE(review): this is a gappy listing — many lines (locals, braces,
 * `continue`/`break`/`goto` statements, else branches) are missing, so
 * comments below describe only what the visible lines establish.
 */
6198 static int run_next_block(struct btrfs_root *root,
6199 struct block_info *bits,
6202 struct cache_tree *pending,
6203 struct cache_tree *seen,
6204 struct cache_tree *reada,
6205 struct cache_tree *nodes,
6206 struct cache_tree *extent_cache,
6207 struct cache_tree *chunk_cache,
6208 struct rb_root *dev_cache,
6209 struct block_group_tree *block_group_cache,
6210 struct device_extent_tree *dev_extent_cache,
6211 struct root_item_record *ri)
6213 struct extent_buffer *buf;
6214 struct extent_record *rec = NULL;
6225 struct btrfs_key key;
6226 struct cache_extent *cache;
/* Choose the next batch of blocks and kick off readahead for them. */
6229 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6230 bits_nr, &reada_bits);
6235 for(i = 0; i < nritems; i++) {
6236 ret = add_cache_extent(reada, bits[i].start,
6241 /* fixme, get the parent transid */
6242 readahead_tree_block(root, bits[i].start,
6246 *last = bits[0].start;
6247 bytenr = bits[0].start;
6248 size = bits[0].size;
/* This block is now being processed: drop it from all queues. */
6250 cache = lookup_cache_extent(pending, bytenr, size);
6252 remove_cache_extent(pending, cache);
6255 cache = lookup_cache_extent(reada, bytenr, size);
6257 remove_cache_extent(reada, cache);
6260 cache = lookup_cache_extent(nodes, bytenr, size);
6262 remove_cache_extent(nodes, cache);
6265 cache = lookup_cache_extent(extent_cache, bytenr, size);
6267 rec = container_of(cache, struct extent_record, cache);
6268 gen = rec->parent_generation;
6271 /* fixme, get the real parent transid */
6272 buf = read_tree_block(root, bytenr, size, gen);
6273 if (!extent_buffer_uptodate(buf)) {
6274 record_bad_block_io(root->fs_info,
6275 extent_cache, bytenr, size);
6279 nritems = btrfs_header_nritems(buf);
/*
 * Determine the block's backref flags: ask the extent tree if we
 * trust it, otherwise fall back to calc_extent_flag().
 */
6282 if (!init_extent_tree) {
6283 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6284 btrfs_header_level(buf), 1, NULL,
6287 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6289 fprintf(stderr, "Couldn't calc extent flags\n");
6290 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6295 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6297 fprintf(stderr, "Couldn't calc extent flags\n");
6298 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Cross-check FULL_BACKREF against how we reached this block. */
6302 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6304 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6305 ri->objectid == btrfs_header_owner(buf)) {
6307 * Ok we got to this block from it's original owner and
6308 * we have FULL_BACKREF set. Relocation can leave
6309 * converted blocks over so this is altogether possible,
6310 * however it's not possible if the generation > the
6311 * last snapshot, so check for this case.
6313 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6314 btrfs_header_generation(buf) > ri->last_snapshot) {
6315 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6316 rec->bad_full_backref = 1;
6321 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6322 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6323 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6324 rec->bad_full_backref = 1;
/* Remember the resolved state on the cached extent record. */
6328 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6329 rec->flag_block_full_backref = 1;
6333 rec->flag_block_full_backref = 0;
6335 owner = btrfs_header_owner(buf);
6338 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch every item by key type. */
6342 if (btrfs_is_leaf(buf)) {
6343 btree_space_waste += btrfs_leaf_free_space(root, buf);
6344 for (i = 0; i < nritems; i++) {
6345 struct btrfs_file_extent_item *fi;
6346 btrfs_item_key_to_cpu(buf, &key, i);
6348 * Check key type against the leaf owner.
6349 * Could filter quite a lot of early error if
6352 if (check_type_with_root(btrfs_header_owner(buf),
6354 fprintf(stderr, "ignoring invalid key\n");
6357 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6358 process_extent_item(root, extent_cache, buf,
6362 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6363 process_extent_item(root, extent_cache, buf,
6367 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6369 btrfs_item_size_nr(buf, i);
6372 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6373 process_chunk_item(chunk_cache, &key, buf, i);
6376 if (key.type == BTRFS_DEV_ITEM_KEY) {
6377 process_device_item(dev_cache, &key, buf, i);
6380 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6381 process_block_group_item(block_group_cache,
6385 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6386 process_device_extent_item(dev_extent_cache,
6391 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6392 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6393 process_extent_ref_v0(extent_cache, buf, i);
/* Keyed (non-inline) backref items feed the backref caches. */
6400 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6401 add_tree_backref(extent_cache, key.objectid, 0,
6405 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6406 add_tree_backref(extent_cache, key.objectid,
6410 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6411 struct btrfs_extent_data_ref *ref;
6412 ref = btrfs_item_ptr(buf, i,
6413 struct btrfs_extent_data_ref);
6414 add_data_backref(extent_cache,
6416 btrfs_extent_data_ref_root(buf, ref),
6417 btrfs_extent_data_ref_objectid(buf,
6419 btrfs_extent_data_ref_offset(buf, ref),
6420 btrfs_extent_data_ref_count(buf, ref),
6421 0, root->sectorsize);
6424 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6425 struct btrfs_shared_data_ref *ref;
6426 ref = btrfs_item_ptr(buf, i,
6427 struct btrfs_shared_data_ref);
6428 add_data_backref(extent_cache,
6429 key.objectid, key.offset, 0, 0, 0,
6430 btrfs_shared_data_ref_count(buf, ref),
6431 0, root->sectorsize);
/* Non-standard orphans are queued on delete_items for repair. */
6434 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6435 struct bad_item *bad;
6437 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6441 bad = malloc(sizeof(struct bad_item));
6444 INIT_LIST_HEAD(&bad->list);
6445 memcpy(&bad->key, &key,
6446 sizeof(struct btrfs_key));
6447 bad->root_id = owner;
6448 list_add_tail(&bad->list, &delete_items);
/* Remaining leaf work only applies to regular file extents. */
6451 if (key.type != BTRFS_EXTENT_DATA_KEY)
6453 fi = btrfs_item_ptr(buf, i,
6454 struct btrfs_file_extent_item);
6455 if (btrfs_file_extent_type(buf, fi) ==
6456 BTRFS_FILE_EXTENT_INLINE)
6458 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6461 data_bytes_allocated +=
6462 btrfs_file_extent_disk_num_bytes(buf, fi);
6463 if (data_bytes_allocated < root->sectorsize) {
6466 data_bytes_referenced +=
6467 btrfs_file_extent_num_bytes(buf, fi);
6468 add_data_backref(extent_cache,
6469 btrfs_file_extent_disk_bytenr(buf, fi),
6470 parent, owner, key.objectid, key.offset -
6471 btrfs_file_extent_offset(buf, fi), 1, 1,
6472 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record and queue every child block pointer. */
6476 struct btrfs_key first_key;
6478 first_key.objectid = 0;
6481 btrfs_item_key_to_cpu(buf, &first_key, 0);
6482 level = btrfs_header_level(buf);
6483 for (i = 0; i < nritems; i++) {
6484 struct extent_record tmpl;
6486 ptr = btrfs_node_blockptr(buf, i);
6487 size = root->nodesize;
6488 btrfs_node_key_to_cpu(buf, &key, i);
/* Skip children already removed by an in-progress snapshot drop. */
6490 if ((level == ri->drop_level)
6491 && is_dropped_key(&key, &ri->drop_key)) {
6496 memset(&tmpl, 0, sizeof(tmpl));
6497 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6498 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6503 tmpl.max_size = size;
6504 ret = add_extent_rec(extent_cache, &tmpl);
6507 add_tree_backref(extent_cache, ptr, parent, owner, 1);
/* Interior children go on `nodes`, level-1 children on `pending`. */
6510 add_pending(nodes, seen, ptr, size);
6512 add_pending(pending, seen, ptr, size);
6515 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6516 nritems) * sizeof(struct btrfs_key_ptr);
/* Global accounting of btree usage by owner. */
6518 total_btree_bytes += buf->len;
6519 if (fs_root_objectid(btrfs_header_owner(buf)))
6520 total_fs_tree_bytes += buf->len;
6521 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6522 total_extent_tree_bytes += buf->len;
6523 if (!found_old_backref &&
6524 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6525 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6526 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6527 found_old_backref = 1;
6529 free_extent_buffer(buf);
/*
 * Seed the block scan with a tree root: queue @buf on the right pending
 * list (nodes vs pending by level), create its extent record, and add
 * the appropriate tree backref (full backref keyed on the block itself
 * for reloc trees / pre-mixed-backref filesystems, otherwise an owner
 * backref on @objectid).
 *
 * NOTE(review): gappy listing — the objectid parameter line, else
 * branches, some tmpl field assignments and the return are missing
 * from view.
 */
6533 static int add_root_to_pending(struct extent_buffer *buf,
6534 struct cache_tree *extent_cache,
6535 struct cache_tree *pending,
6536 struct cache_tree *seen,
6537 struct cache_tree *nodes,
6540 struct extent_record tmpl;
/* Interior roots go on `nodes`, leaves on `pending`. */
6542 if (btrfs_header_level(buf) > 0)
6543 add_pending(nodes, seen, buf->start, buf->len);
6545 add_pending(pending, seen, buf->start, buf->len);
6547 memset(&tmpl, 0, sizeof(tmpl));
6548 tmpl.start = buf->start;
6553 tmpl.max_size = buf->len;
6554 add_extent_rec(extent_cache, &tmpl);
/* Pre-mixed-backref and reloc roots reference themselves (full backref). */
6556 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6557 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6558 add_tree_backref(extent_cache, buf->start, buf->start,
6561 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6565 /* as we fix the tree, we might be deleting blocks that
6566 * we're tracking for repair. This hook makes sure we
6567 * remove any backrefs for blocks as we are fixing them.
/*
 * Hook called while repair code frees extents: keep fsck's cached extent
 * records in sync by dropping the matching backref counts, so we do not
 * later "repair" references we just deleted ourselves.
 *
 * BUGFIX: both cleanup conditions read
 *     if (!back->node.found_extent_tree && back->node.found_ref)
 * which would erase and free a backref that is *still referenced*.  The
 * backref must be removed from the record's tree only when neither the
 * extent tree nor any actual ref still mentions it, i.e. both flags are
 * clear — so the second term needs negating.
 *
 * NOTE(review): gappy listing — some locals, the !cache early-out,
 * free(back) lines and braces are missing from view; only the two
 * conditions were changed.
 */
6569 static int free_extent_hook(struct btrfs_trans_handle *trans,
6570 struct btrfs_root *root,
6571 u64 bytenr, u64 num_bytes, u64 parent,
6572 u64 root_objectid, u64 owner, u64 offset,
6575 struct extent_record *rec;
6576 struct cache_extent *cache;
6578 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners >= FIRST_FREE_OBJECTID are fs trees, i.e. data extents. */
6580 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6581 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6585 rec = container_of(cache, struct extent_record, cache);
6587 struct data_backref *back;
6588 back = find_data_backref(rec, parent, root_objectid, owner,
6589 offset, 1, bytenr, num_bytes);
/* Drop the dropped refs from both the ref and extent-tree tallies. */
6592 if (back->node.found_ref) {
6593 back->found_ref -= refs_to_drop;
6595 rec->refs -= refs_to_drop;
6597 if (back->node.found_extent_tree) {
6598 back->num_refs -= refs_to_drop;
6599 if (rec->extent_item_refs)
6600 rec->extent_item_refs -= refs_to_drop;
6602 if (back->found_ref == 0)
6603 back->node.found_ref = 0;
6604 if (back->num_refs == 0)
6605 back->node.found_extent_tree = 0;
/* Nothing references this backref any more: remove it. */
6607 if (!back->node.found_extent_tree && !back->node.found_ref) {
6608 rb_erase(&back->node.node, &rec->backref_tree);
6612 struct tree_backref *back;
6613 back = find_tree_backref(rec, parent, root_objectid);
6616 if (back->node.found_ref) {
6619 back->node.found_ref = 0;
6621 if (back->node.found_extent_tree) {
6622 if (rec->extent_item_refs)
6623 rec->extent_item_refs--;
6624 back->node.found_extent_tree = 0;
/* Same cleanup rule for tree backrefs. */
6626 if (!back->node.found_extent_tree && !back->node.found_ref) {
6627 rb_erase(&back->node.node, &rec->backref_tree);
/* The record itself may now be fully resolved and freeable. */
6631 maybe_free_extent_rec(extent_cache, rec);
/*
 * Repair helper: delete every extent-tree item for @bytenr (extent items,
 * metadata items and all keyed backref items) by repeatedly searching
 * from (bytenr, max) and walking backwards.  Block group accounting is
 * updated when an EXTENT_ITEM/METADATA_ITEM is removed.
 *
 * NOTE(review): gappy listing — some locals, loop/brace lines, the key
 * type init and parts of the error handling are missing from view.
 */
6636 static int delete_extent_records(struct btrfs_trans_handle *trans,
6637 struct btrfs_root *root,
6638 struct btrfs_path *path,
6639 u64 bytenr, u64 new_len)
6641 struct btrfs_key key;
6642 struct btrfs_key found_key;
6643 struct extent_buffer *leaf;
6648 key.objectid = bytenr;
6650 key.offset = (u64)-1;
/* Search past the last item at this bytenr, then step back one slot. */
6653 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6660 if (path->slots[0] == 0)
6666 leaf = path->nodes[0];
6667 slot = path->slots[0];
6669 btrfs_item_key_to_cpu(leaf, &found_key, slot);
/* Stop once we've walked off this extent's items. */
6670 if (found_key.objectid != bytenr)
6673 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6674 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6675 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6676 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6677 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6678 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6679 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
/* Not a deletable type: restart the search just before this key. */
6680 btrfs_release_path(path);
6681 if (found_key.type == 0) {
6682 if (found_key.offset == 0)
6684 key.offset = found_key.offset - 1;
6685 key.type = found_key.type;
6687 key.type = found_key.type - 1;
6688 key.offset = (u64)-1;
6692 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6693 found_key.objectid, found_key.type, found_key.offset);
6695 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6698 btrfs_release_path(path);
/* Removing the extent item itself: give the space back to the bg. */
6700 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6701 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6702 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6703 found_key.offset : root->nodesize;
6705 ret = btrfs_update_block_group(trans, root, bytenr,
6712 btrfs_release_path(path);
6717 * for a single backref, this will allocate a new extent
6718 * and add the backref to it.
/*
 * Repair helper: (re)create the extent item for @rec if not yet
 * @allocated, then add the backref described by @back via
 * btrfs_inc_extent_ref(), logging what was added.
 *
 * BUGFIX: the four `copy_key` uses had been mangled by an HTML-entity
 * round-trip — `&copy_key` became `©_key` (`&copy;` → `©`), which is not
 * even valid C.  Restored the address-of expressions.  Also dropped the
 * stray double semicolon on the `copy_key` declaration.
 *
 * NOTE(review): gappy listing — braces, some error checks and the return
 * are missing from view; only the mojibake and the `;;` were changed.
 */
6720 static int record_extent(struct btrfs_trans_handle *trans,
6721 struct btrfs_fs_info *info,
6722 struct btrfs_path *path,
6723 struct extent_record *rec,
6724 struct extent_backref *back,
6725 int allocated, u64 flags)
6728 struct btrfs_root *extent_root = info->extent_root;
6729 struct extent_buffer *leaf;
6730 struct btrfs_key ins_key;
6731 struct btrfs_extent_item *ei;
6732 struct tree_backref *tback;
6733 struct data_backref *dback;
6734 struct btrfs_tree_block_info *bi;
/* Metadata extents are at least one nodesize long. */
6737 rec->max_size = max_t(u64, rec->max_size,
6738 info->extent_root->nodesize);
6741 u32 item_size = sizeof(*ei);
/* Tree blocks carry an extra btrfs_tree_block_info. */
6744 item_size += sizeof(*bi);
6746 ins_key.objectid = rec->start;
6747 ins_key.offset = rec->max_size;
6748 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6750 ret = btrfs_insert_empty_item(trans, extent_root, path,
6751 &ins_key, item_size);
6755 leaf = path->nodes[0];
6756 ei = btrfs_item_ptr(leaf, path->slots[0],
6757 struct btrfs_extent_item);
6759 btrfs_set_extent_refs(leaf, ei, 0);
6760 btrfs_set_extent_generation(leaf, ei, rec->generation);
6762 if (back->is_data) {
6763 btrfs_set_extent_flags(leaf, ei,
6764 BTRFS_EXTENT_FLAG_DATA);
6766 struct btrfs_disk_key copy_key;
6768 tback = to_tree_backref(back);
6769 bi = (struct btrfs_tree_block_info *)(ei + 1);
6770 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* We only know the objectid; type/offset of the first key are lost. */
6773 btrfs_set_disk_key_objectid(&copy_key,
6774 rec->info_objectid);
6775 btrfs_set_disk_key_type(&copy_key, 0);
6776 btrfs_set_disk_key_offset(&copy_key, 0);
6778 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6779 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6781 btrfs_set_extent_flags(leaf, ei,
6782 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6785 btrfs_mark_buffer_dirty(leaf);
6786 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6787 rec->max_size, 1, 0);
6790 btrfs_release_path(path);
/* Now add the backref itself, once per found reference for data. */
6793 if (back->is_data) {
6797 dback = to_data_backref(back);
6798 if (back->full_backref)
6799 parent = dback->parent;
6803 for (i = 0; i < dback->found_ref; i++) {
6804 /* if parent != 0, we're doing a full backref
6805 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6806 * just makes the backref allocator create a data
6809 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6810 rec->start, rec->max_size,
6814 BTRFS_FIRST_FREE_OBJECTID :
6820 fprintf(stderr, "adding new data backref"
6821 " on %llu %s %llu owner %llu"
6822 " offset %llu found %d\n",
6823 (unsigned long long)rec->start,
6824 back->full_backref ?
6826 back->full_backref ?
6827 (unsigned long long)parent :
6828 (unsigned long long)dback->root,
6829 (unsigned long long)dback->owner,
6830 (unsigned long long)dback->offset,
6835 tback = to_tree_backref(back);
6836 if (back->full_backref)
6837 parent = tback->parent;
6841 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6842 rec->start, rec->max_size,
6843 parent, tback->root, 0, 0);
6844 fprintf(stderr, "adding new tree backref on "
6845 "start %llu len %llu parent %llu root %llu\n",
6846 rec->start, rec->max_size, parent, tback->root);
6849 btrfs_release_path(path);
6853 static struct extent_entry *find_entry(struct list_head *entries,
6854 u64 bytenr, u64 bytes)
6856 struct extent_entry *entry = NULL;
6858 list_for_each_entry(entry, entries, list) {
6859 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the most trustworthy extent_entry from @entries by majority vote:
 * skip entries whose every occurrence was broken, and track the highest
 * count seen.  On a tie the current best is invalidated and the scan
 * continues looking for a strict winner.
 *
 * NOTE(review): gappy listing — the braces, `continue` statements, the
 * `prev` bookkeeping and the final return are missing from view.
 */
6866 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6868 struct extent_entry *entry, *best = NULL, *prev = NULL;
6870 list_for_each_entry(entry, entries, list) {
6877 * If there are as many broken entries as entries then we know
6878 * not to trust this particular entry.
6880 if (entry->broken == entry->count)
6884 * If our current entry == best then we can't be sure our best
6885 * is really the best, so we need to keep searching.
6887 if (best && best->count == entry->count) {
6893 /* Prev == entry, not good enough, have to keep searching */
6894 if (!prev->broken && prev->count == entry->count)
/* Promote whichever of prev/entry has the higher vote count. */
6898 best = (prev->count > entry->count) ? prev : entry;
6899 else if (best->count < entry->count)
/*
 * Rewrite one file extent item in its owning fs tree so that its
 * disk_bytenr / offset / disk_num_bytes agree with the extent entry
 * ('entry') that the backref voting in verify_backrefs() settled on.
 * 'dback' identifies the file extent (root/owner/offset) to fix.
 * Commits a transaction on the owning root; returns 0 on success or a
 * negative errno.
 */
6907 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6908 struct data_backref *dback, struct extent_entry *entry)
6910 struct btrfs_trans_handle *trans;
6911 struct btrfs_root *root;
6912 struct btrfs_file_extent_item *fi;
6913 struct extent_buffer *leaf;
6914 struct btrfs_key key;
/* Look up the subvolume root the backref says owns this extent. */
6918 key.objectid = dback->root;
6919 key.type = BTRFS_ROOT_ITEM_KEY;
6920 key.offset = (u64)-1;
6921 root = btrfs_read_fs_root(info, &key);
6923 fprintf(stderr, "Couldn't find root for our ref\n");
6928 * The backref points to the original offset of the extent if it was
6929 * split, so we need to search down to the offset we have and then walk
6930 * forward until we find the backref we're looking for.
6932 key.objectid = dback->owner;
6933 key.type = BTRFS_EXTENT_DATA_KEY;
6934 key.offset = dback->offset;
/* Read-only search first: just locate the item, no COW yet. */
6935 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6937 fprintf(stderr, "Error looking up ref %d\n", ret);
6942 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6943 ret = btrfs_next_leaf(root, path);
6945 fprintf(stderr, "Couldn't find our ref, next\n");
6949 leaf = path->nodes[0];
6950 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Walked past the owner's EXTENT_DATA items without a match. */
6951 if (key.objectid != dback->owner ||
6952 key.type != BTRFS_EXTENT_DATA_KEY) {
6953 fprintf(stderr, "Couldn't find our ref, search\n");
6956 fi = btrfs_item_ptr(leaf, path->slots[0],
6957 struct btrfs_file_extent_item);
6958 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6959 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* Stop on the item whose on-disk location matches this backref. */
6961 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6966 btrfs_release_path(path);
6968 trans = btrfs_start_transaction(root, 1);
6970 return PTR_ERR(trans);
6973 * Ok we have the key of the file extent we want to fix, now we can cow
6974 * down to the thing and fix it.
6976 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6978 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6979 key.objectid, key.type, key.offset, ret);
6983 fprintf(stderr, "Well that's odd, we just found this key "
6984 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6989 leaf = path->nodes[0];
6990 fi = btrfs_item_ptr(leaf, path->slots[0],
6991 struct btrfs_file_extent_item);
/*
 * Compressed extents can't be re-pointed by shifting the file offset,
 * so bail and ask for a btrfs-image instead of guessing.
 */
6993 if (btrfs_file_extent_compression(leaf, fi) &&
6994 dback->disk_bytenr != entry->bytenr) {
6995 fprintf(stderr, "Ref doesn't match the record start and is "
6996 "compressed, please take a btrfs-image of this file "
6997 "system and send it to a btrfs developer so they can "
6998 "complete this functionality for bytenr %Lu\n",
6999 dback->disk_bytenr);
/* Broken backref: trust the voted entry and just rewrite the bytenr. */
7004 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7005 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
/*
 * Ref starts after the voted extent start: move the start back and
 * compensate by growing the in-extent offset, after checking the ref
 * still fits inside [entry->bytenr, entry->bytenr + entry->bytes).
 */
7006 } else if (dback->disk_bytenr > entry->bytenr) {
7007 u64 off_diff, offset;
7009 off_diff = dback->disk_bytenr - entry->bytenr;
7010 offset = btrfs_file_extent_offset(leaf, fi);
7011 if (dback->disk_bytenr + offset +
7012 btrfs_file_extent_num_bytes(leaf, fi) >
7013 entry->bytenr + entry->bytes) {
7014 fprintf(stderr, "Ref is past the entry end, please "
7015 "take a btrfs-image of this file system and "
7016 "send it to a btrfs developer, ref %Lu\n",
7017 dback->disk_bytenr);
7022 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7023 btrfs_set_file_extent_offset(leaf, fi, offset);
/*
 * Ref starts before the voted extent start: only fixable if the
 * referenced data (bytenr + offset) still lies inside the entry.
 */
7024 } else if (dback->disk_bytenr < entry->bytenr) {
7027 offset = btrfs_file_extent_offset(leaf, fi);
7028 if (dback->disk_bytenr + offset < entry->bytenr) {
7029 fprintf(stderr, "Ref is before the entry start, please"
7030 " take a btrfs-image of this file system and "
7031 "send it to a btrfs developer, ref %Lu\n",
7032 dback->disk_bytenr);
/* Re-express the same data position relative to entry->bytenr. */
7037 offset += dback->disk_bytenr;
7038 offset -= entry->bytenr;
7039 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7040 btrfs_set_file_extent_offset(leaf, fi, offset);
7043 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7046 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7047 * only do this if we aren't using compression, otherwise it's a
7050 if (!btrfs_file_extent_compression(leaf, fi))
7051 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7053 printf("ram bytes may be wrong?\n");
7054 btrfs_mark_buffer_dirty(leaf);
7056 err = btrfs_commit_transaction(trans, root);
7057 btrfs_release_path(path);
/* Prefer the earlier error; otherwise report the commit result. */
7058 return ret ? ret : err;
/*
 * Cross-check all data backrefs of 'rec' against each other: collect
 * the distinct (disk_bytenr, bytes) pairs they claim into 'entries',
 * count votes per pair, elect a winner (optionally tie-broken by the
 * extent record itself), then call repair_ref() on every backref that
 * disagrees with the winner.  Only data backrefs with a real found_ref
 * participate; metadata is expected to always agree.
 */
7061 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7062 struct extent_record *rec)
7064 struct extent_backref *back, *tmp;
7065 struct data_backref *dback;
7066 struct extent_entry *entry, *best = NULL;
7069 int broken_entries = 0;
7074 * Metadata is easy and the backrefs should always agree on bytenr and
7075 * size, if not we've got bigger issues.
7080 rbtree_postorder_for_each_entry_safe(back, tmp,
7081 &rec->backref_tree, node) {
7082 if (back->full_backref || !back->is_data)
7085 dback = to_data_backref(back);
7088 * We only pay attention to backrefs that we found a real
7091 if (dback->found_ref == 0)
7095 * For now we only catch when the bytes don't match, not the
7096 * bytenr. We can easily do this at the same time, but I want
7097 * to have a fs image to test on before we just add repair
7098 * functionality willy-nilly so we know we won't screw up the
/* One vote entry per distinct (bytenr, bytes) pair. */
7102 entry = find_entry(&entries, dback->disk_bytenr,
7105 entry = malloc(sizeof(struct extent_entry));
7110 memset(entry, 0, sizeof(*entry));
7111 entry->bytenr = dback->disk_bytenr;
7112 entry->bytes = dback->bytes;
7113 list_add_tail(&entry->list, &entries);
7118 * If we only have one entry we may think the entries agree when
7119 * in reality they don't so we have to do some extra checking.
7121 if (dback->disk_bytenr != rec->start ||
7122 dback->bytes != rec->nr || back->broken)
7133 /* Yay all the backrefs agree, carry on good sir */
7134 if (nr_entries <= 1 && !mismatch)
/* NOTE(review): "discrepency" is a typo for "discrepancy" in this
 * user-visible message; left untouched here since this pass only
 * adds comments. */
7137 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7138 "%Lu\n", rec->start);
7141 * First we want to see if the backrefs can agree amongst themselves who
7142 * is right, so figure out which one of the entries has the highest
7145 best = find_most_right_entry(&entries);
7148 * Ok so we may have an even split between what the backrefs think, so
7149 * this is where we use the extent ref to see what it thinks.
7152 entry = find_entry(&entries, rec->start, rec->nr);
7153 if (!entry && (!broken_entries || !rec->found_rec)) {
7154 fprintf(stderr, "Backrefs don't agree with each other "
7155 "and extent record doesn't agree with anybody,"
7156 " so we can't fix bytenr %Lu bytes %Lu\n",
7157 rec->start, rec->nr);
7160 } else if (!entry) {
7162 * Ok our backrefs were broken, we'll assume this is the
7163 * correct value and add an entry for this range.
7165 entry = malloc(sizeof(struct extent_entry));
7170 memset(entry, 0, sizeof(*entry));
7171 entry->bytenr = rec->start;
7172 entry->bytes = rec->nr;
7173 list_add_tail(&entry->list, &entries);
/* Re-run the election now that the record's own vote is in. */
7177 best = find_most_right_entry(&entries);
7179 fprintf(stderr, "Backrefs and extent record evenly "
7180 "split on who is right, this is going to "
7181 "require user input to fix bytenr %Lu bytes "
7182 "%Lu\n", rec->start, rec->nr);
7189 * I don't think this can happen currently as we'll abort() if we catch
7190 * this case higher up, but in case somebody removes that we still can't
7191 * deal with it properly here yet, so just bail out if that's the case.
7193 if (best->bytenr != rec->start) {
7194 fprintf(stderr, "Extent start and backref starts don't match, "
7195 "please use btrfs-image on this file system and send "
7196 "it to a btrfs developer so they can make fsck fix "
7197 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7198 rec->start, rec->nr);
7204 * Ok great we all agreed on an extent record, let's go find the real
7205 * references and fix up the ones that don't match.
7207 rbtree_postorder_for_each_entry_safe(back, tmp,
7208 &rec->backref_tree, node) {
7209 if (back->full_backref || !back->is_data)
7212 dback = to_data_backref(back);
7215 * Still ignoring backrefs that don't have a real ref attached
7218 if (dback->found_ref == 0)
/* Already consistent with the winner, nothing to repair. */
7221 if (dback->bytes == best->bytes &&
7222 dback->disk_bytenr == best->bytenr)
7225 ret = repair_ref(info, path, dback, best);
7231 * Ok we messed with the actual refs, which means we need to drop our
7232 * entire cache and go back and rescan. I know this is a huge pain and
7233 * adds a lot of extra work, but it's the only way to be safe. Once all
7234 * the backrefs agree we may not need to do anything to the extent
/* Free the temporary vote entries before returning. */
7239 while (!list_empty(&entries)) {
7240 entry = list_entry(entries.next, struct extent_entry, list);
7241 list_del_init(&entry->list);
/*
 * Decide what to do with an extent record that has duplicates.  If the
 * record itself was backed by a real EXTENT_ITEM (or there are several
 * duplicates) it truly needs deletion; otherwise promote the single
 * duplicate ('good', the one that WAS found on disk) to be the cached
 * record, absorbing overlapping neighbours along the way.
 * Returns 0 when duplicate deletion is still required, non-zero when
 * the promotion resolved everything.
 */
7247 static int process_duplicates(struct btrfs_root *root,
7248 struct cache_tree *extent_cache,
7249 struct extent_record *rec)
7251 struct extent_record *good, *tmp;
7252 struct cache_extent *cache;
7256 * If we found an extent record for this extent then return, or if we
7257 * have more than one duplicate we are likely going to need to delete
7260 if (rec->found_rec || rec->num_duplicates > 1)
7263 /* Shouldn't happen but just in case */
7264 BUG_ON(!rec->num_duplicates);
7267 * So this happens if we end up with a backref that doesn't match the
7268 * actual extent entry. So either the backref is bad or the extent
7269 * entry is bad. Either way we want to have the extent_record actually
7270 * reflect what we found in the extent_tree, so we need to take the
7271 * duplicate out and use that as the extent_record since the only way we
7272 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7274 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the duplicate found on disk; re-key its cache entry. */
7276 good = to_extent_record(rec->dups.next);
7277 list_del_init(&good->list);
7278 INIT_LIST_HEAD(&good->backrefs);
7279 INIT_LIST_HEAD(&good->dups);
7280 good->cache.start = good->start;
7281 good->cache.size = good->nr;
7282 good->content_checked = 0;
7283 good->owner_ref_checked = 0;
7284 good->num_duplicates = 0;
7285 good->refs = rec->refs;
7286 list_splice_init(&rec->backrefs, &good->backrefs);
/* Sweep up any cached records overlapping the promoted range. */
7288 cache = lookup_cache_extent(extent_cache, good->start,
7292 tmp = container_of(cache, struct extent_record, cache);
7295 * If we find another overlapping extent and its found_rec is
7296 * set then it's a duplicate and we need to try and delete
7299 if (tmp->found_rec || tmp->num_duplicates > 0) {
7300 if (list_empty(&good->list))
7301 list_add_tail(&good->list,
7302 &duplicate_extents);
7303 good->num_duplicates += tmp->num_duplicates + 1;
7304 list_splice_init(&tmp->dups, &good->dups);
7305 list_del_init(&tmp->list);
7306 list_add_tail(&tmp->list, &good->dups);
7307 remove_cache_extent(extent_cache, &tmp->cache);
7312 * Ok we have another non extent item backed extent rec, so lets
7313 * just add it to this extent and carry on like we did above.
7315 good->refs += tmp->refs;
7316 list_splice_init(&tmp->backrefs, &good->backrefs);
7317 remove_cache_extent(extent_cache, &tmp->cache);
7320 ret = insert_cache_extent(extent_cache, &good->cache);
/* Non-zero duplicates left means the caller must keep deleting. */
7323 return good->num_duplicates ? 0 : 1;
/*
 * Delete the on-disk EXTENT_ITEMs for all duplicates of 'rec' that are
 * fully covered by the widest record, keeping only that covering
 * record.  Overlapping-but-not-nested duplicates are not handled and
 * abort with a message.  Returns the number of items deleted (>0 means
 * the caller must rescan), or a negative errno.
 */
7326 static int delete_duplicate_records(struct btrfs_root *root,
7327 struct extent_record *rec)
7329 struct btrfs_trans_handle *trans;
7330 LIST_HEAD(delete_list);
7331 struct btrfs_path *path;
7332 struct extent_record *tmp, *good, *n;
7335 struct btrfs_key key;
7337 path = btrfs_alloc_path();
7344 /* Find the record that covers all of the duplicates. */
7345 list_for_each_entry(tmp, &rec->dups, list) {
7346 if (good->start < tmp->start)
7348 if (good->nr > tmp->nr)
/* A duplicate extending past the widest record is unsupported. */
7351 if (tmp->start + tmp->nr < good->start + good->nr) {
7352 fprintf(stderr, "Ok we have overlapping extents that "
7353 "aren't completely covered by each other, this "
7354 "is going to require more careful thought. "
7355 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7356 tmp->start, tmp->nr, good->start, good->nr);
/* Everything except 'good' is queued for deletion. */
7363 list_add_tail(&rec->list, &delete_list);
7365 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7368 list_move_tail(&tmp->list, &delete_list);
7371 root = root->fs_info->extent_root;
7372 trans = btrfs_start_transaction(root, 1);
7373 if (IS_ERR(trans)) {
7374 ret = PTR_ERR(trans);
7378 list_for_each_entry(tmp, &delete_list, list) {
/* Records never seen as real EXTENT_ITEMs have nothing on disk. */
7379 if (tmp->found_rec == 0)
7381 key.objectid = tmp->start;
7382 key.type = BTRFS_EXTENT_ITEM_KEY;
7383 key.offset = tmp->nr;
7385 /* Shouldn't happen but just in case */
7386 if (tmp->metadata) {
7387 fprintf(stderr, "Well this shouldn't happen, extent "
7388 "record overlaps but is metadata? "
7389 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
/* -1 = delete mode: COW down and remove the item. */
7393 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7399 ret = btrfs_del_item(trans, root, path);
7402 btrfs_release_path(path);
7405 err = btrfs_commit_transaction(trans, root);
/* Free the in-memory records we queued, then any leftover dups. */
7409 while (!list_empty(&delete_list)) {
7410 tmp = to_extent_record(delete_list.next);
7411 list_del_init(&tmp->list);
7417 while (!list_empty(&rec->dups)) {
7418 tmp = to_extent_record(rec->dups.next);
7419 list_del_init(&tmp->list);
7423 btrfs_free_path(path);
7425 if (!ret && !nr_del)
7426 rec->num_duplicates = 0;
7428 return ret ? ret : nr_del;
/*
 * For every data backref of 'rec' that never matched a real file
 * extent (found_ref == 0), look up the file extent it claims to be in
 * its owning root and, if that extent's bytenr has no extent record of
 * its own, adopt its (bytenr, bytes) into the backref so
 * verify_backrefs() can include it in the vote.  Such adopted values
 * are flagged as untrustworthy (see the trailing comment).
 */
7431 static int find_possible_backrefs(struct btrfs_fs_info *info,
7432 struct btrfs_path *path,
7433 struct cache_tree *extent_cache,
7434 struct extent_record *rec)
7436 struct btrfs_root *root;
7437 struct extent_backref *back, *tmp;
7438 struct data_backref *dback;
7439 struct cache_extent *cache;
7440 struct btrfs_file_extent_item *fi;
7441 struct btrfs_key key;
7445 rbtree_postorder_for_each_entry_safe(back, tmp,
7446 &rec->backref_tree, node) {
7447 /* Don't care about full backrefs (poor unloved backrefs) */
7448 if (back->full_backref || !back->is_data)
7451 dback = to_data_backref(back);
7453 /* We found this one, we don't need to do a lookup */
7454 if (dback->found_ref)
/* Resolve the subvolume root named by the backref. */
7457 key.objectid = dback->root;
7458 key.type = BTRFS_ROOT_ITEM_KEY;
7459 key.offset = (u64)-1;
7461 root = btrfs_read_fs_root(info, &key);
7463 /* No root, definitely a bad ref, skip */
7464 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7466 /* Other err, exit */
7468 return PTR_ERR(root);
7470 key.objectid = dback->owner;
7471 key.type = BTRFS_EXTENT_DATA_KEY;
7472 key.offset = dback->offset;
7473 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7475 btrfs_release_path(path);
7478 /* Didn't find it, we can carry on */
7483 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7484 struct btrfs_file_extent_item);
7485 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7486 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7487 btrfs_release_path(path);
7488 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7490 struct extent_record *tmp;
7491 tmp = container_of(cache, struct extent_record, cache);
7494 * If we found an extent record for the bytenr for this
7495 * particular backref then we can't add it to our
7496 * current extent record. We only want to add backrefs
7497 * that don't have a corresponding extent item in the
7498 * extent tree since they likely belong to this record
7499 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the file extent's location into the backref. */
7505 dback->found_ref += 1;
7506 dback->disk_bytenr = bytenr;
7507 dback->bytes = bytes;
7510 * Set this so the verify backref code knows not to trust the
7511 * values in this backref.
7520 * Record orphan data ref into corresponding root.
7522 * Return 0 if the extent item contains data ref and recorded.
7523 * Return 1 if the extent item contains no useful data ref
7524 * On that case, it may contain only shared_dataref or metadata backref
7525 * or the file extent exists (this should be handled by the extent bytenr
7527 * Return <0 if something goes wrong.
7529 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7530 struct extent_record *rec)
7532 struct btrfs_key key;
7533 struct btrfs_root *dest_root;
7534 struct extent_backref *back, *tmp;
7535 struct data_backref *dback;
7536 struct orphan_data_extent *orphan;
7537 struct btrfs_path *path;
7538 int recorded_data_ref = 0;
7543 path = btrfs_alloc_path();
7546 rbtree_postorder_for_each_entry_safe(back, tmp,
7547 &rec->backref_tree, node) {
/* Only data backrefs that exist in the extent tree matter here. */
7548 if (back->full_backref || !back->is_data ||
7549 !back->found_extent_tree)
7551 dback = to_data_backref(back);
/* A matching file extent was found, so it's not an orphan. */
7552 if (dback->found_ref)
7554 key.objectid = dback->root;
7555 key.type = BTRFS_ROOT_ITEM_KEY;
7556 key.offset = (u64)-1;
7558 dest_root = btrfs_read_fs_root(fs_info, &key);
7560 /* For non-exist root we just skip it */
7561 if (IS_ERR(dest_root) || !dest_root)
7564 key.objectid = dback->owner;
7565 key.type = BTRFS_EXTENT_DATA_KEY;
7566 key.offset = dback->offset;
7568 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7570 * For ret < 0, it's OK since the fs-tree may be corrupted,
7571 * we need to record it for inode/file extent rebuild.
7572 * For ret > 0, we record it only for file extent rebuild.
7573 * For ret == 0, the file extent exists but only bytenr
7574 * mismatch, let the original bytenr fix routine to handle,
/* Queue the orphan on the destination root for later rebuild. */
7580 orphan = malloc(sizeof(*orphan));
7585 INIT_LIST_HEAD(&orphan->list);
7586 orphan->root = dback->root;
7587 orphan->objectid = dback->owner;
7588 orphan->offset = dback->offset;
7589 orphan->disk_bytenr = rec->cache.start;
7590 orphan->disk_len = rec->cache.size;
7591 list_add(&dest_root->orphan_data_extents, &orphan->list);
7592 recorded_data_ref = 1;
7595 btrfs_free_path(path);
/* 0 when at least one orphan was recorded, 1 otherwise. */
7597 return !recorded_data_ref;
7603 * when an incorrect extent item is found, this will delete
7604 * all of the existing entries for it and recreate them
7605 * based on what the tree scan found.
7607 static int fixup_extent_refs(struct btrfs_fs_info *info,
7608 struct cache_tree *extent_cache,
7609 struct extent_record *rec)
7611 struct btrfs_trans_handle *trans = NULL;
7613 struct btrfs_path *path;
7614 struct cache_extent *cache;
7615 struct extent_backref *back, *tmp;
7619 if (rec->flag_block_full_backref)
7620 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7622 path = btrfs_alloc_path();
/* Ref-count mismatch on data: resolve stray backrefs first. */
7626 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7628 * Sometimes the backrefs themselves are so broken they don't
7629 * get attached to any meaningful rec, so first go back and
7630 * check any of our backrefs that we couldn't find and throw
7631 * them into the list if we find the backref so that
7632 * verify_backrefs can figure out what to do.
7634 ret = find_possible_backrefs(info, path, extent_cache, rec);
7639 /* step one, make sure all of the backrefs agree */
7640 ret = verify_backrefs(info, path, rec);
7644 trans = btrfs_start_transaction(info->extent_root, 1);
7645 if (IS_ERR(trans)) {
7646 ret = PTR_ERR(trans);
7650 /* step two, delete all the existing records */
7651 ret = delete_extent_records(trans, info->extent_root, path,
7652 rec->start, rec->max_size);
7657 /* was this block corrupt? If so, don't add references to it */
7658 cache = lookup_cache_extent(info->corrupt_blocks,
7659 rec->start, rec->max_size);
7665 /* step three, recreate all the refs we did find */
7666 rbtree_postorder_for_each_entry_safe(back, tmp,
7667 &rec->backref_tree, node) {
7669 * if we didn't find any references, don't create a
7672 if (!back->found_ref)
/* The rewrite clears any previous full-backref badness. */
7675 rec->bad_full_backref = 0;
7676 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7684 int err = btrfs_commit_transaction(trans, info->extent_root);
7689 btrfs_free_path(path);
/*
 * Set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the extent item for
 * 'rec' so the on-disk flags match what the scan determined
 * (rec->flag_block_full_backref).  Runs inside its own transaction and
 * returns the commit result or a negative errno.
 */
7693 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7694 struct extent_record *rec)
7696 struct btrfs_trans_handle *trans;
7697 struct btrfs_root *root = fs_info->extent_root;
7698 struct btrfs_path *path;
7699 struct btrfs_extent_item *ei;
7700 struct btrfs_key key;
/* Metadata uses METADATA_ITEM keyed by level, data by size. */
7704 key.objectid = rec->start;
7705 if (rec->metadata) {
7706 key.type = BTRFS_METADATA_ITEM_KEY;
7707 key.offset = rec->info_level;
7709 key.type = BTRFS_EXTENT_ITEM_KEY;
7710 key.offset = rec->max_size;
7713 path = btrfs_alloc_path();
7717 trans = btrfs_start_transaction(root, 0);
7718 if (IS_ERR(trans)) {
7719 btrfs_free_path(path);
7720 return PTR_ERR(trans);
/* COW search (write mode) so the leaf can be modified in place. */
7723 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7725 btrfs_free_path(path);
7726 btrfs_commit_transaction(trans, root);
7729 fprintf(stderr, "Didn't find extent for %llu\n",
7730 (unsigned long long)rec->start);
7731 btrfs_free_path(path);
7732 btrfs_commit_transaction(trans, root);
7736 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7737 struct btrfs_extent_item);
7738 flags = btrfs_extent_flags(path->nodes[0], ei);
7739 if (rec->flag_block_full_backref) {
7740 fprintf(stderr, "setting full backref on %llu\n",
7741 (unsigned long long)key.objectid);
7742 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7744 fprintf(stderr, "clearing full backref on %llu\n",
7745 (unsigned long long)key.objectid);
7746 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7748 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7749 btrfs_mark_buffer_dirty(path->nodes[0]);
7750 btrfs_free_path(path);
7751 return btrfs_commit_transaction(trans, root);
7754 /* right now we only prune from the extent allocation tree */
/*
 * Remove the parent node's pointer to one corrupt block, effectively
 * pruning the corrupt subtree out of the extent tree.  Searches down
 * only as far as the corrupt block's parent, locates the slot pointing
 * at it, and deletes that pointer.
 */
7755 static int prune_one_block(struct btrfs_trans_handle *trans,
7756 struct btrfs_fs_info *info,
7757 struct btrfs_corrupt_block *corrupt)
7760 struct btrfs_path path;
7761 struct extent_buffer *eb;
/* Stop one level above the corrupt block. */
7765 int level = corrupt->level + 1;
7767 btrfs_init_path(&path);
7769 /* we want to stop at the parent to our busted block */
7770 path.lowest_level = level;
7772 ret = btrfs_search_slot(trans, info->extent_root,
7773 &corrupt->key, &path, -1, 1);
7778 eb = path.nodes[level];
7785 * hopefully the search gave us the block we want to prune,
7786 * lets try that first
7788 slot = path.slots[level];
7789 found = btrfs_node_blockptr(eb, slot);
7790 if (found == corrupt->cache.start)
7793 nritems = btrfs_header_nritems(eb);
7795 /* the search failed, lets scan this node and hope we find it */
7796 for (slot = 0; slot < nritems; slot++) {
7797 found = btrfs_node_blockptr(eb, slot);
7798 if (found == corrupt->cache.start)
7802 * we couldn't find the bad block. TODO, search all the nodes for pointers
7805 if (eb == info->extent_root->node) {
7810 btrfs_release_path(&path);
7815 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7816 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7819 btrfs_release_path(&path);
/*
 * Walk the fs_info->corrupt_blocks cache and prune every recorded
 * corrupt block from the extent tree via prune_one_block(), all inside
 * a single transaction that is committed at the end.
 */
7823 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7825 struct btrfs_trans_handle *trans = NULL;
7826 struct cache_extent *cache;
7827 struct btrfs_corrupt_block *corrupt;
7830 cache = search_cache_extent(info->corrupt_blocks, 0);
/* Lazily start the transaction on the first corrupt block found. */
7834 trans = btrfs_start_transaction(info->extent_root, 1);
7836 return PTR_ERR(trans);
7838 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7839 prune_one_block(trans, info, corrupt);
7840 remove_cache_extent(info->corrupt_blocks, cache);
7843 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Drop the in-memory free space cache: clear every EXTENT_DIRTY range
 * in fs_info->free_space_cache and walk the block groups so their
 * cached state can be rebuilt on next use.
 */
7847 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7849 struct btrfs_block_group_cache *cache;
7854 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7855 &start, &end, EXTENT_DIRTY);
7858 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7864 cache = btrfs_lookup_first_block_group(fs_info, start);
/* Advance past this block group to find the next one. */
7869 start = cache->key.objectid + cache->key.offset;
/*
 * Main extent-tree verification pass: walks every cached extent
 * record, reports inconsistencies (duplicates, ref-count mismatches,
 * backpointer/owner-ref problems, bad full-backref flags, stripe
 * crossing, wrong chunk type) and, in repair mode, dispatches to the
 * fixup helpers above.  Finishes by rewriting block accounting when
 * anything was repaired.
 */
7873 static int check_extent_refs(struct btrfs_root *root,
7874 struct cache_tree *extent_cache)
7876 struct extent_record *rec;
7877 struct cache_extent *cache;
7886 * if we're doing a repair, we have to make sure
7887 * we don't allocate from the problem extents.
7888 * In the worst case, this will be all the
/* Exclude every known extent range from allocation while repairing. */
7891 cache = search_cache_extent(extent_cache, 0);
7893 rec = container_of(cache, struct extent_record, cache);
7894 set_extent_dirty(root->fs_info->excluded_extents,
7896 rec->start + rec->max_size - 1,
7898 cache = next_cache_extent(cache);
7901 /* pin down all the corrupted blocks too */
7902 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7904 set_extent_dirty(root->fs_info->excluded_extents,
7906 cache->start + cache->size - 1,
7908 cache = next_cache_extent(cache);
7910 prune_corrupt_blocks(root->fs_info);
7911 reset_cached_block_groups(root->fs_info);
7914 reset_cached_block_groups(root->fs_info);
7917 * We need to delete any duplicate entries we find first otherwise we
7918 * could mess up the extent tree when we have backrefs that actually
7919 * belong to a different extent item and not the weird duplicate one.
7921 while (repair && !list_empty(&duplicate_extents)) {
7922 rec = to_extent_record(duplicate_extents.next);
7923 list_del_init(&rec->list);
7925 /* Sometimes we can find a backref before we find an actual
7926 * extent, so we need to process it a little bit to see if there
7927 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7928 * if this is a backref screwup. If we need to delete stuff
7929 * process_duplicates() will return 0, otherwise it will return
7932 if (process_duplicates(root, extent_cache, rec))
7934 ret = delete_duplicate_records(root, rec);
7938 * delete_duplicate_records will return the number of entries
7939 * deleted, so if it's greater than 0 then we know we actually
7940 * did something and we need to remove.
/* Main per-record verification loop. */
7954 cache = search_cache_extent(extent_cache, 0);
7957 rec = container_of(cache, struct extent_record, cache);
7958 if (rec->num_duplicates) {
7959 fprintf(stderr, "extent item %llu has multiple extent "
7960 "items\n", (unsigned long long)rec->start);
/* Backref count disagrees with the extent item's ref count. */
7965 if (rec->refs != rec->extent_item_refs) {
7966 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7967 (unsigned long long)rec->start,
7968 (unsigned long long)rec->nr);
7969 fprintf(stderr, "extent item %llu, found %llu\n",
7970 (unsigned long long)rec->extent_item_refs,
7971 (unsigned long long)rec->refs);
7972 ret = record_orphan_data_extents(root->fs_info, rec);
7979 * we can't use the extent to repair file
7980 * extent, let the fallback method handle it.
7982 if (!fixed && repair) {
7983 ret = fixup_extent_refs(
7994 if (all_backpointers_checked(rec, 1)) {
7995 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7996 (unsigned long long)rec->start,
7997 (unsigned long long)rec->nr);
7999 if (!fixed && !recorded && repair) {
8000 ret = fixup_extent_refs(root->fs_info,
8009 if (!rec->owner_ref_checked) {
8010 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8011 (unsigned long long)rec->start,
8012 (unsigned long long)rec->nr);
8013 if (!fixed && !recorded && repair) {
8014 ret = fixup_extent_refs(root->fs_info,
8023 if (rec->bad_full_backref) {
8024 fprintf(stderr, "bad full backref, on [%llu]\n",
8025 (unsigned long long)rec->start);
8027 ret = fixup_extent_flags(root->fs_info, rec);
8036 * Although it's not an extent ref's problem, we reuse this
8037 * routine for error reporting.
8038 * No repair function yet.
8040 if (rec->crossing_stripes) {
8042 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8043 rec->start, rec->start + rec->max_size);
8048 if (rec->wrong_chunk_type) {
8050 "bad extent [%llu, %llu), type mismatch with chunk\n",
8051 rec->start, rec->start + rec->max_size);
/* Done with this record; drop it and, if clean/repaired, unexclude. */
8056 remove_cache_extent(extent_cache, cache);
8057 free_all_extent_backrefs(rec);
8058 if (!init_extent_tree && repair && (!cur_err || fixed))
8059 clear_extent_dirty(root->fs_info->excluded_extents,
8061 rec->start + rec->max_size - 1,
8067 if (ret && ret != -EAGAIN) {
8068 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
8071 struct btrfs_trans_handle *trans;
8073 root = root->fs_info->extent_root;
8074 trans = btrfs_start_transaction(root, 1);
8075 if (IS_ERR(trans)) {
8076 ret = PTR_ERR(trans);
/* Repairs changed allocations, so rebuild block accounting. */
8080 btrfs_fix_block_accounting(trans, root);
8081 ret = btrfs_commit_transaction(trans, root);
8086 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Compute the per-device stripe length for a chunk of logical size
 * 'length' spread over 'num_stripes' stripes, based on the RAID
 * profile bits in 'type'.  RAID10 stores each byte twice; RAID5/6
 * lose one/two stripes to parity; other profiles map 1:1.
 */
8092 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8096 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8097 stripe_size = length;
8098 stripe_size /= num_stripes;
8099 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
/* Two copies of every byte, striped across num_stripes devices. */
8100 stripe_size = length * 2;
8101 stripe_size /= num_stripes;
8102 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8103 stripe_size = length;
8104 stripe_size /= (num_stripes - 1);
8105 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8106 stripe_size = length;
8107 stripe_size /= (num_stripes - 2);
/* Single/DUP/RAID1: each stripe holds the whole logical length. */
8109 stripe_size = length;
8115 * Check the chunk with its block group/dev list ref:
8116 * Return 0 if all refs seems valid.
8117 * Return 1 if part of refs seems valid, need later check for rebuild ref
8118 * like missing block group and needs to search extent tree to rebuild them.
8119 * Return -1 if essential refs are missing and unable to rebuild.
8121 static int check_chunk_refs(struct chunk_record *chunk_rec,
8122 struct block_group_tree *block_group_cache,
8123 struct device_extent_tree *dev_extent_cache,
8126 struct cache_extent *block_group_item;
8127 struct block_group_record *block_group_rec;
8128 struct cache_extent *dev_extent_item;
8129 struct device_extent_record *dev_extent_rec;
8133 int metadump_v2 = 0;
/* Part 1: the chunk must have a matching block group item. */
8137 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8140 if (block_group_item) {
8141 block_group_rec = container_of(block_group_item,
8142 struct block_group_record,
8144 if (chunk_rec->length != block_group_rec->offset ||
8145 chunk_rec->offset != block_group_rec->objectid ||
8147 chunk_rec->type_flags != block_group_rec->flags)) {
8150 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8151 chunk_rec->objectid,
8156 chunk_rec->type_flags,
8157 block_group_rec->objectid,
8158 block_group_rec->type,
8159 block_group_rec->offset,
8160 block_group_rec->offset,
8161 block_group_rec->objectid,
8162 block_group_rec->flags);
/* Claim the block group record: detach it and link to the chunk. */
8165 list_del_init(&block_group_rec->list);
8166 chunk_rec->bg_rec = block_group_rec;
8171 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8172 chunk_rec->objectid,
8177 chunk_rec->type_flags);
/* Part 2: every stripe must have a matching dev extent. */
8184 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8185 chunk_rec->num_stripes);
8186 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8187 devid = chunk_rec->stripes[i].devid;
8188 offset = chunk_rec->stripes[i].offset;
8189 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8190 devid, offset, length);
8191 if (dev_extent_item) {
8192 dev_extent_rec = container_of(dev_extent_item,
8193 struct device_extent_record,
8195 if (dev_extent_rec->objectid != devid ||
8196 dev_extent_rec->offset != offset ||
8197 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8198 dev_extent_rec->length != length) {
/* NOTE(review): "dismatch" is a typo for "mismatch" in this
 * user-visible message; left untouched (comments-only pass). */
8201 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8202 chunk_rec->objectid,
8205 chunk_rec->stripes[i].devid,
8206 chunk_rec->stripes[i].offset,
8207 dev_extent_rec->objectid,
8208 dev_extent_rec->offset,
8209 dev_extent_rec->length);
8212 list_move(&dev_extent_rec->chunk_list,
8213 &chunk_rec->dextents);
8218 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8219 chunk_rec->objectid,
8222 chunk_rec->stripes[i].devid,
8223 chunk_rec->stripes[i].offset);
8230 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Verify every cached chunk against its block group and dev extents
 * via check_chunk_refs(), sorting chunks onto the caller-supplied
 * 'good' / 'rebuild' / 'bad' lists.  Afterwards, any block group or
 * dev extent left unclaimed has no owning chunk and is reported
 * (unless 'silent').
 */
8231 int check_chunks(struct cache_tree *chunk_cache,
8232 struct block_group_tree *block_group_cache,
8233 struct device_extent_tree *dev_extent_cache,
8234 struct list_head *good, struct list_head *bad,
8235 struct list_head *rebuild, int silent)
8237 struct cache_extent *chunk_item;
8238 struct chunk_record *chunk_rec;
8239 struct block_group_record *bg_rec;
8240 struct device_extent_record *dext_rec;
8244 chunk_item = first_cache_extent(chunk_cache);
8245 while (chunk_item) {
8246 chunk_rec = container_of(chunk_item, struct chunk_record,
8248 err = check_chunk_refs(chunk_rec, block_group_cache,
8249 dev_extent_cache, silent);
/* 0 = fully referenced, >0 = rebuildable, <0 = bad. */
8252 if (err == 0 && good)
8253 list_add_tail(&chunk_rec->list, good);
8254 if (err > 0 && rebuild)
8255 list_add_tail(&chunk_rec->list, rebuild);
8257 list_add_tail(&chunk_rec->list, bad);
8258 chunk_item = next_cache_extent(chunk_item);
/* Anything still on these lists was never claimed by a chunk. */
8261 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8264 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8272 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8276 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Sum the lengths of all dev extents belonging to 'dev_rec' and verify
 * the total matches the device item's byte_used field; report a
 * mismatch otherwise.
 */
8287 static int check_device_used(struct device_record *dev_rec,
8288 struct device_extent_tree *dext_cache)
8290 struct cache_extent *cache;
8291 struct device_extent_record *dev_extent_rec;
8294 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8296 dev_extent_rec = container_of(cache,
8297 struct device_extent_record,
/* The tree is shared; stop when we walk onto another device. */
8299 if (dev_extent_rec->objectid != dev_rec->devid)
8302 list_del_init(&dev_extent_rec->device_list);
8303 total_byte += dev_extent_rec->length;
8304 cache = next_cache_extent(cache);
8307 if (total_byte != dev_rec->byte_used) {
8309 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8310 total_byte, dev_rec->byte_used, dev_rec->objectid,
8311 dev_rec->type, dev_rec->offset);
8318 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * For every device record in @dev_cache, validate its used-bytes
 * accounting against @dev_extent_cache via check_device_used(), then
 * report any device extents whose owning device was never found.
 *
 * NOTE(review): loop header, error accumulation and return are elided
 * from this excerpt.
 */
8319 static int check_devices(struct rb_root *dev_cache,
8320 struct device_extent_tree *dev_extent_cache)
8322 struct rb_node *dev_node;
8323 struct device_record *dev_rec;
8324 struct device_extent_record *dext_rec;
/* Iterate all device records in rb-tree order. */
8328 dev_node = rb_first(dev_cache);
8330 dev_rec = container_of(dev_node, struct device_record, node);
8331 err = check_device_used(dev_rec, dev_extent_cache);
8335 dev_node = rb_next(dev_node);
/* Dev extents that matched no device item at all. */
8337 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8340 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8341 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root (its bytenr,
 * level, node size, last snapshot and drop progress) and append it to
 * @head for later processing by deal_root_from_list().
 *
 * NOTE(review): the malloc-failure check and the guard around the
 * drop_key copy (presumably "if (drop_key)") are elided from this
 * excerpt — confirm against the full source.
 */
8348 static int add_root_item_to_list(struct list_head *head,
8349 u64 objectid, u64 bytenr, u64 last_snapshot,
8350 u8 level, u8 drop_level,
8351 int level_size, struct btrfs_key *drop_key)
8354 struct root_item_record *ri_rec;
8355 ri_rec = malloc(sizeof(*ri_rec));
8358 ri_rec->bytenr = bytenr;
8359 ri_rec->objectid = objectid;
8360 ri_rec->level = level;
8361 ri_rec->level_size = level_size;
8362 ri_rec->drop_level = drop_level;
8363 ri_rec->last_snapshot = last_snapshot;
8365 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8366 list_add_tail(&ri_rec->list, head);
/*
 * Drain @list, unlinking every queued root_item_record.
 * NOTE(review): the free() of each record is elided from this excerpt
 * but is presumably present — confirm against the full source.
 */
8371 static void free_root_item_list(struct list_head *list)
8373 struct root_item_record *ri_rec;
8375 while (!list_empty(list)) {
8376 ri_rec = list_first_entry(list, struct root_item_record,
8378 list_del_init(&ri_rec->list);
/*
 * Process every queued tree root on @list: read its root node, seed the
 * pending/seen/nodes caches via add_root_to_pending(), then drive
 * run_next_block() to walk the tree and populate the extent, chunk,
 * device and block-group caches.  A final run_next_block() pass (with a
 * NULL record) drains any remaining pending blocks.
 *
 * NOTE(review): the declarations of 'ret'/'last', the error handling
 * after read failures, and the inner loop structure are elided from
 * this excerpt.
 */
8383 static int deal_root_from_list(struct list_head *list,
8384 struct btrfs_root *root,
8385 struct block_info *bits,
8387 struct cache_tree *pending,
8388 struct cache_tree *seen,
8389 struct cache_tree *reada,
8390 struct cache_tree *nodes,
8391 struct cache_tree *extent_cache,
8392 struct cache_tree *chunk_cache,
8393 struct rb_root *dev_cache,
8394 struct block_group_tree *block_group_cache,
8395 struct device_extent_tree *dev_extent_cache)
8400 while (!list_empty(list)) {
8401 struct root_item_record *rec;
8402 struct extent_buffer *buf;
8403 rec = list_entry(list->next,
8404 struct root_item_record, list);
/* Read the root node of this tree; bail on a corrupted buffer. */
8406 buf = read_tree_block(root->fs_info->tree_root,
8407 rec->bytenr, rec->level_size, 0);
8408 if (!extent_buffer_uptodate(buf)) {
8409 free_extent_buffer(buf);
8413 add_root_to_pending(buf, extent_cache, pending,
8414 seen, nodes, rec->objectid);
8416 * To rebuild extent tree, we need deal with snapshot
8417 * one by one, otherwise we deal with node firstly which
8418 * can maximize readahead.
8421 ret = run_next_block(root, bits, bits_nr, &last,
8422 pending, seen, reada, nodes,
8423 extent_cache, chunk_cache,
8424 dev_cache, block_group_cache,
8425 dev_extent_cache, rec);
8429 free_extent_buffer(buf);
8430 list_del(&rec->list);
/* Drain whatever is still pending after all roots were queued. */
8436 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8437 reada, nodes, extent_cache, chunk_cache,
8438 dev_cache, block_group_cache,
8439 dev_extent_cache, NULL);
/*
 * Top-level driver for the original-mode chunk/extent check: build the
 * in-memory caches (chunks, block groups, device extents, extents),
 * walk every tree root (normal and snapshot-drop-in-progress trees),
 * then cross-verify chunks, extent refs and devices.  Also installs
 * fsck hooks on fs_info for the duration of the walk and tears them
 * down afterwards.
 *
 * NOTE(review): this excerpt elides many lines (locals such as 'ret',
 * 'bits_nr', 'level', 'slot', error labels, 'again:' style retry
 * jumps, and most goto targets).  Comments mark only the visible
 * phases.
 */
8449 static int check_chunks_and_extents(struct btrfs_root *root)
8451 struct rb_root dev_cache;
8452 struct cache_tree chunk_cache;
8453 struct block_group_tree block_group_cache;
8454 struct device_extent_tree dev_extent_cache;
8455 struct cache_tree extent_cache;
8456 struct cache_tree seen;
8457 struct cache_tree pending;
8458 struct cache_tree reada;
8459 struct cache_tree nodes;
8460 struct extent_io_tree excluded_extents;
8461 struct cache_tree corrupt_blocks;
8462 struct btrfs_path path;
8463 struct btrfs_key key;
8464 struct btrfs_key found_key;
8466 struct block_info *bits;
8468 struct extent_buffer *leaf;
8470 struct btrfs_root_item ri;
8471 struct list_head dropping_trees;
8472 struct list_head normal_trees;
8473 struct btrfs_root *root1;
/* Phase 1: initialize every cache/tree/list used by the walk. */
8478 dev_cache = RB_ROOT;
8479 cache_tree_init(&chunk_cache);
8480 block_group_tree_init(&block_group_cache);
8481 device_extent_tree_init(&dev_extent_cache);
8483 cache_tree_init(&extent_cache);
8484 cache_tree_init(&seen);
8485 cache_tree_init(&pending);
8486 cache_tree_init(&nodes);
8487 cache_tree_init(&reada);
8488 cache_tree_init(&corrupt_blocks);
8489 extent_io_tree_init(&excluded_extents);
8490 INIT_LIST_HEAD(&dropping_trees);
8491 INIT_LIST_HEAD(&normal_trees);
/* Install fsck hooks so extent alloc/free updates our caches. */
8494 root->fs_info->excluded_extents = &excluded_extents;
8495 root->fs_info->fsck_extent_cache = &extent_cache;
8496 root->fs_info->free_extent_hook = free_extent_hook;
8497 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* Scratch array for run_next_block(); allocation check elided. */
8501 bits = malloc(bits_nr * sizeof(struct block_info));
8507 if (ctx.progress_enabled) {
8508 ctx.tp = TASK_EXTENTS;
8509 task_start(ctx.info);
/* Phase 2: queue the tree root and chunk root as "normal" trees. */
8513 root1 = root->fs_info->tree_root;
8514 level = btrfs_header_level(root1->node);
8515 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8516 root1->node->start, 0, level, 0,
8517 root1->nodesize, NULL);
8520 root1 = root->fs_info->chunk_root;
8521 level = btrfs_header_level(root1->node);
8522 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8523 root1->node->start, 0, level, 0,
8524 root1->nodesize, NULL);
/* Phase 3: scan ROOT_ITEMs in the root tree to queue all subvolumes. */
8527 btrfs_init_path(&path);
8530 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8531 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8536 leaf = path.nodes[0];
8537 slot = path.slots[0];
8538 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8539 ret = btrfs_next_leaf(root, &path);
8542 leaf = path.nodes[0];
8543 slot = path.slots[0];
8545 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8546 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8547 unsigned long offset;
8550 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8551 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8552 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * drop_progress objectid == 0 means no snapshot deletion in
 * progress: queue as a normal tree; otherwise queue on the
 * dropping_trees list with its drop key.
 */
8553 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8554 level = btrfs_root_level(&ri);
8555 level_size = root->nodesize;
8556 ret = add_root_item_to_list(&normal_trees,
8558 btrfs_root_bytenr(&ri),
8559 last_snapshot, level,
8560 0, level_size, NULL);
8564 level = btrfs_root_level(&ri);
8565 level_size = root->nodesize;
8566 objectid = found_key.objectid;
8567 btrfs_disk_key_to_cpu(&found_key,
8569 ret = add_root_item_to_list(&dropping_trees,
8571 btrfs_root_bytenr(&ri),
8572 last_snapshot, level,
8574 level_size, &found_key);
8581 btrfs_release_path(&path);
8584 * check_block can return -EAGAIN if it fixes something, please keep
8585 * this in mind when dealing with return values from these functions, if
8586 * we get -EAGAIN we want to fall through and restart the loop.
/* Phase 4: walk the queued trees, populating all caches. */
8588 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8589 &seen, &reada, &nodes, &extent_cache,
8590 &chunk_cache, &dev_cache, &block_group_cache,
8597 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8598 &pending, &seen, &reada, &nodes,
8599 &extent_cache, &chunk_cache, &dev_cache,
8600 &block_group_cache, &dev_extent_cache);
/* Phase 5: cross-verify the populated caches. */
8607 ret = check_chunks(&chunk_cache, &block_group_cache,
8608 &dev_extent_cache, NULL, NULL, NULL, 0);
8615 ret = check_extent_refs(root, &extent_cache);
8622 ret = check_devices(&dev_cache, &dev_extent_cache);
8627 task_stop(ctx.info);
/* Phase 6: uninstall hooks and free everything (normal exit path). */
8629 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8630 extent_io_tree_cleanup(&excluded_extents);
8631 root->fs_info->fsck_extent_cache = NULL;
8632 root->fs_info->free_extent_hook = NULL;
8633 root->fs_info->corrupt_blocks = NULL;
8634 root->fs_info->excluded_extents = NULL;
8637 free_chunk_cache_tree(&chunk_cache);
8638 free_device_cache_tree(&dev_cache);
8639 free_block_group_tree(&block_group_cache);
8640 free_device_extent_tree(&dev_extent_cache);
8641 free_extent_cache_tree(&seen);
8642 free_extent_cache_tree(&pending);
8643 free_extent_cache_tree(&reada);
8644 free_extent_cache_tree(&nodes);
/*
 * Second cleanup sequence — presumably the error/retry path (e.g.
 * after an -EAGAIN restart or a goto label elided from this excerpt);
 * it additionally frees the extent record cache and the root lists.
 */
8647 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8648 free_extent_cache_tree(&seen);
8649 free_extent_cache_tree(&pending);
8650 free_extent_cache_tree(&reada);
8651 free_extent_cache_tree(&nodes);
8652 free_chunk_cache_tree(&chunk_cache);
8653 free_block_group_tree(&block_group_cache);
8654 free_device_cache_tree(&dev_cache);
8655 free_device_extent_tree(&dev_extent_cache);
8656 free_extent_record_cache(root->fs_info, &extent_cache);
8657 free_root_item_list(&normal_trees);
8658 free_root_item_list(&dropping_trees);
8659 extent_io_tree_cleanup(&excluded_extents);
8664 * Check backrefs of a tree block given by @bytenr or @eb.
8666 * @root: the root containing the @bytenr or @eb
8667 * @eb: tree block extent buffer, can be NULL
8668 * @bytenr: bytenr of the tree block to search
8669 * @level: tree level of the tree block
8670 * @owner: owner of the tree block
8672 * Return >0 for any error found and output error message
8673 * Return 0 for no error found
/*
 * NOTE(review): this excerpt elides several lines (locals such as
 * 'err', 'ret', 'found_ref', the loop bounds check, and the final
 * return).  The visible logic: locate the (skinny) metadata extent
 * item for @bytenr, validate its flags/generation/level/refcount
 * against the tree block header, then look for a matching inline or
 * keyed TREE_BLOCK_REF backref.
 */
8675 static int check_tree_block_ref(struct btrfs_root *root,
8676 struct extent_buffer *eb, u64 bytenr,
8677 int level, u64 owner)
8679 struct btrfs_key key;
8680 struct btrfs_root *extent_root = root->fs_info->extent_root;
8681 struct btrfs_path path;
8682 struct btrfs_extent_item *ei;
8683 struct btrfs_extent_inline_ref *iref;
8684 struct extent_buffer *leaf;
8690 u32 nodesize = root->nodesize;
/* Prefer METADATA_ITEM when the skinny-metadata feature is on. */
8697 btrfs_init_path(&path);
8698 key.objectid = bytenr;
8699 if (btrfs_fs_incompat(root->fs_info,
8700 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8701 key.type = BTRFS_METADATA_ITEM_KEY;
8703 key.type = BTRFS_EXTENT_ITEM_KEY;
8704 key.offset = (u64)-1;
8706 /* Search for the backref in extent tree */
8707 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8709 err |= BACKREF_MISSING;
/* Step back to the extent/metadata item covering @bytenr. */
8712 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8714 err |= BACKREF_MISSING;
8718 leaf = path.nodes[0];
8719 slot = path.slots[0];
8720 btrfs_item_key_to_cpu(leaf, &key, slot);
8722 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * Skinny metadata items encode the level in key.offset; old-style
 * EXTENT_ITEMs carry a btrfs_tree_block_info before the inline refs.
 */
8724 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8725 skinny_level = (int)key.offset;
8726 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8728 struct btrfs_tree_block_info *info;
8730 info = (struct btrfs_tree_block_info *)(ei + 1);
8731 skinny_level = btrfs_tree_block_level(leaf, info);
8732 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* The extent item must be flagged as a tree block. */
8739 if (!(btrfs_extent_flags(leaf, ei) &
8740 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8742 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8743 key.objectid, nodesize,
8744 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8745 err = BACKREF_MISMATCH;
/* Generation in the header must match the extent item. */
8747 header_gen = btrfs_header_generation(eb);
8748 extent_gen = btrfs_extent_generation(leaf, ei);
8749 if (header_gen != extent_gen) {
8751 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8752 key.objectid, nodesize, header_gen,
8754 err = BACKREF_MISMATCH;
8756 if (level != skinny_level) {
8758 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8759 key.objectid, nodesize, level, skinny_level);
8760 err = BACKREF_MISMATCH;
/* Non-fs trees should never be shared across roots. */
8762 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8764 "extent[%llu %u] is referred by other roots than %llu",
8765 key.objectid, nodesize, root->objectid);
8766 err = BACKREF_MISMATCH;
8771 * Iterate the extent/metadata item to find the exact backref
8773 item_size = btrfs_item_size_nr(leaf, slot);
8774 ptr = (unsigned long)iref;
8775 end = (unsigned long)ei + item_size;
8777 iref = (struct btrfs_extent_inline_ref *)ptr;
8778 type = btrfs_extent_inline_ref_type(leaf, iref);
8779 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8781 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8782 (offset == root->objectid || offset == owner)) {
8784 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8785 /* Check if the backref points to valid referencer */
/* Recursive call with eb == NULL validates the parent block. */
8786 found_ref = !check_tree_block_ref(root, NULL, offset,
8792 ptr += btrfs_extent_inline_ref_size(type);
8796 * Inlined extent item doesn't have what we need, check
8797 * TREE_BLOCK_REF_KEY
8800 btrfs_release_path(&path);
8801 key.objectid = bytenr;
8802 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8803 key.offset = root->objectid;
8805 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8810 err |= BACKREF_MISSING;
8812 btrfs_release_path(&path);
/* Only report a lost backref when we have the block in hand. */
8813 if (eb && (err & BACKREF_MISSING))
8814 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8815 bytenr, nodesize, owner, level);
8820 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8822 * Return >0 any error found and output error message
8823 * Return 0 for no error found
/*
 * NOTE(review): loop bounds checks, several local declarations and the
 * final return are elided from this excerpt.  The visible flow:
 * validate alignment of the file extent, look up its EXTENT_ITEM in
 * the extent tree, validate flags/generation, then search for a
 * matching inline or keyed data backref.
 */
8825 static int check_extent_data_item(struct btrfs_root *root,
8826 struct extent_buffer *eb, int slot)
8828 struct btrfs_file_extent_item *fi;
8829 struct btrfs_path path;
8830 struct btrfs_root *extent_root = root->fs_info->extent_root;
8831 struct btrfs_key fi_key;
8832 struct btrfs_key dbref_key;
8833 struct extent_buffer *leaf;
8834 struct btrfs_extent_item *ei;
8835 struct btrfs_extent_inline_ref *iref;
8836 struct btrfs_extent_data_ref *dref;
8838 u64 file_extent_gen;
8841 u64 extent_num_bytes;
8849 int found_dbackref = 0;
8853 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8854 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8855 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8857 /* Nothing to check for hole and inline data extents */
8858 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8859 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8862 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8863 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8864 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8866 /* Check unaligned disk_num_bytes and num_bytes */
8867 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8869 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8870 fi_key.objectid, fi_key.offset, disk_num_bytes,
8872 err |= BYTES_UNALIGNED;
/* Accumulate global accounting for the final fsck summary. */
8874 data_bytes_allocated += disk_num_bytes;
8876 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8878 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8879 fi_key.objectid, fi_key.offset, extent_num_bytes,
8881 err |= BYTES_UNALIGNED;
8883 data_bytes_referenced += extent_num_bytes;
8885 owner = btrfs_header_owner(eb);
8887 /* Check the extent item of the file extent in extent tree */
8888 btrfs_init_path(&path);
8889 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8890 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8891 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8893 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8895 err |= BACKREF_MISSING;
8899 leaf = path.nodes[0];
8900 slot = path.slots[0];
8901 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8903 extent_flags = btrfs_extent_flags(leaf, ei);
8904 extent_gen = btrfs_extent_generation(leaf, ei);
/* The extent item must be flagged as DATA. */
8906 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8908 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8909 disk_bytenr, disk_num_bytes,
8910 BTRFS_EXTENT_FLAG_DATA);
8911 err |= BACKREF_MISMATCH;
/* A file extent can never predate the extent it points to. */
8914 if (file_extent_gen < extent_gen) {
8916 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8917 disk_bytenr, disk_num_bytes, file_extent_gen,
8919 err |= BACKREF_MISMATCH;
8922 /* Check data backref inside that extent item */
8923 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8924 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8925 ptr = (unsigned long)iref;
8926 end = (unsigned long)ei + item_size;
8928 iref = (struct btrfs_extent_inline_ref *)ptr;
8929 type = btrfs_extent_inline_ref_type(leaf, iref);
8930 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
/* A direct ref matching our root or the block owner is enough. */
8932 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8933 ref_root = btrfs_extent_data_ref_root(leaf, dref);
8934 if (ref_root == owner || ref_root == root->objectid)
8936 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared refs point at a tree block; validate that block. */
8937 found_dbackref = !check_tree_block_ref(root, NULL,
8938 btrfs_extent_inline_ref_offset(leaf, iref),
8944 ptr += btrfs_extent_inline_ref_size(type);
8947 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
8948 if (!found_dbackref) {
8949 btrfs_release_path(&path);
8951 btrfs_init_path(&path);
8952 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8953 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8954 dbref_key.offset = hash_extent_data_ref(root->objectid,
8955 fi_key.objectid, fi_key.offset);
8957 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8958 &dbref_key, &path, 0, 0);
8963 if (!found_dbackref)
8964 err |= BACKREF_MISSING;
8966 btrfs_release_path(&path);
8967 if (err & BACKREF_MISSING) {
8968 error("data extent[%llu %llu] backref lost",
8969 disk_bytenr, disk_num_bytes);
8975 * Get real tree block level for the case like shared block
8976 * Return >= 0 as tree level
8977 * Return <0 for error
/*
 * Determines the level from two independent sources — the backref item
 * in the extent tree and the tree block's own header — and (per the
 * visible comparison below) treats a disagreement as an error.
 * NOTE(review): several error-return lines are elided from this
 * excerpt.
 */
8979 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8981 struct extent_buffer *eb;
8982 struct btrfs_path path;
8983 struct btrfs_key key;
8984 struct btrfs_extent_item *ei;
8987 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8992 /* Search extent tree for extent generation and level */
8993 key.objectid = bytenr;
8994 key.type = BTRFS_METADATA_ITEM_KEY;
8995 key.offset = (u64)-1;
8997 btrfs_init_path(&path);
8998 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9001 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9009 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9010 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9011 struct btrfs_extent_item);
/* Only tree-block extents have a queryable level. */
9012 flags = btrfs_extent_flags(path.nodes[0], ei);
9013 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9018 /* Get transid for later read_tree_block() check */
9019 transid = btrfs_extent_generation(path.nodes[0], ei);
9021 /* Get backref level as one source */
9022 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9023 backref_level = key.offset;
9025 struct btrfs_tree_block_info *info;
9027 info = (struct btrfs_tree_block_info *)(ei + 1);
9028 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9030 btrfs_release_path(&path);
9032 /* Get level from tree block as an alternative source */
9033 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9034 if (!extent_buffer_uptodate(eb)) {
9035 free_extent_buffer(eb);
9038 header_level = btrfs_header_level(eb);
9039 free_extent_buffer(eb);
/* Disagreement between the two sources is an error (path elided). */
9041 if (header_level != backref_level)
9043 return header_level;
/* Error-exit cleanup label target (label itself elided). */
9046 btrfs_release_path(&path);
9051 * Check if a tree block backref is valid (points to a valid tree block)
9052 * if level == -1, level will be resolved
9053 * Return >0 for any error found and print error message
/*
 * NOTE(review): the level==-1 condition line, some error accumulation,
 * and the returns are elided from this excerpt.  Visible flow: resolve
 * the level if needed, read the referencing root and the tree block,
 * then re-search the root with the block's first key and verify the
 * path lands on the same block at the same level.
 */
9055 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9056 u64 bytenr, int level)
9058 struct btrfs_root *root;
9059 struct btrfs_key key;
9060 struct btrfs_path path;
9061 struct extent_buffer *eb;
9062 struct extent_buffer *node;
9063 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9067 /* Query level for level == -1 special case */
9069 level = query_tree_block_level(fs_info, bytenr);
9071 err |= REFERENCER_MISSING;
/* Read the root claimed by the backref. */
9075 key.objectid = root_id;
9076 key.type = BTRFS_ROOT_ITEM_KEY;
9077 key.offset = (u64)-1;
9079 root = btrfs_read_fs_root(fs_info, &key);
9081 err |= REFERENCER_MISSING;
9085 /* Read out the tree block to get item/node key */
9086 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9087 if (!extent_buffer_uptodate(eb)) {
9088 err |= REFERENCER_MISSING;
9089 free_extent_buffer(eb);
9093 /* Empty tree, no need to check key */
9094 if (!btrfs_header_nritems(eb) && !level) {
9095 free_extent_buffer(eb);
/* First key differs between node (level>0) and leaf (level==0). */
9100 btrfs_node_key_to_cpu(eb, &key, 0);
9102 btrfs_item_key_to_cpu(eb, &key, 0);
9104 free_extent_buffer(eb);
9106 btrfs_init_path(&path);
9107 /* Search with the first key, to ensure we can reach it */
9108 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9110 err |= REFERENCER_MISSING;
/* The path node at @level must be exactly the block we started from. */
9114 node = path.nodes[level];
9115 if (btrfs_header_bytenr(node) != bytenr) {
9117 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9118 bytenr, nodesize, bytenr,
9119 btrfs_header_bytenr(node));
9120 err |= REFERENCER_MISMATCH;
9122 if (btrfs_header_level(node) != level) {
9124 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9125 bytenr, nodesize, level,
9126 btrfs_header_level(node));
9127 err |= REFERENCER_MISMATCH;
9131 btrfs_release_path(&path);
/* Report a lost referencer; message differs with/without level info. */
9133 if (err & REFERENCER_MISSING) {
9135 error("extent [%llu %d] lost referencer (owner: %llu)",
9136 bytenr, nodesize, root_id);
9139 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9140 bytenr, nodesize, root_id, level);
9147 * Check referencer for shared block backref
9148 * If level == -1, this function will resolve the level.
/*
 * Reads the claimed parent block and scans its node pointers for
 * @bytenr; the parent must also sit exactly one level above the child.
 * Returns REFERENCER_MISSING when no matching pointer is found.
 * NOTE(review): the level==-1 condition line and success return are
 * elided from this excerpt.
 */
9150 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9151 u64 parent, u64 bytenr, int level)
9153 struct extent_buffer *eb;
9154 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9156 int found_parent = 0;
9159 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9160 if (!extent_buffer_uptodate(eb))
9164 level = query_tree_block_level(fs_info, bytenr);
/* Parent must be exactly one level above the referenced block. */
9168 if (level + 1 != btrfs_header_level(eb))
/* Scan all node pointers of the parent for @bytenr. */
9171 nr = btrfs_header_nritems(eb);
9172 for (i = 0; i < nr; i++) {
9173 if (bytenr == btrfs_node_blockptr(eb, i)) {
9179 free_extent_buffer(eb);
9180 if (!found_parent) {
9182 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9183 bytenr, nodesize, parent, level);
9184 return REFERENCER_MISSING;
9190 * Check referencer for normal (inlined) data ref
9191 * If len == 0, it will be resolved by searching in extent tree
/*
 * Verifies that the fs tree @root_id really contains @count file
 * extent items for inode @objectid that point at extent [@bytenr,
 * @bytenr + @len) with the expected backref offset.  Returns
 * REFERENCER_MISSING on a count mismatch.
 * NOTE(review): the len==0 condition line, some error returns and the
 * success return are elided from this excerpt.
 */
9193 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9194 u64 root_id, u64 objectid, u64 offset,
9195 u64 bytenr, u64 len, u32 count)
9197 struct btrfs_root *root;
9198 struct btrfs_root *extent_root = fs_info->extent_root;
9199 struct btrfs_key key;
9200 struct btrfs_path path;
9201 struct extent_buffer *leaf;
9202 struct btrfs_file_extent_item *fi;
9203 u32 found_count = 0;
/* Resolve @len from the extent tree (used for the len == 0 case). */
9208 key.objectid = bytenr;
9209 key.type = BTRFS_EXTENT_ITEM_KEY;
9210 key.offset = (u64)-1;
9212 btrfs_init_path(&path);
9213 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9216 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9219 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9220 if (key.objectid != bytenr ||
9221 key.type != BTRFS_EXTENT_ITEM_KEY)
9224 btrfs_release_path(&path);
/* Read the fs root the backref claims to belong to. */
9226 key.objectid = root_id;
9227 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9228 key.offset = (u64)-1;
9229 btrfs_init_path(&path);
9231 root = btrfs_read_fs_root(fs_info, &key);
9235 key.objectid = objectid;
9236 key.type = BTRFS_EXTENT_DATA_KEY;
9238 * It can be nasty as data backref offset is
9239 * file offset - file extent offset, which is smaller or
9240 * equal to original backref offset. The only special case is
9241 * overflow. So we need to special check and do further search.
9243 key.offset = offset & (1ULL << 63) ? 0 : offset;
9245 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9250 * Search afterwards to get correct one
9251 * NOTE: As we must do a comprehensive check on the data backref to
9252 * make sure the dref count also matches, we must iterate all file
9253 * extents for that inode.
9256 leaf = path.nodes[0];
9257 slot = path.slots[0];
9259 btrfs_item_key_to_cpu(leaf, &key, slot);
/* Stop once we leave this inode's EXTENT_DATA items. */
9260 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9262 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9264 * Except normal disk bytenr and disk num bytes, we still
9265 * need to do extra check on dbackref offset as
9266 * dbackref offset = file_offset - file_extent_offset
9268 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9269 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9270 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9274 ret = btrfs_next_item(root, &path);
9279 btrfs_release_path(&path);
9280 if (found_count != count) {
9282 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9283 bytenr, len, root_id, objectid, offset, count, found_count);
9284 return REFERENCER_MISSING;
9290 * Check if the referencer of a shared data backref exists
/*
 * Reads the claimed parent leaf and scans its EXTENT_DATA items
 * (skipping inline extents) for one whose disk bytenr equals @bytenr.
 * Returns REFERENCER_MISSING when none matches.
 * NOTE(review): error returns and the success return are elided from
 * this excerpt.
 */
9292 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9293 u64 parent, u64 bytenr)
9295 struct extent_buffer *eb;
9296 struct btrfs_key key;
9297 struct btrfs_file_extent_item *fi;
9298 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9300 int found_parent = 0;
9303 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9304 if (!extent_buffer_uptodate(eb))
9307 nr = btrfs_header_nritems(eb);
9308 for (i = 0; i < nr; i++) {
9309 btrfs_item_key_to_cpu(eb, &key, i);
9310 if (key.type != BTRFS_EXTENT_DATA_KEY)
/* Inline extents have no disk bytenr; skip them. */
9313 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9314 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9317 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9324 free_extent_buffer(eb);
9325 if (!found_parent) {
9326 error("shared extent %llu referencer lost (parent: %llu)",
9328 return REFERENCER_MISSING;
9334 * This function will check a given extent item, including its backref and
9335 * itself (like crossing stripe boundary and type)
9337 * Since we don't use extent_record anymore, introduce new error bit
/*
 * Dispatches every inline backref of the extent item at @slot to the
 * appropriate referencer check (tree block / shared block / data /
 * shared data) and accumulates error bits.  Also updates the global
 * bytes_used counter and flags metadata crossing a stripe boundary.
 * NOTE(review): locals ('err', 'ret', 'metadata', 'level', the switch
 * statement header) and the loop bounds are partly elided from this
 * excerpt.
 */
9339 static int check_extent_item(struct btrfs_fs_info *fs_info,
9340 struct extent_buffer *eb, int slot)
9342 struct btrfs_extent_item *ei;
9343 struct btrfs_extent_inline_ref *iref;
9344 struct btrfs_extent_data_ref *dref;
9348 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9349 u32 item_size = btrfs_item_size_nr(eb, slot);
9354 struct btrfs_key key;
/* EXTENT_ITEM offset is the byte length; METADATA_ITEM uses nodesize. */
9358 btrfs_item_key_to_cpu(eb, &key, slot);
9359 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9360 bytes_used += key.offset;
9362 bytes_used += nodesize;
9364 if (item_size < sizeof(*ei)) {
9366 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
9367 * old thing when on disk format is still un-determined.
9368 * No need to care about it anymore
9370 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9374 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9375 flags = btrfs_extent_flags(eb, ei);
9377 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9379 if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9380 error("bad metadata [%llu, %llu) crossing stripe boundary",
9381 key.objectid, key.objectid + nodesize);
9382 err |= CROSSING_STRIPE_BOUNDARY;
9385 ptr = (unsigned long)(ei + 1);
9387 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9388 /* Old EXTENT_ITEM metadata */
9389 struct btrfs_tree_block_info *info;
9391 info = (struct btrfs_tree_block_info *)ptr;
9392 level = btrfs_tree_block_level(eb, info);
9393 ptr += sizeof(struct btrfs_tree_block_info);
9395 /* New METADATA_ITEM */
9398 end = (unsigned long)ei + item_size;
/* Item size did not line up with the parsed refs (check elided). */
9401 err |= ITEM_SIZE_MISMATCH;
9405 /* Now check every backref in this extent item */
9407 iref = (struct btrfs_extent_inline_ref *)ptr;
9408 type = btrfs_extent_inline_ref_type(eb, iref);
9409 offset = btrfs_extent_inline_ref_offset(eb, iref);
9411 case BTRFS_TREE_BLOCK_REF_KEY:
9412 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9416 case BTRFS_SHARED_BLOCK_REF_KEY:
9417 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9421 case BTRFS_EXTENT_DATA_REF_KEY:
9422 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9423 ret = check_extent_data_backref(fs_info,
9424 btrfs_extent_data_ref_root(eb, dref),
9425 btrfs_extent_data_ref_objectid(eb, dref),
9426 btrfs_extent_data_ref_offset(eb, dref),
9427 key.objectid, key.offset,
9428 btrfs_extent_data_ref_count(eb, dref));
9431 case BTRFS_SHARED_DATA_REF_KEY:
9432 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9436 error("extent[%llu %d %llu] has unknown ref type: %d",
9437 key.objectid, key.type, key.offset, type);
9438 err |= UNKNOWN_TYPE;
9442 ptr += btrfs_extent_inline_ref_size(type);
9451 * Check if a dev extent item is referred correctly by its chunk
/*
 * Looks up the chunk named by the dev extent's chunk_objectid/offset,
 * verifies the lengths agree, then scans the chunk's stripes for one
 * whose (devid, offset) matches the dev extent's key.  Returns
 * REFERENCER_MISSING when no stripe matches.
 * NOTE(review): error returns on search/length failure and the leaf
 * assignment for 'l' are partly elided from this excerpt.
 */
9453 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9454 struct extent_buffer *eb, int slot)
9456 struct btrfs_root *chunk_root = fs_info->chunk_root;
9457 struct btrfs_dev_extent *ptr;
9458 struct btrfs_path path;
9459 struct btrfs_key chunk_key;
9460 struct btrfs_key devext_key;
9461 struct btrfs_chunk *chunk;
9462 struct extent_buffer *l;
9466 int found_chunk = 0;
9469 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9470 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9471 length = btrfs_dev_extent_length(eb, ptr);
/* The dev extent records which chunk it belongs to. */
9473 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9474 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9475 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9477 btrfs_init_path(&path);
9478 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9483 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9484 if (btrfs_chunk_length(l, chunk) != length)
/* One of the chunk's stripes must point back at this dev extent. */
9487 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9488 for (i = 0; i < num_stripes; i++) {
9489 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9490 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9492 if (devid == devext_key.objectid &&
9493 offset == devext_key.offset) {
9499 btrfs_release_path(&path);
9502 "device extent[%llu, %llu, %llu] did not find the related chunk",
9503 devext_key.objectid, devext_key.offset, length);
9504 return REFERENCER_MISSING;
9510 * Check if the used space is correct with the dev item
/*
 * Sums the lengths of all DEV_EXTENT items for this device in the dev
 * tree and compares the total against the dev item's bytes_used field.
 * Returns REFERENCER_MISSING when no dev extent exists at all, and
 * ACCOUNTING_MISMATCH on a total mismatch.
 * NOTE(review): locals ('total', 'used', 'ret'), the loop header and
 * the success return are partly elided from this excerpt.
 */
9512 static int check_dev_item(struct btrfs_fs_info *fs_info,
9513 struct extent_buffer *eb, int slot)
9515 struct btrfs_root *dev_root = fs_info->dev_root;
9516 struct btrfs_dev_item *dev_item;
9517 struct btrfs_path path;
9518 struct btrfs_key key;
9519 struct btrfs_dev_extent *ptr;
9525 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9526 dev_id = btrfs_device_id(eb, dev_item);
9527 used = btrfs_device_bytes_used(eb, dev_item);
9529 key.objectid = dev_id;
9530 key.type = BTRFS_DEV_EXTENT_KEY;
9533 btrfs_init_path(&path);
9534 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9536 btrfs_item_key_to_cpu(eb, &key, slot);
9537 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9538 key.objectid, key.type, key.offset);
9539 btrfs_release_path(&path);
9540 return REFERENCER_MISSING;
9543 /* Iterate dev_extents to calculate the used space of a device */
9545 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Stop once iteration passes this device's DEV_EXTENT items. */
9547 if (key.objectid > dev_id)
9549 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9552 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9553 struct btrfs_dev_extent);
9554 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9556 ret = btrfs_next_item(dev_root, &path);
9560 btrfs_release_path(&path);
9562 if (used != total) {
9563 btrfs_item_key_to_cpu(eb, &key, slot);
9565 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9566 total, used, BTRFS_ROOT_TREE_OBJECTID,
9567 BTRFS_DEV_EXTENT_KEY, dev_id);
9568 return ACCOUNTING_MISMATCH;
9574 * Check a block group item with its referener (chunk) and its used space
9575 * with extent/metadata item
/*
 * Cross-check one BLOCK_GROUP_ITEM at @slot of leaf @eb:
 *  - the owning chunk item must exist and have a matching length
 *  - each EXTENT_ITEM/METADATA_ITEM inside the block group range must
 *    agree with the block group type (data vs system/metadata)
 *  - the accumulated extent bytes must equal the block group's "used"
 * Errors are OR-ed into an err bitmask (REFERENCER_MISSING,
 * REFERENCER_MISMATCH, CHUNK_TYPE_MISMATCH, ACCOUNTING_MISMATCH)
 * which is returned to the caller.
 */
9577 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9578 struct extent_buffer *eb, int slot)
9580 struct btrfs_root *extent_root = fs_info->extent_root;
9581 struct btrfs_root *chunk_root = fs_info->chunk_root;
9582 struct btrfs_block_group_item *bi;
9583 struct btrfs_block_group_item bg_item;
9584 struct btrfs_path path;
9585 struct btrfs_key bg_key;
9586 struct btrfs_key chunk_key;
9587 struct btrfs_key extent_key;
9588 struct btrfs_chunk *chunk;
9589 struct extent_buffer *leaf;
9590 struct btrfs_extent_item *ei;
9591 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the on-disk block group item into a CPU-order local */
9599 btrfs_item_key_to_cpu(eb, &bg_key, slot);
9600 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9601 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9602 used = btrfs_block_group_used(&bg_item);
9603 bg_flags = btrfs_block_group_flags(&bg_item);
/*
 * The chunk backing this block group lives in the chunk tree at
 * (FIRST_CHUNK_TREE, CHUNK_ITEM, <block group start>).
 */
9605 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9606 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9607 chunk_key.offset = bg_key.objectid;
9609 btrfs_init_path(&path);
9610 /* Search for the referencer chunk */
9611 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9614 "block group[%llu %llu] did not find the related chunk item",
9615 bg_key.objectid, bg_key.offset);
9616 err |= REFERENCER_MISSING;
/* Chunk found: its length must equal the block group length (key offset) */
9618 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9619 struct btrfs_chunk);
9620 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9623 "block group[%llu %llu] related chunk item length does not match",
9624 bg_key.objectid, bg_key.offset);
9625 err |= REFERENCER_MISMATCH;
9628 btrfs_release_path(&path);
9630 /* Search from the block group bytenr */
9631 extent_key.objectid = bg_key.objectid;
9632 extent_key.type = 0;
9633 extent_key.offset = 0;
9635 btrfs_init_path(&path);
9636 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9640 /* Iterate extent tree to account used space */
9642 leaf = path.nodes[0];
9643 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
/* Stop once we walked past the end of the block group range */
9644 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9647 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9648 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9650 if (extent_key.objectid < bg_key.objectid)
/*
 * METADATA_ITEM keys carry the tree level in the key offset (the size
 * added for them is presumably nodesize, in elided code); EXTENT_ITEM
 * key offsets are the extent length in bytes.
 */
9653 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9656 total += extent_key.offset;
9658 ei = btrfs_item_ptr(leaf, path.slots[0],
9659 struct btrfs_extent_item);
9660 flags = btrfs_extent_flags(leaf, ei);
/* The extent type (data / tree block) must agree with the bg flags */
9661 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9662 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9664 "bad extent[%llu, %llu) type mismatch with chunk",
9665 extent_key.objectid,
9666 extent_key.objectid + extent_key.offset);
9667 err |= CHUNK_TYPE_MISMATCH;
9669 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9670 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9671 BTRFS_BLOCK_GROUP_METADATA))) {
9673 "bad extent[%llu, %llu) type mismatch with chunk",
9674 extent_key.objectid,
9675 extent_key.objectid + nodesize);
9676 err |= CHUNK_TYPE_MISMATCH;
9680 ret = btrfs_next_item(extent_root, &path);
9686 btrfs_release_path(&path);
/* The summed extent sizes must equal the block group's used bytes */
9688 if (total != used) {
9690 "block group[%llu %llu] used %llu but extent items used %llu",
9691 bg_key.objectid, bg_key.offset, used, total);
9692 err |= ACCOUNTING_MISMATCH;
9698 * Check a chunk item.
9699 * Including checking all referred dev_extents and block group
/*
 * Validate one CHUNK_ITEM at @slot of leaf @eb:
 *  - chunk length must be sectorsize aligned
 *  - chunk type must have a valid type bit and at most one profile bit
 *  - a matching BLOCK_GROUP_ITEM must exist with identical flags
 *  - every stripe must be backed by a DEV_EXTENT whose chunk
 *    objectid/offset and length match this chunk
 * Errors are OR-ed into an err bitmask and returned.
 */
9701 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9702 struct extent_buffer *eb, int slot)
9704 struct btrfs_root *extent_root = fs_info->extent_root;
9705 struct btrfs_root *dev_root = fs_info->dev_root;
9706 struct btrfs_path path;
9707 struct btrfs_key chunk_key;
9708 struct btrfs_key bg_key;
9709 struct btrfs_key devext_key;
9710 struct btrfs_chunk *chunk;
9711 struct extent_buffer *leaf;
9712 struct btrfs_block_group_item *bi;
9713 struct btrfs_block_group_item bg_item;
9714 struct btrfs_dev_extent *ptr;
9715 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9727 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9728 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9729 length = btrfs_chunk_length(eb, chunk);
9730 chunk_end = chunk_key.offset + length;
/* Chunk start/length are logical byte addresses, must be aligned */
9731 if (!IS_ALIGNED(length, sectorsize)) {
9732 error("chunk[%llu %llu) not aligned to %u",
9733 chunk_key.offset, chunk_end, sectorsize);
9734 err |= BYTES_UNALIGNED;
9738 type = btrfs_chunk_type(eb, chunk);
9739 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9740 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9741 error("chunk[%llu %llu) has no chunk type",
9742 chunk_key.offset, chunk_end);
9743 err |= UNKNOWN_TYPE;
/* profile & (profile - 1) is non-zero iff more than one bit is set */
9745 if (profile && (profile & (profile - 1))) {
9746 error("chunk[%llu %llu) multiple profiles detected: %llx",
9747 chunk_key.offset, chunk_end, profile);
9748 err |= UNKNOWN_TYPE;
/* The referencing block group starts at the chunk's logical offset */
9751 bg_key.objectid = chunk_key.offset;
9752 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9753 bg_key.offset = length;
9755 btrfs_init_path(&path);
9756 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9759 "chunk[%llu %llu) did not find the related block group item",
9760 chunk_key.offset, chunk_end);
9761 err |= REFERENCER_MISSING;
9763 leaf = path.nodes[0];
9764 bi = btrfs_item_ptr(leaf, path.slots[0],
9765 struct btrfs_block_group_item);
9766 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9768 if (btrfs_block_group_flags(&bg_item) != type) {
9770 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9771 chunk_key.offset, chunk_end, type,
9772 btrfs_block_group_flags(&bg_item));
/* NOTE(review): arguably REFERENCER_MISMATCH -- confirm intended class */
9773 err |= REFERENCER_MISSING;
/* Each stripe must be backed by a dev extent on its device */
9777 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9778 for (i = 0; i < num_stripes; i++) {
9779 btrfs_release_path(&path);
9780 btrfs_init_path(&path);
9781 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9782 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9783 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9785 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9790 leaf = path.nodes[0];
9791 ptr = btrfs_item_ptr(leaf, path.slots[0],
9792 struct btrfs_dev_extent);
9793 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9794 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
/* The dev extent must point back to exactly this chunk */
9795 if (objectid != chunk_key.objectid ||
9796 offset != chunk_key.offset ||
9797 btrfs_dev_extent_length(leaf, ptr) != length)
9801 err |= BACKREF_MISSING;
9803 "chunk[%llu %llu) stripe %d did not find the related dev extent",
/* Print the chunk start (key offset), matching the other messages */
9804 chunk_key.offset, chunk_end, i);
9807 btrfs_release_path(&path);
9813 * Main entry function to check known items and update related accounting info
/*
 * Walk every slot of leaf @eb and dispatch the matching low-memory
 * checker by key type, OR-ing each checker's error bits together.
 * EXTENT_CSUM items are not validated here, only accounted into the
 * global total_csum_bytes counter.
 */
9815 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9817 struct btrfs_fs_info *fs_info = root->fs_info;
9818 struct btrfs_key key;
9821 struct btrfs_extent_data_ref *dref;
9826 btrfs_item_key_to_cpu(eb, &key, slot);
9827 type = btrfs_key_type(&key);
9830 case BTRFS_EXTENT_DATA_KEY:
9831 ret = check_extent_data_item(root, eb, slot);
9834 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9835 ret = check_block_group_item(fs_info, eb, slot);
9838 case BTRFS_DEV_ITEM_KEY:
9839 ret = check_dev_item(fs_info, eb, slot);
9842 case BTRFS_CHUNK_ITEM_KEY:
9843 ret = check_chunk_item(fs_info, eb, slot);
9846 case BTRFS_DEV_EXTENT_KEY:
9847 ret = check_dev_extent_item(fs_info, eb, slot);
9850 case BTRFS_EXTENT_ITEM_KEY:
9851 case BTRFS_METADATA_ITEM_KEY:
9852 ret = check_extent_item(fs_info, eb, slot);
/* Csum items: accounting only, no validation at this point */
9855 case BTRFS_EXTENT_CSUM_KEY:
9856 total_csum_bytes += btrfs_item_size_nr(eb, slot);
/* Keyed backrefs: key.offset encodes the referencer */
9858 case BTRFS_TREE_BLOCK_REF_KEY:
9859 ret = check_tree_block_backref(fs_info, key.offset,
9863 case BTRFS_EXTENT_DATA_REF_KEY:
9864 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9865 ret = check_extent_data_backref(fs_info,
9866 btrfs_extent_data_ref_root(eb, dref),
9867 btrfs_extent_data_ref_objectid(eb, dref),
9868 btrfs_extent_data_ref_offset(eb, dref),
9870 btrfs_extent_data_ref_count(eb, dref));
9873 case BTRFS_SHARED_BLOCK_REF_KEY:
9874 ret = check_shared_block_backref(fs_info, key.offset,
9878 case BTRFS_SHARED_DATA_REF_KEY:
9879 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot until the leaf is exhausted */
9887 if (++slot < btrfs_header_nritems(eb))
9894 * Helper function for later fs/subvol tree check. To determine if a tree
9895 * block should be checked.
9896 * This function ensures that only the direct referencer with the lowest
9897 * rootid checks a fs/subvolume tree block.
9899 * Backref check at extent tree would detect errors like missing subvolume
9900 * tree, so we can do aggressive check to reduce duplicated checks.
/*
 * Decide whether @root should be the one to check tree block @eb.
 * Looks up the block's extent item and scans its inline refs: if an
 * inline TREE_BLOCK_REF with a rootid lower than @root's exists, some
 * other root owns the check and this returns early (elided return --
 * per the caller, non-zero means "check it").
 */
9902 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9904 struct btrfs_root *extent_root = root->fs_info->extent_root;
9905 struct btrfs_key key;
9906 struct btrfs_path path;
9907 struct extent_buffer *leaf;
9909 struct btrfs_extent_item *ei;
9915 struct btrfs_extent_inline_ref *iref;
9918 btrfs_init_path(&path);
/* offset (u64)-1 + previous-item walk lands on the block's extent item */
9919 key.objectid = btrfs_header_bytenr(eb);
9920 key.type = BTRFS_METADATA_ITEM_KEY;
9921 key.offset = (u64)-1;
9924 * Any failure in backref resolving means we can't determine
9925 * whom the tree block belongs to.
9926 * So in that case, we need to check that tree block
9928 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9932 ret = btrfs_previous_extent_item(extent_root, &path,
9933 btrfs_header_bytenr(eb));
9937 leaf = path.nodes[0];
9938 slot = path.slots[0];
9939 btrfs_item_key_to_cpu(leaf, &key, slot);
9940 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * Skinny METADATA_ITEMs have inline refs right after the extent item;
 * old EXTENT_ITEMs carry a tree_block_info in between.
 */
9942 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9943 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9945 struct btrfs_tree_block_info *info;
9947 info = (struct btrfs_tree_block_info *)(ei + 1);
9948 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Scan all inline refs within the item */
9951 item_size = btrfs_item_size_nr(leaf, slot);
9952 ptr = (unsigned long)iref;
9953 end = (unsigned long)ei + item_size;
9955 iref = (struct btrfs_extent_inline_ref *)ptr;
9956 type = btrfs_extent_inline_ref_type(leaf, iref);
9957 offset = btrfs_extent_inline_ref_offset(leaf, iref);
9960 * We only check the tree block if current root is
9961 * the lowest referencer of it.
9963 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
9964 offset < root->objectid) {
9965 btrfs_release_path(&path);
9969 ptr += btrfs_extent_inline_ref_size(type);
9972 * Normally we should also check keyed tree block ref, but that may be
9973 * very time consuming. Inlined ref should already make us skip a lot
9974 * of refs now. So skip search keyed tree block ref.
9978 btrfs_release_path(&path);
9983 * Traversal function for tree block. We will do:
9984 * 1) Skip shared fs/subvolume tree blocks
9985 * 2) Update related bytes accounting
9986 * 3) Pre-order traversal
/*
 * Recursively check one tree block and all of its children:
 * skip shared fs/subvol blocks owned by a lower root, update the
 * global byte-accounting counters, check the block itself (ref +
 * leaf items), then recurse into each child pointer.
 */
9988 static int traverse_tree_block(struct btrfs_root *root,
9989 struct extent_buffer *node)
9991 struct extent_buffer *eb;
9999 * Skip shared fs/subvolume tree block, in that case they will
10000 * be checked by referencer with lowest rootid
10002 if (is_fstree(root->objectid) && !should_check(root, node))
10005 /* Update bytes accounting */
10006 total_btree_bytes += node->len;
10007 if (fs_root_objectid(btrfs_header_owner(node)))
10008 total_fs_tree_bytes += node->len;
10009 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10010 total_extent_tree_bytes += node->len;
/* Detect pre-mixed-backref reloc trees (old backref format) once */
10011 if (!found_old_backref &&
10012 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10013 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10014 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10015 found_old_backref = 1;
10017 /* pre-order traversal, check itself first */
10018 level = btrfs_header_level(node);
10019 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10020 btrfs_header_level(node),
10021 btrfs_header_owner(node));
/* On error, report but keep going so one bad block doesn't stop the scan */
10025 "check %s failed root %llu bytenr %llu level %d, force continue check",
10026 level ? "node":"leaf", root->objectid,
10027 btrfs_header_bytenr(node), btrfs_header_level(node));
/* Leaves: account free space and validate their items */
10030 btree_space_waste += btrfs_leaf_free_space(root, node);
10031 ret = check_leaf_items(root, node);
/* Interior nodes: account unused key-pointer slots as waste */
10036 nr = btrfs_header_nritems(node);
10037 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10038 sizeof(struct btrfs_key_ptr);
10040 /* Then check all its children */
10041 for (i = 0; i < nr; i++) {
10042 u64 blocknr = btrfs_node_blockptr(node, i);
10045 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10046 * to call the function itself.
10048 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10049 if (extent_buffer_uptodate(eb)) {
10050 ret = traverse_tree_block(root, eb);
10053 free_extent_buffer(eb);
10060 * Low memory usage version check_chunks_and_extents.
10062 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10064 struct btrfs_path path;
10065 struct btrfs_key key;
10066 struct btrfs_root *root1;
10067 struct btrfs_root *cur_root;
10071 root1 = root->fs_info->chunk_root;
10072 ret = traverse_tree_block(root1, root1->node);
10075 root1 = root->fs_info->tree_root;
10076 ret = traverse_tree_block(root1, root1->node);
10079 btrfs_init_path(&path);
10080 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10082 key.type = BTRFS_ROOT_ITEM_KEY;
10084 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10086 error("cannot find extent treet in tree_root");
10091 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10092 if (key.type != BTRFS_ROOT_ITEM_KEY)
10094 key.offset = (u64)-1;
10096 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10097 if (IS_ERR(cur_root) || !cur_root) {
10098 error("failed to read tree: %lld", key.objectid);
10102 ret = traverse_tree_block(cur_root, cur_root->node);
10106 ret = btrfs_next_item(root1, &path);
10112 btrfs_release_path(&path);
/*
 * Replace @root's node with a freshly allocated (or, with @overwrite,
 * reused) empty block and update the root item in the tree root.
 * Used when re-initializing corrupted trees (extent root, data reloc
 * root) during repair.
 */
10116 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10117 struct btrfs_root *root, int overwrite)
10119 struct extent_buffer *c;
10120 struct extent_buffer *old = root->node;
10123 struct btrfs_disk_key disk_key = {0,0,0};
10129 extent_buffer_get(c);
10132 c = btrfs_alloc_free_block(trans, root,
10134 root->root_key.objectid,
10135 &disk_key, level, 0, 0);
10138 extent_buffer_get(c);
/* Initialize an empty header for the new root block */
10142 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10143 btrfs_set_header_level(c, level);
10144 btrfs_set_header_bytenr(c, c->start);
10145 btrfs_set_header_generation(c, trans->transid);
10146 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10147 btrfs_set_header_owner(c, root->root_key.objectid);
10149 write_extent_buffer(c, root->fs_info->fsid,
10150 btrfs_header_fsid(), BTRFS_FSID_SIZE);
10152 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10153 btrfs_header_chunk_tree_uuid(c),
10156 btrfs_mark_buffer_dirty(c);
/*
 * The new block can end up at the same bytenr as the old root in
 * these cases:
 * 1. overwrite of the previous root.
 * 2. reinit of the reloc data root: we skip pinning down the reloc
 *    data tree beforehand, so the same block bytenr can be allocated
 *    here.  In that case the root item must be updated explicitly.
 */
10166 if (old->start == c->start) {
10167 btrfs_set_root_generation(&root->root_item,
10169 root->root_item.level = btrfs_header_level(root->node);
10170 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10171 &root->root_key, &root->root_item);
10173 free_extent_buffer(c);
10177 free_extent_buffer(old);
10179 add_root_to_dirty_list(root);
/*
 * Recursively pin every tree block reachable from @eb so the allocator
 * will not hand those bytes out while the extent tree is rebuilt.
 * @tree_root: non-zero when @eb belongs to the tree root, in which case
 * leaves are scanned for ROOT_ITEMs and the referenced trees are pinned
 * as well.
 */
10183 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10184 struct extent_buffer *eb, int tree_root)
10186 struct extent_buffer *tmp;
10187 struct btrfs_root_item *ri;
10188 struct btrfs_key key;
10191 int level = btrfs_header_level(eb);
10197 * If we have pinned this block before, don't pin it again.
10198 * This can not only avoid forever loop with broken filesystem
10199 * but also give us some speedups.
10201 if (test_range_bit(&fs_info->pinned_extents, eb->start,
10202 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10205 btrfs_pin_extent(fs_info, eb->start, eb->len);
10207 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10208 nritems = btrfs_header_nritems(eb);
10209 for (i = 0; i < nritems; i++) {
/* Leaf of the tree root: follow each ROOT_ITEM to its tree */
10211 btrfs_item_key_to_cpu(eb, &key, i);
10212 if (key.type != BTRFS_ROOT_ITEM_KEY)
10214 /* Skip the extent root and reloc roots */
10215 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10216 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10217 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10219 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10220 bytenr = btrfs_disk_root_bytenr(eb, ri);
10223 * If at any point we start needing the real root we
10224 * will have to build a stump root for the root we are
10225 * in, but for now this doesn't actually use the root so
10226 * just pass in extent_root.
10228 tmp = read_tree_block(fs_info->extent_root, bytenr,
10230 if (!extent_buffer_uptodate(tmp)) {
10231 fprintf(stderr, "Error reading root block\n");
10234 ret = pin_down_tree_blocks(fs_info, tmp, 0);
10235 free_extent_buffer(tmp);
/* Interior node: recurse into each child pointer */
10239 bytenr = btrfs_node_blockptr(eb, i);
10241 /* If we aren't the tree root don't read the block */
10242 if (level == 1 && !tree_root) {
10243 btrfs_pin_extent(fs_info, bytenr, nodesize);
10247 tmp = read_tree_block(fs_info->extent_root, bytenr,
10249 if (!extent_buffer_uptodate(tmp)) {
10250 fprintf(stderr, "Error reading tree block\n");
10253 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10254 free_extent_buffer(tmp);
/*
 * Pin all metadata reachable from the chunk root and the tree root
 * (the latter with tree_root=1 so referenced trees are followed too).
 * Returns the result of the second pin pass; errors in the first pass
 * are handled in elided code.
 */
10263 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10267 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10271 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block group cache from the chunk tree: clear
 * the avail_*_alloc_bits, create one in-memory block group per chunk
 * item and mark its range dirty in the free space cache so it can be
 * allocated from.  Used after btrfs_free_block_groups() when
 * re-initializing the extent tree.
 */
10274 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10276 struct btrfs_block_group_cache *cache;
10277 struct btrfs_path *path;
10278 struct extent_buffer *leaf;
10279 struct btrfs_chunk *chunk;
10280 struct btrfs_key key;
10284 path = btrfs_alloc_path();
10289 key.type = BTRFS_CHUNK_ITEM_KEY;
10292 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10294 btrfs_free_path(path);
10299 * We do this in case the block groups were screwed up and had alloc
10300 * bits that aren't actually set on the chunks. This happens with
10301 * restored images every time and could happen in real life I guess.
10303 fs_info->avail_data_alloc_bits = 0;
10304 fs_info->avail_metadata_alloc_bits = 0;
10305 fs_info->avail_system_alloc_bits = 0;
10307 /* First we need to create the in-memory block groups */
10309 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10310 ret = btrfs_next_leaf(fs_info->chunk_root, path);
10312 btrfs_free_path(path);
10320 leaf = path->nodes[0];
10321 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10322 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10327 chunk = btrfs_item_ptr(leaf, path->slots[0],
10328 struct btrfs_chunk);
/* key.offset is the chunk's logical start address */
10329 btrfs_add_block_group(fs_info, 0,
10330 btrfs_chunk_type(leaf, chunk),
10331 key.objectid, key.offset,
10332 btrfs_chunk_length(leaf, chunk));
10333 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10334 key.offset + btrfs_chunk_length(leaf, chunk),
/* Walk the freshly created block groups (post-processing elided) */
10340 cache = btrfs_lookup_first_block_group(fs_info, start);
10344 start = cache->key.objectid + cache->key.offset;
10347 btrfs_free_path(path);
/*
 * Cancel any pending balance operation: delete the balance item, delete
 * all TREE_RELOC root items from the tree root, then reinitialize the
 * data reloc tree (re-read it, reinit its root node, recreate its root
 * dir).
 */
10351 static int reset_balance(struct btrfs_trans_handle *trans,
10352 struct btrfs_fs_info *fs_info)
10354 struct btrfs_root *root = fs_info->tree_root;
10355 struct btrfs_path *path;
10356 struct extent_buffer *leaf;
10357 struct btrfs_key key;
10358 int del_slot, del_nr = 0;
10362 path = btrfs_alloc_path();
/* Step 1: remove the balance item, if present */
10366 key.objectid = BTRFS_BALANCE_OBJECTID;
10367 key.type = BTRFS_BALANCE_ITEM_KEY;
10370 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10375 goto reinit_data_reloc;
10380 ret = btrfs_del_item(trans, root, path);
10383 btrfs_release_path(path);
/* Step 2: batch-delete all TREE_RELOC root items */
10385 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10386 key.type = BTRFS_ROOT_ITEM_KEY;
10389 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10393 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10398 ret = btrfs_del_items(trans, root, path,
10405 btrfs_release_path(path);
10408 ret = btrfs_search_slot(trans, root, &key, path,
10415 leaf = path->nodes[0];
10416 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10417 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10419 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10424 del_slot = path->slots[0];
10433 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10437 btrfs_release_path(path);
/* Step 3: reinitialize the data reloc tree from scratch */
10440 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10441 key.type = BTRFS_ROOT_ITEM_KEY;
10442 key.offset = (u64)-1;
10443 root = btrfs_read_fs_root(fs_info, &key);
10444 if (IS_ERR(root)) {
10445 fprintf(stderr, "Error reading data reloc tree\n");
10446 ret = PTR_ERR(root);
10449 record_root_in_trans(trans, root);
10450 ret = btrfs_fsck_reinit_root(trans, root, 0);
10453 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10455 btrfs_free_path(path);
/*
 * Rebuild the extent tree from scratch: refuse mixed block groups, pin
 * all in-use metadata, reset the in-memory block groups, reinit the
 * extent root, re-insert a block group item per block group, and
 * finally reset any pending balance.  Extent items themselves are
 * regenerated later from the pinned/used state.
 */
10459 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10460 struct btrfs_fs_info *fs_info)
10466 * The only reason we don't do this is because right now we're just
10467 * walking the trees we find and pinning down their bytes, we don't look
10468 * at any of the leaves. In order to do mixed groups we'd have to check
10469 * the leaves of any fs roots and pin down the bytes for any file
10470 * extents we find. Not hard but why do it if we don't have to?
10472 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10473 fprintf(stderr, "We don't support re-initing the extent tree "
10474 "for mixed block groups yet, please notify a btrfs "
10475 "developer you want to do this so they can add this "
10476 "functionality.\n");
10481 * first we need to walk all of the trees except the extent tree and pin
10482 * down the bytes that are in use so we don't overwrite any existing
10485 ret = pin_metadata_blocks(fs_info);
10487 fprintf(stderr, "error pinning down used bytes\n");
10492 * Need to drop all the block groups since we're going to recreate all
10495 btrfs_free_block_groups(fs_info);
10496 ret = reset_block_groups(fs_info);
10498 fprintf(stderr, "error resetting the block groups\n");
10502 /* Ok we can allocate now, reinit the extent root */
10503 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10505 fprintf(stderr, "extent root initialization failed\n");
10507 * When the transaction code is updated we should end the
10508 * transaction, but for now progs only knows about commit so
10509 * just return an error.
10515 * Now we have all the in-memory block groups setup so we can make
10516 * allocations properly, and the metadata we care about is safe since we
10517 * pinned all of it above.
10520 struct btrfs_block_group_cache *cache;
/* Re-insert one BLOCK_GROUP_ITEM per in-memory block group */
10522 cache = btrfs_lookup_first_block_group(fs_info, start);
10525 start = cache->key.objectid + cache->key.offset;
10526 ret = btrfs_insert_item(trans, fs_info->extent_root,
10527 &cache->key, &cache->item,
10528 sizeof(cache->item));
10530 fprintf(stderr, "Error adding block group\n");
10533 btrfs_extent_post_op(trans, fs_info->extent_root);
10536 ret = reset_balance(trans, fs_info);
10538 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a copy-on-write of metadata block @eb: look up its owner root,
 * then do a COW-enabled search (cow=1) down to the block's own level so
 * the search path rewrites it.  Commits the transaction before
 * returning.
 */
10543 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10545 struct btrfs_path *path;
10546 struct btrfs_trans_handle *trans;
10547 struct btrfs_key key;
10550 printf("Recowing metadata block %llu\n", eb->start);
10551 key.objectid = btrfs_header_owner(eb);
10552 key.type = BTRFS_ROOT_ITEM_KEY;
10553 key.offset = (u64)-1;
/* Re-resolve @root to the tree that actually owns the block */
10555 root = btrfs_read_fs_root(root->fs_info, &key);
10556 if (IS_ERR(root)) {
10557 fprintf(stderr, "Couldn't find owner root %llu\n",
10559 return PTR_ERR(root);
10562 path = btrfs_alloc_path();
10566 trans = btrfs_start_transaction(root, 1);
10567 if (IS_ERR(trans)) {
10568 btrfs_free_path(path);
10569 return PTR_ERR(trans);
/* Descend only as far as @eb's level, keyed by its first key */
10572 path->lowest_level = btrfs_header_level(eb);
10573 if (path->lowest_level)
10574 btrfs_node_key_to_cpu(eb, &key, 0);
10576 btrfs_item_key_to_cpu(eb, &key, 0);
/* cow=1: the search itself performs the rewrite */
10578 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10579 btrfs_commit_transaction(trans, root);
10580 btrfs_free_path(path);
/*
 * Delete a previously recorded bad item: resolve the root it lives in
 * (bad->root_id), search for bad->key with a delete-capable path and
 * remove the item, committing the transaction afterwards.
 */
10584 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10586 struct btrfs_path *path;
10587 struct btrfs_trans_handle *trans;
10588 struct btrfs_key key;
10591 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10592 bad->key.type, bad->key.offset);
10593 key.objectid = bad->root_id;
10594 key.type = BTRFS_ROOT_ITEM_KEY;
10595 key.offset = (u64)-1;
10597 root = btrfs_read_fs_root(root->fs_info, &key);
10598 if (IS_ERR(root)) {
10599 fprintf(stderr, "Couldn't find owner root %llu\n",
10601 return PTR_ERR(root);
10604 path = btrfs_alloc_path();
10608 trans = btrfs_start_transaction(root, 1);
10609 if (IS_ERR(trans)) {
10610 btrfs_free_path(path);
10611 return PTR_ERR(trans);
/* ins_len=-1, cow=1: position the path for deletion */
10614 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10620 ret = btrfs_del_item(trans, root, path);
10622 btrfs_commit_transaction(trans, root);
10623 btrfs_free_path(path);
/*
 * Discard the log tree by zeroing the log root pointer and level in the
 * superblock copy, then committing a transaction to write it out.
 */
10627 static int zero_log_tree(struct btrfs_root *root)
10629 struct btrfs_trans_handle *trans;
10632 trans = btrfs_start_transaction(root, 1);
10633 if (IS_ERR(trans)) {
10634 ret = PTR_ERR(trans);
10637 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10638 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10639 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data extent [start, start+len):
 * read it back one sector at a time into @buf and feed each sector to
 * btrfs_csum_file_block() (start + len is presumably the extent end
 * limit expected by that helper -- confirm against its prototype).
 */
10643 static int populate_csum(struct btrfs_trans_handle *trans,
10644 struct btrfs_root *csum_root, char *buf, u64 start,
10651 while (offset < len) {
10652 sectorsize = csum_root->sectorsize;
10653 ret = read_extent_data(csum_root, buf, start + offset,
10657 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10658 start + offset, buf, sectorsize);
10661 offset += sectorsize;
/*
 * Walk one fs/subvol tree (@cur_root) and regenerate checksums for
 * every regular (REG) file extent it references, skipping other extent
 * types (inline/prealloc) and tolerating -EEXIST from already-present
 * csums.
 */
10666 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10667 struct btrfs_root *csum_root,
10668 struct btrfs_root *cur_root)
10670 struct btrfs_path *path;
10671 struct btrfs_key key;
10672 struct extent_buffer *node;
10673 struct btrfs_file_extent_item *fi;
10680 path = btrfs_alloc_path();
/* One sector-sized scratch buffer shared by all populate_csum() calls */
10683 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10693 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10696 /* Iterate all regular file extents and fill its csum */
10698 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10700 if (key.type != BTRFS_EXTENT_DATA_KEY)
10702 node = path->nodes[0];
10703 slot = path->slots[0];
10704 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10705 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Csum the whole on-disk extent, not just the referenced part */
10707 start = btrfs_file_extent_disk_bytenr(node, fi);
10708 len = btrfs_file_extent_disk_num_bytes(node, fi);
10710 ret = populate_csum(trans, csum_root, buf, start, len);
10711 if (ret == -EEXIST)
10717 * TODO: if next leaf is corrupted, jump to nearest next valid
10720 ret = btrfs_next_item(cur_root, path);
10730 btrfs_free_path(path);
/*
 * Rebuild the csum tree by walking every fs/subvolume tree: iterate
 * ROOT_ITEMs in the tree root starting at FS_TREE, and for each fstree
 * objectid call fill_csum_tree_from_one_fs_root().
 */
10735 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10736 struct btrfs_root *csum_root)
10738 struct btrfs_fs_info *fs_info = csum_root->fs_info;
10739 struct btrfs_path *path;
10740 struct btrfs_root *tree_root = fs_info->tree_root;
10741 struct btrfs_root *cur_root;
10742 struct extent_buffer *node;
10743 struct btrfs_key key;
10747 path = btrfs_alloc_path();
10751 key.objectid = BTRFS_FS_TREE_OBJECTID;
10753 key.type = BTRFS_ROOT_ITEM_KEY;
10755 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10764 node = path->nodes[0];
10765 slot = path->slots[0];
10766 btrfs_item_key_to_cpu(node, &key, slot);
/* Past the last possible subvolume objectid: done */
10767 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10769 if (key.type != BTRFS_ROOT_ITEM_KEY)
10771 if (!is_fstree(key.objectid))
/* offset (u64)-1: read the latest version of the subvol root */
10773 key.offset = (u64)-1;
10775 cur_root = btrfs_read_fs_root(fs_info, &key);
10776 if (IS_ERR(cur_root) || !cur_root) {
10777 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10781 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10786 ret = btrfs_next_item(tree_root, path);
10796 btrfs_free_path(path);
/*
 * Rebuild the csum tree by walking the extent tree: for every
 * EXTENT_ITEM carrying the DATA flag, regenerate its checksums with
 * populate_csum().  Only usable when the extent tree is trustworthy
 * (see fill_csum_tree()).
 */
10800 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10801 struct btrfs_root *csum_root)
10803 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10804 struct btrfs_path *path;
10805 struct btrfs_extent_item *ei;
10806 struct extent_buffer *leaf;
10808 struct btrfs_key key;
10811 path = btrfs_alloc_path();
10816 key.type = BTRFS_EXTENT_ITEM_KEY;
10819 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10821 btrfs_free_path(path);
/* One sector-sized scratch buffer reused for every extent */
10825 buf = malloc(csum_root->sectorsize);
10827 btrfs_free_path(path);
10832 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10833 ret = btrfs_next_leaf(extent_root, path);
10841 leaf = path->nodes[0];
10843 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10844 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10849 ei = btrfs_item_ptr(leaf, path->slots[0],
10850 struct btrfs_extent_item);
/* Skip metadata extents: only data extents carry checksums */
10851 if (!(btrfs_extent_flags(leaf, ei) &
10852 BTRFS_EXTENT_FLAG_DATA)) {
10857 ret = populate_csum(trans, csum_root, buf, key.objectid,
10864 btrfs_free_path(path);
10870 * Recalculate the csum and put it into the csum tree.
10872 * Extent tree init will wipe out all the extent info, so in that case, we
10873 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
10874 * will use fs/subvol trees to init the csum tree.
/*
 * Regenerate the csum tree, choosing the data source per the comment
 * above: walk the fs/subvol trees when @search_fs_tree is set (the
 * extent tree was wiped by --init-extent-tree), otherwise walk the
 * extent tree directly.
 */
10876 static int fill_csum_tree(struct btrfs_trans_handle *trans,
10877 struct btrfs_root *csum_root,
10878 int search_fs_tree)
10880 if (search_fs_tree)
10881 return fill_csum_tree_from_fs(trans, csum_root);
10883 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache: free every root_item_info
 * entry, then the cache tree itself, and reset the pointer so the
 * cache can be rebuilt later.  No-op when the cache was never built.
 */
10886 static void free_roots_info_cache(void)
10888 if (!roots_info_cache)
10891 while (!cache_tree_empty(roots_info_cache)) {
10892 struct cache_extent *entry;
10893 struct root_item_info *rii;
10895 entry = first_cache_extent(roots_info_cache);
10898 remove_cache_extent(roots_info_cache, entry);
10899 rii = container_of(entry, struct root_item_info, cache_extent);
10903 free(roots_info_cache);
10904 roots_info_cache = NULL;
/*
 * Scan the whole extent tree and record, per root id, the highest-level
 * tree block extent found (bytenr, generation, level, node_count) into
 * the global roots_info_cache.  This is later compared against the
 * on-disk root items by maybe_repair_root_item().
 */
10907 static int build_roots_info_cache(struct btrfs_fs_info *info)
10910 struct btrfs_key key;
10911 struct extent_buffer *leaf;
10912 struct btrfs_path *path;
/* Lazily allocate the cache; it survives across calls until freed */
10914 if (!roots_info_cache) {
10915 roots_info_cache = malloc(sizeof(*roots_info_cache));
10916 if (!roots_info_cache)
10918 cache_tree_init(roots_info_cache);
10921 path = btrfs_alloc_path();
10926 key.type = BTRFS_EXTENT_ITEM_KEY;
10929 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10932 leaf = path->nodes[0];
10935 struct btrfs_key found_key;
10936 struct btrfs_extent_item *ei;
10937 struct btrfs_extent_inline_ref *iref;
10938 int slot = path->slots[0];
10943 struct cache_extent *entry;
10944 struct root_item_info *rii;
10946 if (slot >= btrfs_header_nritems(leaf)) {
10947 ret = btrfs_next_leaf(info->extent_root, path);
10954 leaf = path->nodes[0];
10955 slot = path->slots[0];
10958 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* Only tree block extents are interesting here */
10960 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10961 found_key.type != BTRFS_METADATA_ITEM_KEY)
10964 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10965 flags = btrfs_extent_flags(leaf, ei);
10967 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10968 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * Skinny metadata items encode the level in the key offset; old-style
 * extent items carry a tree_block_info before the inline refs.
 */
10971 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10972 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10973 level = found_key.offset;
10975 struct btrfs_tree_block_info *binfo;
10977 binfo = (struct btrfs_tree_block_info *)(ei + 1);
10978 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10979 level = btrfs_tree_block_level(leaf, binfo);
10983 * For a root extent, it must be of the following type and the
10984 * first (and only one) iref in the item.
10986 type = btrfs_extent_inline_ref_type(leaf, iref);
10987 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10990 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
/* First sighting of this root id: insert a fresh cache entry */
10991 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10993 rii = malloc(sizeof(struct root_item_info));
10998 rii->cache_extent.start = root_id;
10999 rii->cache_extent.size = 1;
11000 rii->level = (u8)-1;
11001 entry = &rii->cache_extent;
11002 ret = insert_cache_extent(roots_info_cache, entry);
11005 rii = container_of(entry, struct root_item_info,
11009 ASSERT(rii->cache_extent.start == root_id);
11010 ASSERT(rii->cache_extent.size == 1);
/* Keep only the highest-level block seen as the candidate root node */
11012 if (level > rii->level || rii->level == (u8)-1) {
11013 rii->level = level;
11014 rii->bytenr = found_key.objectid;
11015 rii->gen = btrfs_extent_generation(leaf, ei);
11016 rii->node_count = 1;
11017 } else if (level == rii->level) {
11025 btrfs_free_path(path);
/*
 * Compare one on-disk root item (at @path) with the candidate root node
 * recorded in roots_info_cache for @root_key->objectid.  If bytenr,
 * level or generation disagree, report the problem; unless
 * @read_only_mode is set, rewrite the root item in place with the
 * cached values.  @path must already be positioned at the root item.
 */
11030 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11031 struct btrfs_path *path,
11032 const struct btrfs_key *root_key,
11033 const int read_only_mode)
11035 const u64 root_id = root_key->objectid;
11036 struct cache_extent *entry;
11037 struct root_item_info *rii;
11038 struct btrfs_root_item ri;
11039 unsigned long offset;
11041 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11044 "Error: could not find extent items for root %llu\n",
11045 root_key->objectid);
11049 rii = container_of(entry, struct root_item_info, cache_extent);
11050 ASSERT(rii->cache_extent.start == root_id);
11051 ASSERT(rii->cache_extent.size == 1);
/* Exactly one candidate root block must have been found */
11053 if (rii->node_count != 1) {
11055 "Error: could not find btree root extent for root %llu\n",
/* Read the current root item contents from the leaf */
11060 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11061 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
11063 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11064 btrfs_root_level(&ri) != rii->level ||
11065 btrfs_root_generation(&ri) != rii->gen) {
11068 * If we're in repair mode but our caller told us to not update
11069 * the root item, i.e. just check if it needs to be updated, don't
11070 * print this message, since the caller will call us again shortly
11071 * for the same root item without read only mode (the caller will
11072 * open a transaction first).
11074 if (!(read_only_mode && repair))
11076 "%sroot item for root %llu,"
11077 " current bytenr %llu, current gen %llu, current level %u,"
11078 " new bytenr %llu, new gen %llu, new level %u\n",
11079 (read_only_mode ? "" : "fixing "),
11081 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11082 btrfs_root_level(&ri),
11083 rii->bytenr, rii->gen, rii->level);
/* A newer generation in the item than on disk is beyond this repair */
11085 if (btrfs_root_generation(&ri) > rii->gen) {
11087 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11088 root_id, btrfs_root_generation(&ri), rii->gen);
11092 if (!read_only_mode) {
11093 btrfs_set_root_bytenr(&ri, rii->bytenr);
11094 btrfs_set_root_level(&ri, rii->level);
11095 btrfs_set_root_generation(&ri, rii->gen);
11096 write_extent_buffer(path->nodes[0], &ri,
11097 offset, sizeof(ri));
11107 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11108 * caused read-only snapshots to be corrupted if they were created at a moment
11109 * when the source subvolume/snapshot had orphan items. The issue was that the
11110 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11111 * node instead of the post orphan cleanup root node.
11112 * So this function, and its callees, just detects and fixes those cases. Even
11113 * though the regression was for read-only snapshots, this function applies to
11114 * any snapshot/subvolume root.
11115 * This must be run before any other repair code - not doing so makes other
11116 * repair code delete or modify backrefs in the extent tree for example, which
11117 * will result in an inconsistent fs after repairing the root items.
/*
 * Walk every ROOT_ITEM in the tree of tree roots and run
 * maybe_repair_root_item() on each, committing a transaction per leaf only
 * when something on that leaf actually needs fixing (see comment below).
 * Returns the number of repaired/outdated roots on success, negative on
 * error (exact return plumbing is partly elided in this excerpt).
 *
 * NOTE(review): loop-restart labels, error checks and several gotos between
 * the visible statements are elided here; comments describe visible code only.
 */
11119 static int repair_root_items(struct btrfs_fs_info *info)
11121 struct btrfs_path *path = NULL;
11122 struct btrfs_key key;
11123 struct extent_buffer *leaf;
11124 struct btrfs_trans_handle *trans = NULL;
11127 int need_trans = 0;
/* First pass: collect the real root node (bytenr/level/gen) per root. */
11129 ret = build_roots_info_cache(info);
11133 path = btrfs_alloc_path();
/* Start at the first possible subvolume/snapshot root item. */
11139 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11140 key.type = BTRFS_ROOT_ITEM_KEY;
11145 * Avoid opening and committing transactions if a leaf doesn't have
11146 * any root items that need to be fixed, so that we avoid rotating
11147 * backup roots unnecessarily.
/* Open a transaction only when this leaf was found to need repairs. */
11150 trans = btrfs_start_transaction(info->tree_root, 1);
11151 if (IS_ERR(trans)) {
11152 ret = PTR_ERR(trans);
11157 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11161 leaf = path->nodes[0];
11164 struct btrfs_key found_key;
/* Leaf exhausted: remember the next key, then commit/advance. */
11166 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11167 int no_more_keys = find_next_key(path, &key);
11169 btrfs_release_path(path);
11171 ret = btrfs_commit_transaction(trans,
11183 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* Only ROOT_ITEMs matter; relocation trees are skipped entirely. */
11185 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11187 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
/* read-only when no transaction is open; see need_trans handling. */
11190 ret = maybe_repair_root_item(info, path, &found_key,
/*
 * A fix is needed but we scanned this leaf without a transaction:
 * restart the leaf with one open (restart path elided here).
 */
11195 if (!trans && repair) {
11198 btrfs_release_path(path);
/* Cleanup: drop the cache, the path, and commit any open transaction. */
11208 free_roots_info_cache();
11209 btrfs_free_path(path);
11211 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Help text for "btrfs check", printed via usage(); one string per line.
 * (The NULL terminator and closing brace of this array are elided in this
 * excerpt.)
 */
const char * const cmd_check_usage[] = {
11219 "btrfs check [options] <device>",
11220 "Check structural integrity of a filesystem (unmounted).",
11221 "Check structural integrity of an unmounted filesystem. Verify internal",
11222 "trees' consistency and item connectivity. In the repair mode try to",
11223 "fix the problems found. ",
11224 "WARNING: the repair mode is considered dangerous",
11226 "-s|--super <superblock> use this superblock copy",
11227 "-b|--backup use the first valid backup root copy",
11228 "--repair try to repair the filesystem",
11229 "--readonly run in read-only mode (default)",
11230 "--init-csum-tree create a new CRC tree",
11231 "--init-extent-tree create a new extent tree",
11232 "--mode <MODE> select mode, allows to make some memory/IO",
11233 " trade-offs, where MODE is one of:",
11234 " original - read inodes and extents to memory (requires",
11235 " more memory, does less IO)",
11236 " lowmem - try to use less memory but read blocks again",
11238 "--check-data-csum verify checksums of data blocks",
11239 "-Q|--qgroup-report print a report on qgroup consistency",
11240 "-E|--subvol-extents <subvolid>",
11241 " print subvolume extents and sharing state",
11242 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11243 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11244 "-p|--progress indicate progress",
11248 int cmd_check(int argc, char **argv)
11250 struct cache_tree root_cache;
11251 struct btrfs_root *root;
11252 struct btrfs_fs_info *info;
11255 u64 tree_root_bytenr = 0;
11256 u64 chunk_root_bytenr = 0;
11257 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11260 int init_csum_tree = 0;
11262 int qgroup_report = 0;
11263 int qgroups_repaired = 0;
11264 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11268 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11269 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11270 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11272 static const struct option long_options[] = {
11273 { "super", required_argument, NULL, 's' },
11274 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11275 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11276 { "init-csum-tree", no_argument, NULL,
11277 GETOPT_VAL_INIT_CSUM },
11278 { "init-extent-tree", no_argument, NULL,
11279 GETOPT_VAL_INIT_EXTENT },
11280 { "check-data-csum", no_argument, NULL,
11281 GETOPT_VAL_CHECK_CSUM },
11282 { "backup", no_argument, NULL, 'b' },
11283 { "subvol-extents", required_argument, NULL, 'E' },
11284 { "qgroup-report", no_argument, NULL, 'Q' },
11285 { "tree-root", required_argument, NULL, 'r' },
11286 { "chunk-root", required_argument, NULL,
11287 GETOPT_VAL_CHUNK_TREE },
11288 { "progress", no_argument, NULL, 'p' },
11289 { "mode", required_argument, NULL,
11291 { NULL, 0, NULL, 0}
11294 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11298 case 'a': /* ignored */ break;
11300 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11303 num = arg_strtou64(optarg);
11304 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11306 "ERROR: super mirror should be less than: %d\n",
11307 BTRFS_SUPER_MIRROR_MAX);
11310 bytenr = btrfs_sb_offset(((int)num));
11311 printf("using SB copy %llu, bytenr %llu\n", num,
11312 (unsigned long long)bytenr);
11318 subvolid = arg_strtou64(optarg);
11321 tree_root_bytenr = arg_strtou64(optarg);
11323 case GETOPT_VAL_CHUNK_TREE:
11324 chunk_root_bytenr = arg_strtou64(optarg);
11327 ctx.progress_enabled = true;
11331 usage(cmd_check_usage);
11332 case GETOPT_VAL_REPAIR:
11333 printf("enabling repair mode\n");
11335 ctree_flags |= OPEN_CTREE_WRITES;
11337 case GETOPT_VAL_READONLY:
11340 case GETOPT_VAL_INIT_CSUM:
11341 printf("Creating a new CRC tree\n");
11342 init_csum_tree = 1;
11344 ctree_flags |= OPEN_CTREE_WRITES;
11346 case GETOPT_VAL_INIT_EXTENT:
11347 init_extent_tree = 1;
11348 ctree_flags |= (OPEN_CTREE_WRITES |
11349 OPEN_CTREE_NO_BLOCK_GROUPS);
11352 case GETOPT_VAL_CHECK_CSUM:
11353 check_data_csum = 1;
11355 case GETOPT_VAL_MODE:
11356 check_mode = parse_check_mode(optarg);
11357 if (check_mode == CHECK_MODE_UNKNOWN) {
11358 error("unknown mode: %s", optarg);
11365 if (check_argc_exact(argc - optind, 1))
11366 usage(cmd_check_usage);
11368 if (ctx.progress_enabled) {
11369 ctx.tp = TASK_NOTHING;
11370 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11373 /* This check is the only reason for --readonly to exist */
11374 if (readonly && repair) {
11375 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11380 * Not supported yet
11382 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11383 error("Low memory mode doesn't support repair yet");
11388 cache_tree_init(&root_cache);
11390 if((ret = check_mounted(argv[optind])) < 0) {
11391 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11394 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11399 /* only allow partial opening under repair mode */
11401 ctree_flags |= OPEN_CTREE_PARTIAL;
11403 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11404 chunk_root_bytenr, ctree_flags);
11406 fprintf(stderr, "Couldn't open file system\n");
11411 global_info = info;
11412 root = info->fs_root;
11415 * repair mode will force us to commit transaction which
11416 * will make us fail to load log tree when mounting.
11418 if (repair && btrfs_super_log_root(info->super_copy)) {
11419 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11424 ret = zero_log_tree(root);
11426 fprintf(stderr, "fail to zero log tree\n");
11431 uuid_unparse(info->super_copy->fsid, uuidbuf);
11432 if (qgroup_report) {
11433 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11435 ret = qgroup_verify_all(info);
11441 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11442 subvolid, argv[optind], uuidbuf);
11443 ret = print_extent_state(info, subvolid);
11446 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11448 if (!extent_buffer_uptodate(info->tree_root->node) ||
11449 !extent_buffer_uptodate(info->dev_root->node) ||
11450 !extent_buffer_uptodate(info->chunk_root->node)) {
11451 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11456 if (init_extent_tree || init_csum_tree) {
11457 struct btrfs_trans_handle *trans;
11459 trans = btrfs_start_transaction(info->extent_root, 0);
11460 if (IS_ERR(trans)) {
11461 fprintf(stderr, "Error starting transaction\n");
11462 ret = PTR_ERR(trans);
11466 if (init_extent_tree) {
11467 printf("Creating a new extent tree\n");
11468 ret = reinit_extent_tree(trans, info);
11473 if (init_csum_tree) {
11474 fprintf(stderr, "Reinit crc root\n");
11475 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11477 fprintf(stderr, "crc root initialization failed\n");
11482 ret = fill_csum_tree(trans, info->csum_root,
11485 fprintf(stderr, "crc refilling failed\n");
11490 * Ok now we commit and run the normal fsck, which will add
11491 * extent entries for all of the items it finds.
11493 ret = btrfs_commit_transaction(trans, info->extent_root);
11497 if (!extent_buffer_uptodate(info->extent_root->node)) {
11498 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11502 if (!extent_buffer_uptodate(info->csum_root->node)) {
11503 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
11508 if (!ctx.progress_enabled)
11509 fprintf(stderr, "checking extents\n");
11510 if (check_mode == CHECK_MODE_LOWMEM)
11511 ret = check_chunks_and_extents_v2(root);
11513 ret = check_chunks_and_extents(root);
11515 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
11517 ret = repair_root_items(info);
11521 fprintf(stderr, "Fixed %d roots.\n", ret);
11523 } else if (ret > 0) {
11525 "Found %d roots with an outdated root item.\n",
11528 "Please run a filesystem check with the option --repair to fix them.\n");
11533 if (!ctx.progress_enabled) {
11534 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11535 fprintf(stderr, "checking free space tree\n");
11537 fprintf(stderr, "checking free space cache\n");
11539 ret = check_space_cache(root);
11544 * We used to have to have these hole extents in between our real
11545 * extents, so if the NO_HOLES flag is not set we need to make sure there
11546 * are no gaps in the file extents for inodes; otherwise gaps are
11547 * expected and we can simply ignore them.
11549 no_holes = btrfs_fs_incompat(root->fs_info,
11550 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11551 if (!ctx.progress_enabled)
11552 fprintf(stderr, "checking fs roots\n");
11553 ret = check_fs_roots(root, &root_cache);
11557 fprintf(stderr, "checking csums\n");
11558 ret = check_csums(root);
11562 fprintf(stderr, "checking root refs\n");
11563 ret = check_root_refs(root, &root_cache);
11567 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11568 struct extent_buffer *eb;
11570 eb = list_first_entry(&root->fs_info->recow_ebs,
11571 struct extent_buffer, recow);
11572 list_del_init(&eb->recow);
11573 ret = recow_extent_buffer(root, eb);
11578 while (!list_empty(&delete_items)) {
11579 struct bad_item *bad;
11581 bad = list_first_entry(&delete_items, struct bad_item, list);
11582 list_del_init(&bad->list);
11584 ret = delete_bad_item(root, bad);
11588 if (info->quota_enabled) {
11590 fprintf(stderr, "checking quota groups\n");
11591 err = qgroup_verify_all(info);
11595 err = repair_qgroups(info, &qgroups_repaired);
11600 if (!list_empty(&root->fs_info->recow_ebs)) {
11601 fprintf(stderr, "Transid errors in file system\n");
11605 /* Don't override original ret */
11606 if (!ret && qgroups_repaired)
11607 ret = qgroups_repaired;
11609 if (found_old_backref) { /*
11610 * there was a disk format change when mixed
11611 * backref was in testing tree. The old format
11612 * existed about one week.
11614 printf("\n * Found old mixed backref format. "
11615 "The old format is not supported! *"
11616 "\n * Please mount the FS in readonly mode, "
11617 "backup data and re-format the FS. *\n\n");
11620 printf("found %llu bytes used err is %d\n",
11621 (unsigned long long)bytes_used, ret);
11622 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11623 printf("total tree bytes: %llu\n",
11624 (unsigned long long)total_btree_bytes);
11625 printf("total fs tree bytes: %llu\n",
11626 (unsigned long long)total_fs_tree_bytes);
11627 printf("total extent tree bytes: %llu\n",
11628 (unsigned long long)total_extent_tree_bytes);
11629 printf("btree space waste bytes: %llu\n",
11630 (unsigned long long)btree_space_waste);
11631 printf("file data blocks allocated: %llu\n referenced %llu\n",
11632 (unsigned long long)data_bytes_allocated,
11633 (unsigned long long)data_bytes_referenced);
11635 free_qgroup_counts();
11636 free_root_recs_tree(&root_cache);
11640 if (ctx.progress_enabled)
11641 task_deinit(ctx.info);