2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
/*
 * File-scope accounting totals and option flags for the check pass.
 * NOTE(review): every line below begins with stray line-number residue from a
 * broken extraction; tokens are preserved byte-identical, comments only added.
 */
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
/* presumably set when an old-format backref is seen — TODO confirm users */
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
/* command-line driven behavior switches */
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
/*
 * Check-mode selection (original vs. low-memory implementation).
 * NOTE(review): the other enumerators and the closing brace were lost in
 * extraction; only these lines survived. Kept byte-identical.
 */
77 enum btrfs_check_mode {
81 CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
84 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
/*
 * Common header embedded in both tree and data backrefs.
 * NOTE(review): the rb_node member and closing brace appear to be missing
 * (rb_node_to_extent_backref below references a 'node' member).
 */
86 struct extent_backref {
87 struct list_head list;
89 unsigned int is_data:1;
90 unsigned int found_extent_tree:1;
91 unsigned int full_backref:1;
92 unsigned int found_ref:1;
93 unsigned int broken:1;
96 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
98 return list_entry(entry, struct extent_backref, list);
101 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
103 return rb_entry(node, struct extent_backref, node);
/*
 * Backref to a data extent; embeds the generic extent_backref header.
 * NOTE(review): remaining members (parent/root union, owner, offset, bytes,
 * disk_bytenr, found_ref, ...) were lost in extraction — see their uses in
 * compare_data_backref below.
 */
106 struct data_backref {
107 struct extent_backref node;
121 static inline struct data_backref* to_data_backref(struct extent_backref *back)
123 return container_of(back, struct data_backref, node);
126 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
128 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
129 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
130 struct data_backref *back1 = to_data_backref(ext1);
131 struct data_backref *back2 = to_data_backref(ext2);
133 WARN_ON(!ext1->is_data);
134 WARN_ON(!ext2->is_data);
136 /* parent and root are a union, so this covers both */
137 if (back1->parent > back2->parent)
139 if (back1->parent < back2->parent)
142 /* This is a full backref and the parents match. */
143 if (back1->node.full_backref)
146 if (back1->owner > back2->owner)
148 if (back1->owner < back2->owner)
151 if (back1->offset > back2->offset)
153 if (back1->offset < back2->offset)
156 if (back1->bytes > back2->bytes)
158 if (back1->bytes < back2->bytes)
161 if (back1->found_ref && back2->found_ref) {
162 if (back1->disk_bytenr > back2->disk_bytenr)
164 if (back1->disk_bytenr < back2->disk_bytenr)
167 if (back1->found_ref > back2->found_ref)
169 if (back1->found_ref < back2->found_ref)
/*
 * Much like data_backref, just removed the undetermined members
 * and change it to use list_head.
 * During extent scan, it is stored in root->orphan_data_extent.
 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
 *
 * NOTE(review): members objectid/offset/disk_bytenr/disk_len and the closing
 * brace were lost in extraction (see print_orphan_data_extents for uses).
 */
182 struct orphan_data_extent {
183 struct list_head list;
/* Backref to a tree block; NOTE(review): remaining members lost in extraction. */
191 struct tree_backref {
192 struct extent_backref node;
199 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
201 return container_of(back, struct tree_backref, node);
204 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
206 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
207 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
208 struct tree_backref *back1 = to_tree_backref(ext1);
209 struct tree_backref *back2 = to_tree_backref(ext2);
211 WARN_ON(ext1->is_data);
212 WARN_ON(ext2->is_data);
214 /* parent and root are a union, so this covers both */
215 if (back1->parent > back2->parent)
217 if (back1->parent < back2->parent)
223 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
225 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
226 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
228 if (ext1->is_data > ext2->is_data)
231 if (ext1->is_data < ext2->is_data)
234 if (ext1->full_backref > ext2->full_backref)
236 if (ext1->full_backref < ext2->full_backref)
240 return compare_data_backref(node1, node2);
242 return compare_tree_backref(node1, node2);
245 /* Explicit initialization for extent_record::flag_block_full_backref */
246 enum { FLAG_UNSET = 2 };
/*
 * In-memory record of one extent found while scanning the extent tree.
 * NOTE(review): several members (start/nr, refs, generation, ...) and the
 * closing brace were lost in extraction; kept byte-identical.
 */
248 struct extent_record {
249 struct list_head backrefs;
250 struct list_head dups;
251 struct rb_root backref_tree;
252 struct list_head list;
253 struct cache_extent cache;
254 struct btrfs_disk_key parent_key;
259 u64 extent_item_refs;
261 u64 parent_generation;
/* 2-bit field so it can hold FLAG_UNSET (2) in addition to 0/1 */
265 unsigned int flag_block_full_backref:2;
266 unsigned int found_rec:1;
267 unsigned int content_checked:1;
268 unsigned int owner_ref_checked:1;
269 unsigned int is_root:1;
270 unsigned int metadata:1;
271 unsigned int bad_full_backref:1;
272 unsigned int crossing_stripes:1;
273 unsigned int wrong_chunk_type:1;
276 static inline struct extent_record* to_extent_record(struct list_head *entry)
278 return container_of(entry, struct extent_record, list);
/*
 * One name (dir entry / inode ref) pointing at an inode.
 * NOTE(review): members dir/index/errors/namelen/name[] and the closing
 * brace were lost in extraction (see get_inode_backref for uses).
 */
281 struct inode_backref {
282 struct list_head list;
283 unsigned int found_dir_item:1;
284 unsigned int found_dir_index:1;
285 unsigned int found_inode_ref:1;
/* 8 bits: holds a BTRFS_FT_* file type value */
286 unsigned int filetype:8;
/* BTRFS_INODE_REF_KEY or BTRFS_INODE_EXTREF_KEY */
288 unsigned int ref_type;
295 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
297 return list_entry(entry, struct inode_backref, list);
/*
 * Queued root to be scanned.
 * NOTE(review): members objectid/level/drop_level etc. were lost in
 * extraction; kept byte-identical.
 */
300 struct root_item_record {
301 struct list_head list;
308 struct btrfs_key drop_key;
/* Error bits attached to an inode_backref / root_backref (->errors). */
311 #define REF_ERR_NO_DIR_ITEM (1 << 0)
312 #define REF_ERR_NO_DIR_INDEX (1 << 1)
313 #define REF_ERR_NO_INODE_REF (1 << 2)
314 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
315 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
316 #define REF_ERR_DUP_INODE_REF (1 << 5)
317 #define REF_ERR_INDEX_UNMATCH (1 << 6)
318 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
319 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* hex value 0x100 */
320 #define REF_ERR_NO_ROOT_REF (1 << 9)
321 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
322 #define REF_ERR_DUP_ROOT_REF (1 << 11)
323 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
/*
 * A hole in a file's extent coverage; kept in inode_record::holes.
 * NOTE(review): members (rb_node node; u64 start; u64 len) and the closing
 * brace were lost in extraction (see compare_hole / add_file_extent_hole).
 */
325 struct file_extent_hole {
/*
 * Accumulated state for one inode while walking an fs tree.
 * NOTE(review): several members (ino, nlink, found_link, isize, nbytes,
 * imode, errors, refs, ...) were lost in extraction; kept byte-identical.
 */
331 struct inode_record {
332 struct list_head backrefs;
333 unsigned int checked:1;
334 unsigned int merging:1;
335 unsigned int found_inode_item:1;
336 unsigned int found_dir_item:1;
337 unsigned int found_file_extent:1;
338 unsigned int found_csum_item:1;
339 unsigned int some_csum_missing:1;
340 unsigned int nodatasum:1;
353 struct rb_root holes;
354 struct list_head orphan_extents;
/* Error bits attached to an inode_record (->errors). */
359 #define I_ERR_NO_INODE_ITEM (1 << 0)
360 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
361 #define I_ERR_DUP_INODE_ITEM (1 << 2)
362 #define I_ERR_DUP_DIR_INDEX (1 << 3)
363 #define I_ERR_ODD_DIR_ITEM (1 << 4)
364 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
365 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
366 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
367 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* hex value 0x100 */
368 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
369 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* hex value 0x400 */
370 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
371 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
372 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
373 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
/*
 * One ROOT_REF/ROOT_BACKREF pair found for a subvolume root.
 * NOTE(review): members (ref_root, dir, index, errors, namelen, name[]) and
 * the closing brace were lost in extraction; kept byte-identical.
 */
375 struct root_backref {
376 struct list_head list;
377 unsigned int found_dir_item:1;
378 unsigned int found_dir_index:1;
379 unsigned int found_back_ref:1;
380 unsigned int found_forward_ref:1;
381 unsigned int reachable:1;
390 static inline struct root_backref* to_root_backref(struct list_head *entry)
392 return list_entry(entry, struct root_backref, list);
/*
 * NOTE(review): the lines below are surviving fragments of several struct
 * definitions (a root record, ptr_node, shared_node, walk_control,
 * extent_entry, root_item_info) whose headers/members were lost in
 * extraction; kept byte-identical, comments only added.
 */
396 struct list_head backrefs;
397 struct cache_extent cache;
398 unsigned int found_root_item:1;
399 struct cache_extent cache;
409 struct cache_extent cache;
410 struct cache_tree root_cache;
411 struct cache_tree inode_cache;
412 struct inode_record *current;
/* Per-walk bookkeeping for shared subtree handling. */
421 struct walk_control {
422 struct cache_tree shared;
423 struct shared_node *nodes[BTRFS_MAX_LEVEL];
429 struct btrfs_key key;
431 struct list_head list;
434 struct extent_entry {
439 struct list_head list;
442 struct root_item_info {
443 /* level of the root */
445 /* number of nodes at this level, must be 1 for a root */
449 struct cache_extent cache_extent;
453 * Error bit for low memory mode check.
455 * Currently no caller cares about it yet. Just internal use for error
458 #define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
459 #define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
460 #define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
461 #define REFERENCER_MISSING (1 << 3) /* Referencer not found */
462 #define REFERENCER_MISMATCH (1 << 4) /* Referencer found but does not match */
/* NOTE(review): shares bit (1 << 4) with REFERENCER_MISMATCH — confirm intentional */
463 #define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
464 #define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */
465 #define UNKNOWN_TYPE (1 << 6) /* Unknown type */
466 #define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */
467 #define CHUNK_TYPE_MISMATCH (1 << 8)
469 static void *print_status_check(void *p)
471 struct task_ctx *priv = p;
472 const char work_indicator[] = { '.', 'o', 'O', 'o' };
474 static char *task_position_string[] = {
476 "checking free space cache",
480 task_period_start(priv->info, 1000 /* 1s */);
482 if (priv->tp == TASK_NOTHING)
486 printf("%s [%c]\r", task_position_string[priv->tp],
487 work_indicator[count % 4]);
490 task_period_wait(priv->info);
495 static int print_status_return(void *p)
503 static enum btrfs_check_mode parse_check_mode(const char *str)
505 if (strcmp(str, "lowmem") == 0)
506 return CHECK_MODE_LOWMEM;
507 if (strcmp(str, "orig") == 0)
508 return CHECK_MODE_ORIGINAL;
509 if (strcmp(str, "original") == 0)
510 return CHECK_MODE_ORIGINAL;
512 return CHECK_MODE_UNKNOWN;
515 /* Compatible function to allow reuse of old codes */
516 static u64 first_extent_gap(struct rb_root *holes)
518 struct file_extent_hole *hole;
520 if (RB_EMPTY_ROOT(holes))
523 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
527 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
529 struct file_extent_hole *hole1;
530 struct file_extent_hole *hole2;
532 hole1 = rb_entry(node1, struct file_extent_hole, node);
533 hole2 = rb_entry(node2, struct file_extent_hole, node);
535 if (hole1->start > hole2->start)
537 if (hole1->start < hole2->start)
539 /* Now hole1->start == hole2->start */
540 if (hole1->len >= hole2->len)
542 * Hole 1 will be merge center
543 * Same hole will be merged later
546 /* Hole 2 will be merge center */
551 * Add a hole to the record
553 * This will do hole merge for copy_file_extent_holes(),
554 * which will ensure there won't be continuous holes.
556 static int add_file_extent_hole(struct rb_root *holes,
559 struct file_extent_hole *hole;
560 struct file_extent_hole *prev = NULL;
561 struct file_extent_hole *next = NULL;
563 hole = malloc(sizeof(*hole));
568 /* Since compare will not return 0, no -EEXIST will happen */
569 rb_insert(holes, &hole->node, compare_hole);
571 /* simple merge with previous hole */
572 if (rb_prev(&hole->node))
573 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
575 if (prev && prev->start + prev->len >= hole->start) {
576 hole->len = hole->start + hole->len - prev->start;
577 hole->start = prev->start;
578 rb_erase(&prev->node, holes);
583 /* iterate merge with next holes */
585 if (!rb_next(&hole->node))
587 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
589 if (hole->start + hole->len >= next->start) {
590 if (hole->start + hole->len <= next->start + next->len)
591 hole->len = next->start + next->len -
593 rb_erase(&next->node, holes);
602 static int compare_hole_range(struct rb_node *node, void *data)
604 struct file_extent_hole *hole;
607 hole = (struct file_extent_hole *)data;
610 hole = rb_entry(node, struct file_extent_hole, node);
611 if (start < hole->start)
613 if (start >= hole->start && start < hole->start + hole->len)
619 * Delete a hole in the record
621 * This will do the hole split and is much restrict than add.
623 static int del_file_extent_hole(struct rb_root *holes,
626 struct file_extent_hole *hole;
627 struct file_extent_hole tmp;
632 struct rb_node *node;
639 node = rb_search(holes, &tmp, compare_hole_range, NULL);
642 hole = rb_entry(node, struct file_extent_hole, node);
643 if (start + len > hole->start + hole->len)
647 * Now there will be no overlap, delete the hole and re-add the
648 * split(s) if they exists.
650 if (start > hole->start) {
651 prev_start = hole->start;
652 prev_len = start - hole->start;
655 if (hole->start + hole->len > start + len) {
656 next_start = start + len;
657 next_len = hole->start + hole->len - start - len;
660 rb_erase(node, holes);
663 ret = add_file_extent_hole(holes, prev_start, prev_len);
668 ret = add_file_extent_hole(holes, next_start, next_len);
675 static int copy_file_extent_holes(struct rb_root *dst,
678 struct file_extent_hole *hole;
679 struct rb_node *node;
682 node = rb_first(src);
684 hole = rb_entry(node, struct file_extent_hole, node);
685 ret = add_file_extent_hole(dst, hole->start, hole->len);
688 node = rb_next(node);
693 static void free_file_extent_holes(struct rb_root *holes)
695 struct rb_node *node;
696 struct file_extent_hole *hole;
698 node = rb_first(holes);
700 hole = rb_entry(node, struct file_extent_hole, node);
701 rb_erase(node, holes);
703 node = rb_first(holes);
707 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
709 static void record_root_in_trans(struct btrfs_trans_handle *trans,
710 struct btrfs_root *root)
712 if (root->last_trans != trans->transid) {
713 root->track_dirty = 1;
714 root->last_trans = trans->transid;
715 root->commit_root = root->node;
716 extent_buffer_get(root->node);
720 static u8 imode_to_type(u32 imode)
723 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
724 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
725 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
726 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
727 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
728 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
729 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
730 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
733 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
/*
 * rb comparator for device records, keyed by devid.
 * NOTE(review): the function braces and both return statements were lost in
 * extraction, so the sign convention (which devid sorts first) cannot be
 * recovered from what is visible — confirm against upstream before fixing.
 */
737 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
739 struct device_record *rec1;
740 struct device_record *rec2;
742 rec1 = rb_entry(node1, struct device_record, node);
743 rec2 = rb_entry(node2, struct device_record, node);
744 if (rec1->devid > rec2->devid)
746 else if (rec1->devid < rec2->devid)
752 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
754 struct inode_record *rec;
755 struct inode_backref *backref;
756 struct inode_backref *orig;
757 struct inode_backref *tmp;
758 struct orphan_data_extent *src_orphan;
759 struct orphan_data_extent *dst_orphan;
763 rec = malloc(sizeof(*rec));
765 return ERR_PTR(-ENOMEM);
766 memcpy(rec, orig_rec, sizeof(*rec));
768 INIT_LIST_HEAD(&rec->backrefs);
769 INIT_LIST_HEAD(&rec->orphan_extents);
770 rec->holes = RB_ROOT;
772 list_for_each_entry(orig, &orig_rec->backrefs, list) {
773 size = sizeof(*orig) + orig->namelen + 1;
774 backref = malloc(size);
779 memcpy(backref, orig, size);
780 list_add_tail(&backref->list, &rec->backrefs);
782 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
783 dst_orphan = malloc(sizeof(*dst_orphan));
788 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
789 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
791 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
797 if (!list_empty(&rec->backrefs))
798 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
799 list_del(&orig->list);
803 if (!list_empty(&rec->orphan_extents))
804 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
805 list_del(&orig->list);
814 static void print_orphan_data_extents(struct list_head *orphan_extents,
817 struct orphan_data_extent *orphan;
819 if (list_empty(orphan_extents))
821 printf("The following data extent is lost in tree %llu:\n",
823 list_for_each_entry(orphan, orphan_extents, list) {
824 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
825 orphan->objectid, orphan->offset, orphan->disk_bytenr,
830 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
832 u64 root_objectid = root->root_key.objectid;
833 int errors = rec->errors;
837 /* reloc root errors, we print its corresponding fs root objectid*/
838 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
839 root_objectid = root->root_key.offset;
840 fprintf(stderr, "reloc");
842 fprintf(stderr, "root %llu inode %llu errors %x",
843 (unsigned long long) root_objectid,
844 (unsigned long long) rec->ino, rec->errors);
846 if (errors & I_ERR_NO_INODE_ITEM)
847 fprintf(stderr, ", no inode item");
848 if (errors & I_ERR_NO_ORPHAN_ITEM)
849 fprintf(stderr, ", no orphan item");
850 if (errors & I_ERR_DUP_INODE_ITEM)
851 fprintf(stderr, ", dup inode item");
852 if (errors & I_ERR_DUP_DIR_INDEX)
853 fprintf(stderr, ", dup dir index");
854 if (errors & I_ERR_ODD_DIR_ITEM)
855 fprintf(stderr, ", odd dir item");
856 if (errors & I_ERR_ODD_FILE_EXTENT)
857 fprintf(stderr, ", odd file extent");
858 if (errors & I_ERR_BAD_FILE_EXTENT)
859 fprintf(stderr, ", bad file extent");
860 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
861 fprintf(stderr, ", file extent overlap");
862 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
863 fprintf(stderr, ", file extent discount");
864 if (errors & I_ERR_DIR_ISIZE_WRONG)
865 fprintf(stderr, ", dir isize wrong");
866 if (errors & I_ERR_FILE_NBYTES_WRONG)
867 fprintf(stderr, ", nbytes wrong");
868 if (errors & I_ERR_ODD_CSUM_ITEM)
869 fprintf(stderr, ", odd csum item");
870 if (errors & I_ERR_SOME_CSUM_MISSING)
871 fprintf(stderr, ", some csum missing");
872 if (errors & I_ERR_LINK_COUNT_WRONG)
873 fprintf(stderr, ", link count wrong");
874 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
875 fprintf(stderr, ", orphan file extent");
876 fprintf(stderr, "\n");
877 /* Print the orphan extents if needed */
878 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
879 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
881 /* Print the holes if needed */
882 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
883 struct file_extent_hole *hole;
884 struct rb_node *node;
887 node = rb_first(&rec->holes);
888 fprintf(stderr, "Found file extent holes:\n");
891 hole = rb_entry(node, struct file_extent_hole, node);
892 fprintf(stderr, "\tstart: %llu, len: %llu\n",
893 hole->start, hole->len);
894 node = rb_next(node);
897 fprintf(stderr, "\tstart: 0, len: %llu\n",
898 round_up(rec->isize, root->sectorsize));
902 static void print_ref_error(int errors)
904 if (errors & REF_ERR_NO_DIR_ITEM)
905 fprintf(stderr, ", no dir item");
906 if (errors & REF_ERR_NO_DIR_INDEX)
907 fprintf(stderr, ", no dir index");
908 if (errors & REF_ERR_NO_INODE_REF)
909 fprintf(stderr, ", no inode ref");
910 if (errors & REF_ERR_DUP_DIR_ITEM)
911 fprintf(stderr, ", dup dir item");
912 if (errors & REF_ERR_DUP_DIR_INDEX)
913 fprintf(stderr, ", dup dir index");
914 if (errors & REF_ERR_DUP_INODE_REF)
915 fprintf(stderr, ", dup inode ref");
916 if (errors & REF_ERR_INDEX_UNMATCH)
917 fprintf(stderr, ", index mismatch");
918 if (errors & REF_ERR_FILETYPE_UNMATCH)
919 fprintf(stderr, ", filetype mismatch");
920 if (errors & REF_ERR_NAME_TOO_LONG)
921 fprintf(stderr, ", name too long");
922 if (errors & REF_ERR_NO_ROOT_REF)
923 fprintf(stderr, ", no root ref");
924 if (errors & REF_ERR_NO_ROOT_BACKREF)
925 fprintf(stderr, ", no root backref");
926 if (errors & REF_ERR_DUP_ROOT_REF)
927 fprintf(stderr, ", dup root ref");
928 if (errors & REF_ERR_DUP_ROOT_BACKREF)
929 fprintf(stderr, ", dup root backref");
930 fprintf(stderr, "\n");
933 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
936 struct ptr_node *node;
937 struct cache_extent *cache;
938 struct inode_record *rec = NULL;
941 cache = lookup_cache_extent(inode_cache, ino, 1);
943 node = container_of(cache, struct ptr_node, cache);
945 if (mod && rec->refs > 1) {
946 node->data = clone_inode_rec(rec);
947 if (IS_ERR(node->data))
953 rec = calloc(1, sizeof(*rec));
955 return ERR_PTR(-ENOMEM);
957 rec->extent_start = (u64)-1;
959 INIT_LIST_HEAD(&rec->backrefs);
960 INIT_LIST_HEAD(&rec->orphan_extents);
961 rec->holes = RB_ROOT;
963 node = malloc(sizeof(*node));
966 return ERR_PTR(-ENOMEM);
968 node->cache.start = ino;
969 node->cache.size = 1;
972 if (ino == BTRFS_FREE_INO_OBJECTID)
975 ret = insert_cache_extent(inode_cache, &node->cache);
977 return ERR_PTR(-EEXIST);
982 static void free_orphan_data_extents(struct list_head *orphan_extents)
984 struct orphan_data_extent *orphan;
986 while (!list_empty(orphan_extents)) {
987 orphan = list_entry(orphan_extents->next,
988 struct orphan_data_extent, list);
989 list_del(&orphan->list);
994 static void free_inode_rec(struct inode_record *rec)
996 struct inode_backref *backref;
1001 while (!list_empty(&rec->backrefs)) {
1002 backref = to_inode_backref(rec->backrefs.next);
1003 list_del(&backref->list);
1006 free_orphan_data_extents(&rec->orphan_extents);
1007 free_file_extent_holes(&rec->holes);
1011 static int can_free_inode_rec(struct inode_record *rec)
1013 if (!rec->errors && rec->checked && rec->found_inode_item &&
1014 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
1019 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
1020 struct inode_record *rec)
1022 struct cache_extent *cache;
1023 struct inode_backref *tmp, *backref;
1024 struct ptr_node *node;
1025 unsigned char filetype;
1027 if (!rec->found_inode_item)
1030 filetype = imode_to_type(rec->imode);
1031 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1032 if (backref->found_dir_item && backref->found_dir_index) {
1033 if (backref->filetype != filetype)
1034 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1035 if (!backref->errors && backref->found_inode_ref &&
1036 rec->nlink == rec->found_link) {
1037 list_del(&backref->list);
1043 if (!rec->checked || rec->merging)
1046 if (S_ISDIR(rec->imode)) {
1047 if (rec->found_size != rec->isize)
1048 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1049 if (rec->found_file_extent)
1050 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1051 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1052 if (rec->found_dir_item)
1053 rec->errors |= I_ERR_ODD_DIR_ITEM;
1054 if (rec->found_size != rec->nbytes)
1055 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1056 if (rec->nlink > 0 && !no_holes &&
1057 (rec->extent_end < rec->isize ||
1058 first_extent_gap(&rec->holes) < rec->isize))
1059 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1062 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1063 if (rec->found_csum_item && rec->nodatasum)
1064 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1065 if (rec->some_csum_missing && !rec->nodatasum)
1066 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1069 BUG_ON(rec->refs != 1);
1070 if (can_free_inode_rec(rec)) {
1071 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1072 node = container_of(cache, struct ptr_node, cache);
1073 BUG_ON(node->data != rec);
1074 remove_cache_extent(inode_cache, &node->cache);
1076 free_inode_rec(rec);
1080 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1082 struct btrfs_path path;
1083 struct btrfs_key key;
1086 key.objectid = BTRFS_ORPHAN_OBJECTID;
1087 key.type = BTRFS_ORPHAN_ITEM_KEY;
1090 btrfs_init_path(&path);
1091 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1092 btrfs_release_path(&path);
1098 static int process_inode_item(struct extent_buffer *eb,
1099 int slot, struct btrfs_key *key,
1100 struct shared_node *active_node)
1102 struct inode_record *rec;
1103 struct btrfs_inode_item *item;
1105 rec = active_node->current;
1106 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1107 if (rec->found_inode_item) {
1108 rec->errors |= I_ERR_DUP_INODE_ITEM;
1111 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1112 rec->nlink = btrfs_inode_nlink(eb, item);
1113 rec->isize = btrfs_inode_size(eb, item);
1114 rec->nbytes = btrfs_inode_nbytes(eb, item);
1115 rec->imode = btrfs_inode_mode(eb, item);
1116 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1118 rec->found_inode_item = 1;
1119 if (rec->nlink == 0)
1120 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1121 maybe_free_inode_rec(&active_node->inode_cache, rec);
1125 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1127 int namelen, u64 dir)
1129 struct inode_backref *backref;
1131 list_for_each_entry(backref, &rec->backrefs, list) {
1132 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1134 if (backref->dir != dir || backref->namelen != namelen)
1136 if (memcmp(name, backref->name, namelen))
1141 backref = malloc(sizeof(*backref) + namelen + 1);
1144 memset(backref, 0, sizeof(*backref));
1146 backref->namelen = namelen;
1147 memcpy(backref->name, name, namelen);
1148 backref->name[namelen] = '\0';
1149 list_add_tail(&backref->list, &rec->backrefs);
1153 static int add_inode_backref(struct cache_tree *inode_cache,
1154 u64 ino, u64 dir, u64 index,
1155 const char *name, int namelen,
1156 int filetype, int itemtype, int errors)
1158 struct inode_record *rec;
1159 struct inode_backref *backref;
1161 rec = get_inode_rec(inode_cache, ino, 1);
1162 BUG_ON(IS_ERR(rec));
1163 backref = get_inode_backref(rec, name, namelen, dir);
1166 backref->errors |= errors;
1167 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1168 if (backref->found_dir_index)
1169 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1170 if (backref->found_inode_ref && backref->index != index)
1171 backref->errors |= REF_ERR_INDEX_UNMATCH;
1172 if (backref->found_dir_item && backref->filetype != filetype)
1173 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1175 backref->index = index;
1176 backref->filetype = filetype;
1177 backref->found_dir_index = 1;
1178 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1180 if (backref->found_dir_item)
1181 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1182 if (backref->found_dir_index && backref->filetype != filetype)
1183 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1185 backref->filetype = filetype;
1186 backref->found_dir_item = 1;
1187 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1188 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1189 if (backref->found_inode_ref)
1190 backref->errors |= REF_ERR_DUP_INODE_REF;
1191 if (backref->found_dir_index && backref->index != index)
1192 backref->errors |= REF_ERR_INDEX_UNMATCH;
1194 backref->index = index;
1196 backref->ref_type = itemtype;
1197 backref->found_inode_ref = 1;
1202 maybe_free_inode_rec(inode_cache, rec);
1206 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1207 struct cache_tree *dst_cache)
1209 struct inode_backref *backref;
1214 list_for_each_entry(backref, &src->backrefs, list) {
1215 if (backref->found_dir_index) {
1216 add_inode_backref(dst_cache, dst->ino, backref->dir,
1217 backref->index, backref->name,
1218 backref->namelen, backref->filetype,
1219 BTRFS_DIR_INDEX_KEY, backref->errors);
1221 if (backref->found_dir_item) {
1223 add_inode_backref(dst_cache, dst->ino,
1224 backref->dir, 0, backref->name,
1225 backref->namelen, backref->filetype,
1226 BTRFS_DIR_ITEM_KEY, backref->errors);
1228 if (backref->found_inode_ref) {
1229 add_inode_backref(dst_cache, dst->ino,
1230 backref->dir, backref->index,
1231 backref->name, backref->namelen, 0,
1232 backref->ref_type, backref->errors);
1236 if (src->found_dir_item)
1237 dst->found_dir_item = 1;
1238 if (src->found_file_extent)
1239 dst->found_file_extent = 1;
1240 if (src->found_csum_item)
1241 dst->found_csum_item = 1;
1242 if (src->some_csum_missing)
1243 dst->some_csum_missing = 1;
1244 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1245 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1250 BUG_ON(src->found_link < dir_count);
1251 dst->found_link += src->found_link - dir_count;
1252 dst->found_size += src->found_size;
1253 if (src->extent_start != (u64)-1) {
1254 if (dst->extent_start == (u64)-1) {
1255 dst->extent_start = src->extent_start;
1256 dst->extent_end = src->extent_end;
1258 if (dst->extent_end > src->extent_start)
1259 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1260 else if (dst->extent_end < src->extent_start) {
1261 ret = add_file_extent_hole(&dst->holes,
1263 src->extent_start - dst->extent_end);
1265 if (dst->extent_end < src->extent_end)
1266 dst->extent_end = src->extent_end;
1270 dst->errors |= src->errors;
1271 if (src->found_inode_item) {
1272 if (!dst->found_inode_item) {
1273 dst->nlink = src->nlink;
1274 dst->isize = src->isize;
1275 dst->nbytes = src->nbytes;
1276 dst->imode = src->imode;
1277 dst->nodatasum = src->nodatasum;
1278 dst->found_inode_item = 1;
1280 dst->errors |= I_ERR_DUP_INODE_ITEM;
1288 static int splice_shared_node(struct shared_node *src_node,
1289 struct shared_node *dst_node)
1291 struct cache_extent *cache;
1292 struct ptr_node *node, *ins;
1293 struct cache_tree *src, *dst;
1294 struct inode_record *rec, *conflict;
1295 u64 current_ino = 0;
1299 if (--src_node->refs == 0)
1301 if (src_node->current)
1302 current_ino = src_node->current->ino;
1304 src = &src_node->root_cache;
1305 dst = &dst_node->root_cache;
1307 cache = search_cache_extent(src, 0);
1309 node = container_of(cache, struct ptr_node, cache);
1311 cache = next_cache_extent(cache);
1314 remove_cache_extent(src, &node->cache);
1317 ins = malloc(sizeof(*ins));
1319 ins->cache.start = node->cache.start;
1320 ins->cache.size = node->cache.size;
1324 ret = insert_cache_extent(dst, &ins->cache);
1325 if (ret == -EEXIST) {
1326 conflict = get_inode_rec(dst, rec->ino, 1);
1327 BUG_ON(IS_ERR(conflict));
1328 merge_inode_recs(rec, conflict, dst);
1330 conflict->checked = 1;
1331 if (dst_node->current == conflict)
1332 dst_node->current = NULL;
1334 maybe_free_inode_rec(dst, conflict);
1335 free_inode_rec(rec);
1342 if (src == &src_node->root_cache) {
1343 src = &src_node->inode_cache;
1344 dst = &dst_node->inode_cache;
1348 if (current_ino > 0 && (!dst_node->current ||
1349 current_ino > dst_node->current->ino)) {
1350 if (dst_node->current) {
1351 dst_node->current->checked = 1;
1352 maybe_free_inode_rec(dst, dst_node->current);
1354 dst_node->current = get_inode_rec(dst, current_ino, 1);
1355 BUG_ON(IS_ERR(dst_node->current));
1360 static void free_inode_ptr(struct cache_extent *cache)
1362 struct ptr_node *node;
1363 struct inode_record *rec;
1365 node = container_of(cache, struct ptr_node, cache);
1367 free_inode_rec(rec);
1371 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1373 static struct shared_node *find_shared_node(struct cache_tree *shared,
1376 struct cache_extent *cache;
1377 struct shared_node *node;
1379 cache = lookup_cache_extent(shared, bytenr, 1);
1381 node = container_of(cache, struct shared_node, cache);
1387 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1390 struct shared_node *node;
1392 node = calloc(1, sizeof(*node));
1395 node->cache.start = bytenr;
1396 node->cache.size = 1;
1397 cache_tree_init(&node->root_cache);
1398 cache_tree_init(&node->inode_cache);
1401 ret = insert_cache_extent(shared, &node->cache);
/*
 * Record that the walk has entered a tree block at @bytenr which is (or
 * may be) shared between subvolume trees.  Creates/looks up the block's
 * shared_node, makes it the active node at @level, and — on re-entry of
 * an already-known node — either frees its accumulated records (for a
 * dead root) or splices them into the currently active node.
 */
1406 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1407 struct walk_control *wc, int level)
1409 struct shared_node *node;
1410 struct shared_node *dest;
1413 if (level == wc->active_node)
1416 BUG_ON(wc->active_node <= level);
1417 node = find_shared_node(&wc->shared, bytenr);
1419 ret = add_shared_node(&wc->shared, bytenr, refs);
1421 node = find_shared_node(&wc->shared, bytenr);
1422 wc->nodes[level] = node;
1423 wc->active_node = level;
/* Root with zero refs: its records are dead weight, drop them outright. */
1427 if (wc->root_level == wc->active_node &&
1428 btrfs_root_refs(&root->root_item) == 0) {
1429 if (--node->refs == 0) {
1430 free_inode_recs_tree(&node->root_cache);
1431 free_inode_recs_tree(&node->inode_cache);
1432 remove_cache_extent(&wc->shared, &node->cache);
/* Otherwise merge this node's records into the active node. */
1438 dest = wc->nodes[wc->active_node];
1439 splice_shared_node(node, dest);
1440 if (node->refs == 0) {
1441 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): pop the shared_node at the active
 * level when the walk leaves it, promote the next populated level to
 * active, and splice the departing node's records into that destination
 * (unless the node must keep them because it is still multiply shared).
 */
1447 static int leave_shared_node(struct btrfs_root *root,
1448 struct walk_control *wc, int level)
1450 struct shared_node *node;
1451 struct shared_node *dest;
1454 if (level == wc->root_level)
/* Find the next level above us that still has a tracked node. */
1457 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1461 BUG_ON(i >= BTRFS_MAX_LEVEL);
1463 node = wc->nodes[wc->active_node];
1464 wc->nodes[wc->active_node] = NULL;
1465 wc->active_node = i;
1467 dest = wc->nodes[wc->active_node];
1468 if (wc->active_node < wc->root_level ||
1469 btrfs_root_refs(&root->root_item) > 0) {
1470 BUG_ON(node->refs <= 1);
1471 splice_shared_node(node, dest);
1473 BUG_ON(node->refs < 2);
/*
 * is_child_root() — walk ROOT_REF/ROOT_BACKREF items in the tree root to
 * classify the parent/child relationship between two subvolume roots.
 */
1482 * 1 - if the root with id child_root_id is a child of root parent_root_id
1483 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1484 * has other root(s) as parent(s)
1485 * 2 - if the root child_root_id doesn't have any parent roots
1487 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1490 struct btrfs_path path;
1491 struct btrfs_key key;
1492 struct extent_buffer *leaf;
1496 btrfs_init_path(&path);
/* Fast path: direct (parent, ROOT_REF, child) item proves the link. */
1498 key.objectid = parent_root_id;
1499 key.type = BTRFS_ROOT_REF_KEY;
1500 key.offset = child_root_id;
1501 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1505 btrfs_release_path(&path);
/* Slow path: scan all ROOT_BACKREF items of the child for any parent. */
1509 key.objectid = child_root_id;
1510 key.type = BTRFS_ROOT_BACKREF_KEY;
1512 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1518 leaf = path.nodes[0];
1519 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1520 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1523 leaf = path.nodes[0];
1526 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1527 if (key.objectid != child_root_id ||
1528 key.type != BTRFS_ROOT_BACKREF_KEY)
1533 if (key.offset == parent_root_id) {
1534 btrfs_release_path(&path);
1541 btrfs_release_path(&path);
1544 return has_parent ? 0 : 2;
/*
 * Parse every btrfs_dir_item packed into the item at @slot of leaf @eb
 * and register an inode backref for each name, routed to the inode or
 * root record cache of @active_node depending on location.type.  Names
 * longer than BTRFS_NAME_LEN are clamped and flagged.
 */
1547 static int process_dir_item(struct btrfs_root *root,
1548 struct extent_buffer *eb,
1549 int slot, struct btrfs_key *key,
1550 struct shared_node *active_node)
1560 struct btrfs_dir_item *di;
1561 struct inode_record *rec;
1562 struct cache_tree *root_cache;
1563 struct cache_tree *inode_cache;
1564 struct btrfs_key location;
1565 char namebuf[BTRFS_NAME_LEN];
1567 root_cache = &active_node->root_cache;
1568 inode_cache = &active_node->inode_cache;
1569 rec = active_node->current;
1570 rec->found_dir_item = 1;
/* Multiple dir items can share one leaf item; walk them by byte offset. */
1572 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1573 total = btrfs_item_size_nr(eb, slot);
1574 while (cur < total) {
1576 btrfs_dir_item_key_to_cpu(eb, di, &location);
1577 name_len = btrfs_dir_name_len(eb, di);
1578 data_len = btrfs_dir_data_len(eb, di);
1579 filetype = btrfs_dir_type(eb, di);
/* Directory isize accounting is driven by the sum of name lengths. */
1581 rec->found_size += name_len;
1582 if (name_len <= BTRFS_NAME_LEN) {
1586 len = BTRFS_NAME_LEN;
1587 error = REF_ERR_NAME_TOO_LONG;
1589 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
/* The dir entry either points at an inode or at a subvolume root. */
1591 if (location.type == BTRFS_INODE_ITEM_KEY) {
1592 add_inode_backref(inode_cache, location.objectid,
1593 key->objectid, key->offset, namebuf,
1594 len, filetype, key->type, error);
1595 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1596 add_inode_backref(root_cache, location.objectid,
1597 key->objectid, key->offset,
1598 namebuf, len, filetype,
1601 fprintf(stderr, "invalid location in dir item %u\n",
1603 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1604 key->objectid, key->offset, namebuf,
1605 len, filetype, key->type, error);
/* Advance to the next packed dir item (header + name + data). */
1608 len = sizeof(*di) + name_len + data_len;
1609 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item must hold exactly one entry. */
1612 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1613 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Parse the packed btrfs_inode_ref entries in item @slot of @eb and add
 * one inode backref per (name, index) pair.  key->objectid is the inode,
 * key->offset the parent directory.
 */
1618 static int process_inode_ref(struct extent_buffer *eb,
1619 int slot, struct btrfs_key *key,
1620 struct shared_node *active_node)
1628 struct cache_tree *inode_cache;
1629 struct btrfs_inode_ref *ref;
1630 char namebuf[BTRFS_NAME_LEN];
1632 inode_cache = &active_node->inode_cache;
1634 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1635 total = btrfs_item_size_nr(eb, slot);
1636 while (cur < total) {
1637 name_len = btrfs_inode_ref_name_len(eb, ref);
1638 index = btrfs_inode_ref_index(eb, ref);
/* Clamp over-long names and record the error on the backref. */
1639 if (name_len <= BTRFS_NAME_LEN) {
1643 len = BTRFS_NAME_LEN;
1644 error = REF_ERR_NAME_TOO_LONG;
1646 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1647 add_inode_backref(inode_cache, key->objectid, key->offset,
1648 index, namebuf, len, 0, key->type, error);
/* Step over this ref's header plus its inline name. */
1650 len = sizeof(*ref) + name_len;
1651 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Same as process_inode_ref() but for extended refs, where the parent
 * directory objectid is stored inside the item (extref_parent) rather
 * than in the key offset.
 */
1657 static int process_inode_extref(struct extent_buffer *eb,
1658 int slot, struct btrfs_key *key,
1659 struct shared_node *active_node)
1668 struct cache_tree *inode_cache;
1669 struct btrfs_inode_extref *extref;
1670 char namebuf[BTRFS_NAME_LEN];
1672 inode_cache = &active_node->inode_cache;
1674 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1675 total = btrfs_item_size_nr(eb, slot);
1676 while (cur < total) {
1677 name_len = btrfs_inode_extref_name_len(eb, extref);
1678 index = btrfs_inode_extref_index(eb, extref);
1679 parent = btrfs_inode_extref_parent(eb, extref);
1680 if (name_len <= BTRFS_NAME_LEN) {
1684 len = BTRFS_NAME_LEN;
1685 error = REF_ERR_NAME_TOO_LONG;
1687 read_extent_buffer(eb, namebuf,
1688 (unsigned long)(extref + 1), len);
1689 add_inode_backref(inode_cache, key->objectid, parent,
1690 index, namebuf, len, 0, key->type, error);
1692 len = sizeof(*extref) + name_len;
1693 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the logical range [start, start + len) are
 * covered by checksum items in the csum tree; the total is accumulated
 * into *found.  Each csum item covers (item_size / csum_size) sectors.
 */
1700 static int count_csum_range(struct btrfs_root *root, u64 start,
1701 u64 len, u64 *found)
1703 struct btrfs_key key;
1704 struct btrfs_path path;
1705 struct extent_buffer *leaf;
1710 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1712 btrfs_init_path(&path);
1714 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1716 key.type = BTRFS_EXTENT_CSUM_KEY;
1718 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* The previous item may straddle 'start'; back up one slot to catch it. */
1722 if (ret > 0 && path.slots[0] > 0) {
1723 leaf = path.nodes[0];
1724 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1725 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1726 key.type == BTRFS_EXTENT_CSUM_KEY)
1731 leaf = path.nodes[0];
1732 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1733 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1738 leaf = path.nodes[0];
1741 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1742 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1743 key.type != BTRFS_EXTENT_CSUM_KEY)
1746 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1747 if (key.offset >= start + len)
1750 if (key.offset > start)
/* Convert checksum bytes to covered data bytes, clipped to the range. */
1753 size = btrfs_item_size_nr(leaf, path.slots[0]);
1754 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1755 if (csum_end > start) {
1756 size = min(csum_end - start, len);
1765 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item of the current inode record: track the
 * contiguous extent coverage (recording holes and overlaps), sanity
 * check inline vs. regular/prealloc extents, and verify checksum
 * coverage for on-disk data extents.
 */
1771 static int process_file_extent(struct btrfs_root *root,
1772 struct extent_buffer *eb,
1773 int slot, struct btrfs_key *key,
1774 struct shared_node *active_node)
1776 struct inode_record *rec;
1777 struct btrfs_file_extent_item *fi;
1779 u64 disk_bytenr = 0;
1780 u64 extent_offset = 0;
1781 u64 mask = root->sectorsize - 1;
1785 rec = active_node->current;
1786 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1787 rec->found_file_extent = 1;
/* First extent seen for this inode: start coverage tracking here. */
1789 if (rec->extent_start == (u64)-1) {
1790 rec->extent_start = key->offset;
1791 rec->extent_end = key->offset;
/* Overlap means corruption; a gap below key->offset is a file hole. */
1794 if (rec->extent_end > key->offset)
1795 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1796 else if (rec->extent_end < key->offset) {
1797 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1798 key->offset - rec->extent_end);
1803 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1804 extent_type = btrfs_file_extent_type(eb, fi);
1806 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1807 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1809 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1810 rec->found_size += num_bytes;
/* Round inline length up to a sector for coverage accounting. */
1811 num_bytes = (num_bytes + mask) & ~mask;
1812 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1813 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1814 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1815 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1816 extent_offset = btrfs_file_extent_offset(eb, fi);
1817 if (num_bytes == 0 || (num_bytes & mask))
1818 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1819 if (num_bytes + extent_offset >
1820 btrfs_file_extent_ram_bytes(eb, fi))
1821 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not be compressed/encrypted/encoded. */
1822 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1823 (btrfs_file_extent_compression(eb, fi) ||
1824 btrfs_file_extent_encryption(eb, fi) ||
1825 btrfs_file_extent_other_encoding(eb, fi)))
1826 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 marks a hole extent: no bytes actually stored. */
1827 if (disk_bytenr > 0)
1828 rec->found_size += num_bytes;
1830 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1832 rec->extent_end = key->offset + num_bytes;
1835 * The data reloc tree will copy full extents into its inode and then
1836 * copy the corresponding csums. Because the extent it copied could be
1837 * a preallocated extent that hasn't been written to yet there may be no
1838 * csums to copy, ergo we won't have csums for our file extent. This is
1839 * ok so just don't bother checking csums if the inode belongs to the
1842 if (disk_bytenr > 0 &&
1843 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID {
1845 if (btrfs_file_extent_compression(eb, fi))
1846 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1848 disk_bytenr += extent_offset;
1850 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* REG extents expect csums; PREALLOC extents must not have any. */
1853 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1855 rec->found_csum_item = 1;
1856 if (found < num_bytes)
1857 rec->some_csum_missing = 1;
1858 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1860 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Dispatch every item of leaf @eb to the matching per-item processor,
 * maintaining active_node->current as the inode record whose items are
 * currently being accumulated (items of one inode are key-adjacent).
 */
1866 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1867 struct walk_control *wc)
1869 struct btrfs_key key;
1873 struct cache_tree *inode_cache;
1874 struct shared_node *active_node;
/* Dead root (zero refs) at the top: nothing worth recording. */
1876 if (wc->root_level == wc->active_node &&
1877 btrfs_root_refs(&root->root_item) == 0)
1880 active_node = wc->nodes[wc->active_node];
1881 inode_cache = &active_node->inode_cache;
1882 nritems = btrfs_header_nritems(eb);
1883 for (i = 0; i < nritems; i++) {
1884 btrfs_item_key_to_cpu(eb, &key, i);
1886 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1888 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* New inode objectid: close out the previous record and open a new one. */
1891 if (active_node->current == NULL ||
1892 active_node->current->ino < key.objectid) {
1893 if (active_node->current) {
1894 active_node->current->checked = 1;
1895 maybe_free_inode_rec(inode_cache,
1896 active_node->current);
1898 active_node->current = get_inode_rec(inode_cache,
1900 BUG_ON(IS_ERR(active_node->current));
1903 case BTRFS_DIR_ITEM_KEY:
1904 case BTRFS_DIR_INDEX_KEY:
1905 ret = process_dir_item(root, eb, i, &key, active_node);
1907 case BTRFS_INODE_REF_KEY:
1908 ret = process_inode_ref(eb, i, &key, active_node);
1910 case BTRFS_INODE_EXTREF_KEY:
1911 ret = process_inode_extref(eb, i, &key, active_node);
1913 case BTRFS_INODE_ITEM_KEY:
1914 ret = process_inode_item(eb, i, &key, active_node);
1916 case BTRFS_EXTENT_DATA_KEY:
1917 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for all child blocks of @node from @slot onward, so
 * the subsequent synchronous reads in walk_down_tree() hit the cache.
 */
1927 static void reada_walk_down(struct btrfs_root *root,
1928 struct extent_buffer *node, int slot)
1937 level = btrfs_header_level(node);
1941 nritems = btrfs_header_nritems(node);
1942 blocksize = root->nodesize;
1943 for (i = slot; i < nritems; i++) {
1944 bytenr = btrfs_node_blockptr(node, i);
1945 ptr_gen = btrfs_node_ptr_generation(node, i);
1946 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1951 * Check the child node/leaf by the following condition:
1952 * 1. the first item key of the node/leaf should be the same with the one
1954 * 2. block in parent node should match the child node/leaf.
1955 * 3. generation of parent node and child's header should be consistent.
1957 * Or the child node/leaf pointed by the key in parent is not valid.
1959 * We hope to check leaf owner too, but since subvol may share leaves,
1960 * which makes leaf owner check not so strong, key check should be
1961 * sufficient enough for that case.
1963 static int check_child_node(struct btrfs_root *root,
1964 struct extent_buffer *parent, int slot,
1965 struct extent_buffer *child)
1967 struct btrfs_key parent_key;
1968 struct btrfs_key child_key;
/* Child's first key comes from an item (leaf) or a node key pointer. */
1971 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1972 if (btrfs_header_level(child) == 0)
1973 btrfs_item_key_to_cpu(child, &child_key, 0);
1975 btrfs_node_key_to_cpu(child, &child_key, 0);
1977 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1980 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1981 parent_key.objectid, parent_key.type, parent_key.offset,
1982 child_key.objectid, child_key.type, child_key.offset);
1984 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1986 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1987 btrfs_node_blockptr(parent, slot),
1988 btrfs_header_bytenr(child));
1990 if (btrfs_node_ptr_generation(parent, slot) !=
1991 btrfs_header_generation(child)) {
1993 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1994 btrfs_header_generation(child),
1995 btrfs_node_ptr_generation(parent, slot));
/*
 * Per-level cache of the last looked-up block bytenr and its extent
 * refcount, used by walk_down_tree() to avoid repeated extent-tree
 * lookups.  NOTE(review): the enclosing 'struct node_refs { ... };'
 * braces are elided in this view.
 */
2001 u64 bytenr[BTRFS_MAX_LEVEL];
2002 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend one step of the tree walk: process the current leaf, or read
 * and validate the next child block and push it onto @path.  Extent
 * refcounts are cached per level in @nrefs; blocks with refs > 1 are
 * tracked through enter_shared_node() so shared subtrees are only
 * accounted once.
 */
2005 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2006 struct walk_control *wc, int *level,
2007 struct node_refs *nrefs)
2009 enum btrfs_tree_block_status status;
2012 struct extent_buffer *next;
2013 struct extent_buffer *cur;
2018 WARN_ON(*level < 0);
2019 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached refcount when this block was looked up before. */
2021 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2022 refs = nrefs->refs[*level];
2025 ret = btrfs_lookup_extent_info(NULL, root,
2026 path->nodes[*level]->start,
2027 *level, 1, &refs, NULL);
2032 nrefs->bytenr[*level] = path->nodes[*level]->start;
2033 nrefs->refs[*level] = refs;
2037 ret = enter_shared_node(root, path->nodes[*level]->start,
2045 while (*level >= 0) {
2046 WARN_ON(*level < 0);
2047 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2048 cur = path->nodes[*level];
2050 if (btrfs_header_level(cur) != *level)
2053 if (path->slots[*level] >= btrfs_header_nritems(cur))
2056 ret = process_one_leaf(root, cur, wc);
2061 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2062 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2063 blocksize = root->nodesize;
/* Same per-level refcount caching for the child we are about to enter. */
2065 if (bytenr == nrefs->bytenr[*level - 1]) {
2066 refs = nrefs->refs[*level - 1];
2068 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2069 *level - 1, 1, &refs, NULL);
2073 nrefs->bytenr[*level - 1] = bytenr;
2074 nrefs->refs[*level - 1] = refs;
/* Shared child already tracked: skip descending into it again. */
2079 ret = enter_shared_node(root, bytenr, refs,
2082 path->slots[*level]++;
2087 next = btrfs_find_tree_block(root, bytenr, blocksize);
2088 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2089 free_extent_buffer(next);
2090 reada_walk_down(root, cur, path->slots[*level]);
2091 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: record a corrupt-extent entry for later repair. */
2093 if (!extent_buffer_uptodate(next)) {
2094 struct btrfs_key node_key;
2096 btrfs_node_key_to_cpu(path->nodes[*level],
2098 path->slots[*level]);
2099 btrfs_add_corrupt_extent_record(root->fs_info,
2101 path->nodes[*level]->start,
2102 root->nodesize, *level);
2108 ret = check_child_node(root, cur, path->slots[*level], next);
/* Structural sanity of the new block before pushing it on the path. */
2114 if (btrfs_is_leaf(next))
2115 status = btrfs_check_leaf(root, NULL, next);
2117 status = btrfs_check_node(root, NULL, next);
2118 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2119 free_extent_buffer(next);
2124 *level = *level - 1;
2125 free_extent_buffer(path->nodes[*level]);
2126 path->nodes[*level] = next;
2127 path->slots[*level] = 0;
2130 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Ascend the path after a subtree is exhausted: advance the first level
 * that still has unvisited slots, freeing finished blocks on the way up
 * and leaving their shared-node tracking as each level is abandoned.
 */
2134 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2135 struct walk_control *wc, int *level)
2138 struct extent_buffer *leaf;
2140 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2141 leaf = path->nodes[i];
2142 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2147 free_extent_buffer(path->nodes[*level]);
2148 path->nodes[*level] = NULL;
2149 BUG_ON(*level > wc->active_node);
2150 if (*level == wc->active_node)
2151 leave_shared_node(root, wc, *level);
/*
 * Verify that @rec looks like a valid root directory inode: a clean
 * inode item with nlink 1, no counted links, and exactly a ".."
 * inode-ref backref at index 0 with no dir item/index entries.
 */
2158 static int check_root_dir(struct inode_record *rec)
2160 struct inode_backref *backref;
2163 if (!rec->found_inode_item || rec->errors)
2165 if (rec->nlink != 1 || rec->found_link != 0)
2167 if (list_empty(&rec->backrefs))
2169 backref = to_inode_backref(rec->backrefs.next);
2170 if (!backref->found_inode_ref)
2172 if (backref->index != 0 || backref->namelen != 2 ||
2173 memcmp(backref->name, "..", 2))
2175 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair helper: rewrite the on-disk inode item's size field with the
 * byte total accumulated from directory entries (rec->found_size) and
 * clear I_ERR_DIR_ISIZE_WRONG.  Searches with offset (u64)-1 and steps
 * back one slot to land on the inode item.
 */
2182 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2183 struct btrfs_root *root, struct btrfs_path *path,
2184 struct inode_record *rec)
2186 struct btrfs_inode_item *ei;
2187 struct btrfs_key key;
2190 key.objectid = rec->ino;
2191 key.type = BTRFS_INODE_ITEM_KEY;
2192 key.offset = (u64)-1;
2194 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2198 if (!path->slots[0]) {
2205 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2206 if (key.objectid != rec->ino) {
2211 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2212 struct btrfs_inode_item);
2213 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2214 btrfs_mark_buffer_dirty(path->nodes[0]);
2215 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2216 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2217 root->root_key.objectid);
2219 btrfs_release_path(path);
/*
 * Repair helper: insert the missing orphan item for rec->ino and clear
 * I_ERR_NO_ORPHAN_ITEM on success.
 */
2223 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2224 struct btrfs_root *root,
2225 struct btrfs_path *path,
2226 struct inode_record *rec)
2230 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2231 btrfs_release_path(path);
2233 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair helper: rewrite the inode item's nbytes field with the value
 * accumulated from file extents (rec->found_size) and clear
 * I_ERR_FILE_NBYTES_WRONG.
 */
2237 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2238 struct btrfs_root *root,
2239 struct btrfs_path *path,
2240 struct inode_record *rec)
2242 struct btrfs_inode_item *ei;
2243 struct btrfs_key key;
2246 key.objectid = rec->ino;
2247 key.type = BTRFS_INODE_ITEM_KEY;
2250 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2257 /* Since ret == 0, no need to check anything */
2258 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2259 struct btrfs_inode_item);
2260 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2261 btrfs_mark_buffer_dirty(path->nodes[0]);
2262 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2263 printf("reset nbytes for ino %llu root %llu\n",
2264 rec->ino, root->root_key.objectid);
2266 btrfs_release_path(path);
/*
 * Repair helper: fabricate the missing DIR_INDEX item for @backref in
 * its own transaction, then update the in-memory state (mark the index
 * found, adjust the parent dir's found_size and isize error bit).
 */
2270 static int add_missing_dir_index(struct btrfs_root *root,
2271 struct cache_tree *inode_cache,
2272 struct inode_record *rec,
2273 struct inode_backref *backref)
2275 struct btrfs_path *path;
2276 struct btrfs_trans_handle *trans;
2277 struct btrfs_dir_item *dir_item;
2278 struct extent_buffer *leaf;
2279 struct btrfs_key key;
2280 struct btrfs_disk_key disk_key;
2281 struct inode_record *dir_rec;
2282 unsigned long name_ptr;
2283 u32 data_size = sizeof(*dir_item) + backref->namelen;
2286 path = btrfs_alloc_path();
2290 trans = btrfs_start_transaction(root, 1);
2291 if (IS_ERR(trans)) {
2292 btrfs_free_path(path);
2293 return PTR_ERR(trans);
2296 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2297 (unsigned long long)rec->ino);
2298 key.objectid = backref->dir;
2299 key.type = BTRFS_DIR_INDEX_KEY;
2300 key.offset = backref->index;
2302 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2305 leaf = path->nodes[0];
2306 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* Fill the dir item: location key, type, name copied in after header. */
2308 disk_key.objectid = cpu_to_le64(rec->ino);
2309 disk_key.type = BTRFS_INODE_ITEM_KEY;
2310 disk_key.offset = 0;
2312 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2313 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2314 btrfs_set_dir_data_len(leaf, dir_item, 0);
2315 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2316 name_ptr = (unsigned long)(dir_item + 1);
2317 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2318 btrfs_mark_buffer_dirty(leaf);
2319 btrfs_free_path(path);
2320 btrfs_commit_transaction(trans, root);
2322 backref->found_dir_index = 1;
2323 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2324 BUG_ON(IS_ERR(dir_rec));
/* Adding a name changes the parent directory's computed isize. */
2327 dir_rec->found_size += backref->namelen;
2328 if (dir_rec->found_size == dir_rec->isize &&
2329 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2330 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2331 if (dir_rec->found_size != dir_rec->isize)
2332 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Repair helper: remove a bogus DIR_INDEX entry described by @backref,
 * using its own transaction.  If the lookup finds nothing there is
 * nothing to delete; if the entry is the only name in the item the item
 * is removed whole, otherwise just the one name is deleted.
 */
2337 static int delete_dir_index(struct btrfs_root *root,
2338 struct cache_tree *inode_cache,
2339 struct inode_record *rec,
2340 struct inode_backref *backref)
2342 struct btrfs_trans_handle *trans;
2343 struct btrfs_dir_item *di;
2344 struct btrfs_path *path;
2347 path = btrfs_alloc_path();
2351 trans = btrfs_start_transaction(root, 1);
2352 if (IS_ERR(trans)) {
2353 btrfs_free_path(path);
2354 return PTR_ERR(trans);
2358 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2359 (unsigned long long)backref->dir,
2360 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2361 (unsigned long long)root->objectid);
2363 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2364 backref->name, backref->namelen,
2365 backref->index, -1);
2368 btrfs_free_path(path);
2369 btrfs_commit_transaction(trans, root);
2376 ret = btrfs_del_item(trans, root, path);
2378 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2380 btrfs_free_path(path);
2381 btrfs_commit_transaction(trans, root);
/*
 * Repair helper: recreate a lost inode item for @rec from whatever was
 * reconstructed (link count, sizes, dir-vs-file evidence).  The mode is
 * guessed (dir 0755 when dir items were seen, else regular file 0755)
 * and the caller is warned to verify permissions and content afterward.
 */
2385 static int create_inode_item(struct btrfs_root *root,
2386 struct inode_record *rec,
2387 struct inode_backref *backref, int root_dir)
2389 struct btrfs_trans_handle *trans;
2390 struct btrfs_inode_item inode_item;
2391 time_t now = time(NULL);
2394 trans = btrfs_start_transaction(root, 1);
2395 if (IS_ERR(trans)) {
2396 ret = PTR_ERR(trans);
2400 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2401 "be incomplete, please check permissions and content after "
2402 "the fsck completes.\n", (unsigned long long)root->objectid,
2403 (unsigned long long)rec->ino);
2405 memset(&inode_item, 0, sizeof(inode_item));
2406 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2408 btrfs_set_stack_inode_nlink(&inode_item, 1);
2410 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2411 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2412 if (rec->found_dir_item) {
2413 if (rec->found_file_extent)
2414 fprintf(stderr, "root %llu inode %llu has both a dir "
2415 "item and extents, unsure if it is a dir or a "
2416 "regular file so setting it as a directory\n",
2417 (unsigned long long)root->objectid,
2418 (unsigned long long)rec->ino);
2419 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2420 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2421 } else if (!rec->found_dir_item) {
2422 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2423 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* Stamp a/c/mtime with "now"; otime is unknown so leave it zero. */
2425 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2426 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2427 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2428 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2429 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2430 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2431 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2432 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2434 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2436 btrfs_commit_transaction(trans, root);
/*
 * Walk all reconstructed backrefs of @rec and repair each kind of
 * inconsistency: delete stray dir indexes, add missing ones, insert a
 * missing dir item+index pair when only the inode ref survives, and
 * recreate the inode item when all three references agree but the item
 * itself is gone.  Returns a negative error, or the number of repairs
 * performed (0 when nothing was touched).
 */
2440 static int repair_inode_backrefs(struct btrfs_root *root,
2441 struct inode_record *rec,
2442 struct cache_tree *inode_cache,
2445 struct inode_backref *tmp, *backref;
2446 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2450 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* The subvolume's root directory gets special-cased reconstruction. */
2451 if (!delete && rec->ino == root_dirid) {
2452 if (!rec->found_inode_item) {
2453 ret = create_inode_item(root, rec, backref, 1);
2460 /* Index 0 for root dir's are special, don't mess with it */
2461 if (rec->ino == root_dirid && backref->index == 0)
/* Dir index without matching inode ref (or mismatched index): drop it. */
2465 ((backref->found_dir_index && !backref->found_inode_ref) ||
2466 (backref->found_dir_index && backref->found_inode_ref &&
2467 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2468 ret = delete_dir_index(root, inode_cache, rec, backref);
2472 list_del(&backref->list);
2476 if (!delete && !backref->found_dir_index &&
2477 backref->found_dir_item && backref->found_inode_ref) {
2478 ret = add_missing_dir_index(root, inode_cache, rec,
/* Fully consistent backref: nothing to repair, retire it. */
2483 if (backref->found_dir_item &&
2484 backref->found_dir_index &&
2485 backref->found_dir_index) {
2486 if (!backref->errors &&
2487 backref->found_inode_ref) {
2488 list_del(&backref->list);
/* Only the inode ref exists: re-insert the dir item/index pair. */
2494 if (!delete && (!backref->found_dir_index &&
2495 !backref->found_dir_item &&
2496 backref->found_inode_ref)) {
2497 struct btrfs_trans_handle *trans;
2498 struct btrfs_key location;
2500 ret = check_dir_conflict(root, backref->name,
2506 * let nlink fixing routine to handle it,
2507 * which can do it better.
2512 location.objectid = rec->ino;
2513 location.type = BTRFS_INODE_ITEM_KEY;
2514 location.offset = 0;
2516 trans = btrfs_start_transaction(root, 1);
2517 if (IS_ERR(trans)) {
2518 ret = PTR_ERR(trans);
2521 fprintf(stderr, "adding missing dir index/item pair "
2523 (unsigned long long)rec->ino);
2524 ret = btrfs_insert_dir_item(trans, root, backref->name,
2526 backref->dir, &location,
2527 imode_to_type(rec->imode),
2530 btrfs_commit_transaction(trans, root);
/* All three refs agree but the inode item is missing: recreate it. */
2534 if (!delete && (backref->found_inode_ref &&
2535 backref->found_dir_index &&
2536 backref->found_dir_item &&
2537 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2538 !rec->found_inode_item)) {
2539 ret = create_inode_item(root, rec, backref, 0);
2546 return ret ? ret : repaired;
2550 * To determine the file type for nlink/inode_item repair
2552 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2553 * Return -ENOENT if file type is not found.
2555 static int find_file_type(struct inode_record *rec, u8 *type)
2557 struct inode_backref *backref;
2559 /* For inode item recovered case */
2560 if (rec->found_inode_item) {
2561 *type = imode_to_type(rec->imode);
/* Fall back to the filetype recorded in any dir item/index backref. */
2565 list_for_each_entry(backref, &rec->backrefs, list) {
2566 if (backref->found_dir_index || backref->found_dir_item) {
2567 *type = backref->filetype;
2575 * To determine the file name for nlink repair
2577 * Return 0 if file name is found, set name and namelen.
2578 * Return -ENOENT if file name is not found.
2580 static int find_file_name(struct inode_record *rec,
2581 char *name, int *namelen)
2583 struct inode_backref *backref;
/* Any backref kind (dir item, dir index or inode ref) carries a name. */
2585 list_for_each_entry(backref, &rec->backrefs, list) {
2586 if (backref->found_dir_index || backref->found_dir_item ||
2587 backref->found_inode_ref) {
2588 memcpy(name, backref->name, backref->namelen);
2589 *namelen = backref->namelen;
2596 /* Reset the nlink of the inode to the correct one */
2597 static int reset_nlink(struct btrfs_trans_handle *trans,
2598 struct btrfs_root *root,
2599 struct btrfs_path *path,
2600 struct inode_record *rec)
2602 struct inode_backref *backref;
2603 struct inode_backref *tmp;
2604 struct btrfs_key key;
2605 struct btrfs_inode_item *inode_item;
2608 /* We don't believe this either, reset it and iterate backref */
2609 rec->found_link = 0;
2611 /* Remove all backref including the valid ones */
2612 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2613 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2614 backref->index, backref->name,
2615 backref->namelen, 0);
2619 /* remove invalid backref, so it won't be added back */
2620 if (!(backref->found_dir_index &&
2621 backref->found_dir_item &&
2622 backref->found_inode_ref)) {
2623 list_del(&backref->list);
2630 /* Set nlink to 0 */
2631 key.objectid = rec->ino;
2632 key.type = BTRFS_INODE_ITEM_KEY;
2634 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2641 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2642 struct btrfs_inode_item);
2643 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2644 btrfs_mark_buffer_dirty(path->nodes[0]);
2645 btrfs_release_path(path);
2648 * Add back valid inode_ref/dir_item/dir_index,
2649 * add_link() will handle the nlink inc, so new nlink must be correct
2651 list_for_each_entry(backref, &rec->backrefs, list) {
2652 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2653 backref->name, backref->namelen,
2654 backref->filetype, &backref->index, 1);
2659 btrfs_release_path(path);
/*
 * Repair a wrong link count: recover a usable name/type, rebuild all
 * links via reset_nlink(), and if the inode ends up with no valid link
 * at all, re-attach it under a "lost+found" directory (suffixing the
 * name with ".INO" as needed to avoid collisions).  Always clears
 * I_ERR_LINK_COUNT_WRONG so the caller cannot loop on this record.
 */
2663 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2664 struct btrfs_root *root,
2665 struct btrfs_path *path,
2666 struct inode_record *rec)
2668 char *dir_name = "lost+found";
2669 char namebuf[BTRFS_NAME_LEN] = {0};
2674 int name_recovered = 0;
2675 int type_recovered = 0;
2679 * Get file name and type first before these invalid inode ref
2680 * are deleted by remove_all_invalid_backref()
2682 name_recovered = !find_file_name(rec, namebuf, &namelen);
2683 type_recovered = !find_file_type(rec, &type);
2685 if (!name_recovered) {
2686 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2687 rec->ino, rec->ino);
2688 namelen = count_digits(rec->ino);
2689 sprintf(namebuf, "%llu", rec->ino);
2692 if (!type_recovered) {
2693 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2695 type = BTRFS_FT_REG_FILE;
2699 ret = reset_nlink(trans, root, path, rec);
2702 "Failed to reset nlink for inode %llu: %s\n",
2703 rec->ino, strerror(-ret));
/* No surviving links: park the inode under lost+found. */
2707 if (rec->found_link == 0) {
2708 lost_found_ino = root->highest_inode;
2709 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2714 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2715 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2718 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2719 dir_name, strerror(-ret));
2722 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2723 namebuf, namelen, type, NULL, 1);
2725 * Add ".INO" suffix several times to handle case where
2726 * "FILENAME.INO" is already taken by another file.
2728 while (ret == -EEXIST) {
2730 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2732 if (namelen + count_digits(rec->ino) + 1 >
2737 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2739 namelen += count_digits(rec->ino) + 1;
2740 ret = btrfs_add_link(trans, root, rec->ino,
2741 lost_found_ino, namebuf,
2742 namelen, type, NULL, 1);
2746 "Failed to link the inode %llu to %s dir: %s\n",
2747 rec->ino, dir_name, strerror(-ret));
2751 * Just increase the found_link, don't actually add the
2752 * backref. This will make things easier and this inode
2753 * record will be freed after the repair is done.
2754 * So fsck will not report problem about this inode.
2757 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2758 namelen, namebuf, dir_name);
2760 printf("Fixed the nlink of inode %llu\n", rec->ino);
2763 * Clear the flag anyway, or we will loop forever for the same inode
2764 * as it will not be removed from the bad inode list and the dead loop
2767 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2768 btrfs_release_path(path);
/*
 * find_normal_file_extent - check whether inode 'ino' in 'root' owns any
 * regular (non-inline) file extent item.
 *
 * Used as a heuristic to decide a lost inode's type when neither its
 * dir_index/dir_item nor its inode_item survives.  Per the comment below,
 * errors are deliberately swallowed: any failure counts as "no extent".
 *
 * NOTE(review): this chunk appears to have lines elided (unbalanced
 * braces, 'ret'/'type' used without visible declarations) — verify
 * against upstream btrfs-progs before relying on the exact flow.
 */
2773 * Check if there is any normal(reg or prealloc) file extent for given
2775 * This is used to determine the file type when neither its dir_index/item or
2776 * inode_item exists.
2778 * This will *NOT* report error, if any error happens, just consider it does
2779 * not have any normal file extent.
2781 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2783 struct btrfs_path *path;
2784 struct btrfs_key key;
2785 struct btrfs_key found_key;
2786 struct btrfs_file_extent_item *fi;
2790 path = btrfs_alloc_path();
/* search for the inode's EXTENT_DATA items */
2794 key.type = BTRFS_EXTENT_DATA_KEY;
2797 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* past the last slot of this leaf: advance to the next one */
2802 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2803 ret = btrfs_next_leaf(root, path);
2810 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
/* stop once we walk out of this inode's EXTENT_DATA range */
2812 if (found_key.objectid != ino ||
2813 found_key.type != BTRFS_EXTENT_DATA_KEY)
2815 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2816 struct btrfs_file_extent_item);
2817 type = btrfs_file_extent_type(path->nodes[0], fi);
/* REG or PREALLOC extent found — that is what we are looking for */
2818 if (type != BTRFS_FILE_EXTENT_INLINE) {
2824 btrfs_free_path(path);
/*
 * btrfs_type_to_imode - map a BTRFS_FT_* directory-entry type to the
 * corresponding S_IF* bits of an inode mode.
 *
 * NOTE(review): no bounds check on 'type' is visible here; a value at or
 * beyond the table size would read out of bounds.  Confirm all callers
 * pass a validated BTRFS_FT_* value.
 */
2828 static u32 btrfs_type_to_imode(u8 type)
2830 static u32 imode_by_btrfs_type[] = {
2831 [BTRFS_FT_REG_FILE] = S_IFREG,
2832 [BTRFS_FT_DIR] = S_IFDIR,
2833 [BTRFS_FT_CHRDEV] = S_IFCHR,
2834 [BTRFS_FT_BLKDEV] = S_IFBLK,
2835 [BTRFS_FT_FIFO] = S_IFIFO,
2836 [BTRFS_FT_SOCK] = S_IFSOCK,
2837 [BTRFS_FT_SYMLINK] = S_IFLNK,
2840 return imode_by_btrfs_type[(type)];
/*
 * repair_inode_no_item - recreate a missing inode item for 'rec'.
 *
 * The file type is recovered from surviving metadata where possible
 * (file extents => regular file, dir items => directory, orphan data
 * extents => regular file), falling back to a regular file.  Only the
 * inode item itself is rebuilt; nlink repair is explicitly deferred to
 * the nlink-repair pass by setting I_ERR_LINK_COUNT_WRONG below.
 *
 * NOTE(review): lines appear elided in this chunk ('ret'/'mode'
 * declarations, some closing braces) — verify against upstream.
 */
2843 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2844 struct btrfs_root *root,
2845 struct btrfs_path *path,
2846 struct inode_record *rec)
2850 int type_recovered = 0;
2853 printf("Trying to rebuild inode:%llu\n", rec->ino);
2855 type_recovered = !find_file_type(rec, &filetype);
2858 * Try to determine inode type if type not found.
2860 * For found regular file extent, it must be FILE.
2861 * For found dir_item/index, it must be DIR.
2863 * For undetermined one, use FILE as fallback.
2866 * 1. If found backref(inode_index/item is already handled) to it,
2868 * Need new inode-inode ref structure to allow search for that.
2870 if (!type_recovered) {
2871 if (rec->found_file_extent &&
2872 find_normal_file_extent(root, rec->ino)) {
2874 filetype = BTRFS_FT_REG_FILE;
2875 } else if (rec->found_dir_item) {
2877 filetype = BTRFS_FT_DIR;
2878 } else if (!list_empty(&rec->orphan_extents)) {
2880 filetype = BTRFS_FT_REG_FILE;
2882 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2885 filetype = BTRFS_FT_REG_FILE;
/* insert a fresh inode item with the recovered (or guessed) type */
2889 ret = btrfs_new_inode(trans, root, rec->ino,
2890 mode | btrfs_type_to_imode(filetype));
2895 * Here inode rebuild is done, we only rebuild the inode item,
2896 * don't repair the nlink(like move to lost+found).
2897 * That is the job of nlink repair.
2899 * We just fill the record and return
2901 rec->found_dir_item = 1;
2902 rec->imode = mode | btrfs_type_to_imode(filetype);
2904 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2905 /* Ensure the inode_nlinks repair function will be called */
2906 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * repair_inode_orphan_extent - re-attach orphan data extents to 'rec'.
 *
 * For each recorded orphan extent: if it conflicts with an existing file
 * extent it is freed instead of inserted; otherwise a file extent item is
 * inserted and the inode's size/nbytes and file-extent-hole bookkeeping
 * are updated.  The orphan list entry is removed in both cases, and
 * I_ERR_FILE_EXTENT_ORPHAN is cleared once all are processed.
 *
 * NOTE(review): the compressed case is explicitly not handled (see the
 * comment below — disk_len is assumed to equal the extent length).
 */
2911 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2912 struct btrfs_root *root,
2913 struct btrfs_path *path,
2914 struct inode_record *rec)
2916 struct orphan_data_extent *orphan;
2917 struct orphan_data_extent *tmp;
2920 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2922 * Check for conflicting file extents
2924 * Here we don't know whether the extents is compressed or not,
2925 * so we can only assume it not compressed nor data offset,
2926 * and use its disk_len as extent length.
2928 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2929 orphan->offset, orphan->disk_len, 0);
2930 btrfs_release_path(path);
/* conflicting extent present: drop the orphan's backing extent */
2935 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2936 orphan->disk_bytenr, orphan->disk_len);
2937 ret = btrfs_free_extent(trans,
2938 root->fs_info->extent_root,
2939 orphan->disk_bytenr, orphan->disk_len,
2940 0, root->objectid, orphan->objectid,
/* no conflict: insert the file extent item for the orphan range */
2945 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2946 orphan->offset, orphan->disk_bytenr,
2947 orphan->disk_len, orphan->disk_len);
2951 /* Update file size info */
2952 rec->found_size += orphan->disk_len;
2953 if (rec->found_size == rec->nbytes)
2954 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2956 /* Update the file extent hole info too */
2957 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2961 if (RB_EMPTY_ROOT(&rec->holes))
2962 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2964 list_del(&orphan->list);
2967 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * repair_inode_discount_extent - fill file extent holes ("discount"
 * extents) of 'rec' by punching explicit holes.
 *
 * Walks the rb-tree of recorded holes, punches each one, and clears
 * I_ERR_FILE_EXTENT_DISCOUNT once the tree is empty.  A file that lost
 * all of its extents gets one hole covering [0, round_up(isize)].
 */
2972 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2973 struct btrfs_root *root,
2974 struct btrfs_path *path,
2975 struct inode_record *rec)
2977 struct rb_node *node;
2978 struct file_extent_hole *hole;
2982 node = rb_first(&rec->holes);
2986 hole = rb_entry(node, struct file_extent_hole, node);
2987 ret = btrfs_punch_hole(trans, root, rec->ino,
2988 hole->start, hole->len);
/* drop the hole record now that it is materialized on disk */
2991 ret = del_file_extent_hole(&rec->holes, hole->start,
2995 if (RB_EMPTY_ROOT(&rec->holes))
2996 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2997 node = rb_first(&rec->holes);
2999 /* special case for a file losing all its file extent */
3001 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3002 round_up(rec->isize, root->sectorsize));
3006 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3007 rec->ino, root->objectid);
/*
 * try_repair_inode - dispatch all applicable inode repairs for 'rec'.
 *
 * Returns immediately unless 'rec' has one of the repairable error bits.
 * The repairs are chained in dependency order (rebuild the inode item
 * first, then extents, isize, orphan item, nlinks, nbytes); each step
 * only runs if the previous ones succeeded.  All repairs share one
 * transaction reserving 7 items (see the comment below for the
 * breakdown of the nlink-repair worst case).
 */
3012 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3014 struct btrfs_trans_handle *trans;
3015 struct btrfs_path *path;
3018 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3019 I_ERR_NO_ORPHAN_ITEM |
3020 I_ERR_LINK_COUNT_WRONG |
3021 I_ERR_NO_INODE_ITEM |
3022 I_ERR_FILE_EXTENT_ORPHAN |
3023 I_ERR_FILE_EXTENT_DISCOUNT|
3024 I_ERR_FILE_NBYTES_WRONG)))
3027 path = btrfs_alloc_path();
3032 * For nlink repair, it may create a dir and add link, so
3033 * 2 for parent(256)'s dir_index and dir_item
3034 * 2 for lost+found dir's inode_item and inode_ref
3035 * 1 for the new inode_ref of the file
3036 * 2 for lost+found dir's dir_index and dir_item for the file
3038 trans = btrfs_start_transaction(root, 7);
3039 if (IS_ERR(trans)) {
3040 btrfs_free_path(path);
3041 return PTR_ERR(trans);
3044 if (rec->errors & I_ERR_NO_INODE_ITEM)
3045 ret = repair_inode_no_item(trans, root, path, rec);
3046 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3047 ret = repair_inode_orphan_extent(trans, root, path, rec);
3048 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3049 ret = repair_inode_discount_extent(trans, root, path, rec);
3050 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3051 ret = repair_inode_isize(trans, root, path, rec);
3052 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3053 ret = repair_inode_orphan_item(trans, root, path, rec);
3054 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3055 ret = repair_inode_nlinks(trans, root, path, rec);
3056 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3057 ret = repair_inode_nbytes(trans, root, path, rec)
/* commit even on partial failure so completed repairs persist */
3058 btrfs_commit_transaction(trans, root);
3059 btrfs_free_path(path);
/*
 * check_inode_recs - validate (and, in repair mode, fix) every inode
 * record collected for one fs/subvol root.
 *
 * Outline as visible here:
 *   1. For dead roots (refs == 0) the cache must be empty.
 *   2. Record the highest seen ino so 'lost+found' later picks an
 *      objectid no existing inode uses or refers to.
 *   3. Repair mode: several staged passes over the cache repairing
 *      backrefs first (see the long comment below for why order
 *      matters), rescanning when a pass invalidated the cache.
 *   4. Verify/recreate the root directory item.
 *   5. Drain the cache: classify remaining errors, attempt
 *      try_repair_inode(), and print unresolved inode/backref errors.
 *
 * Returns -1 if any unrepaired error remains, 0 otherwise.
 *
 * NOTE(review): many lines are elided in this chunk (loop braces,
 * 'stage'/'err'/'error' declarations); verify flow against upstream.
 */
3063 static int check_inode_recs(struct btrfs_root *root,
3064 struct cache_tree *inode_cache)
3066 struct cache_extent *cache;
3067 struct ptr_node *node;
3068 struct inode_record *rec;
3069 struct inode_backref *backref;
3074 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3076 if (btrfs_root_refs(&root->root_item) == 0) {
3077 if (!cache_tree_empty(inode_cache))
3078 fprintf(stderr, "warning line %d\n", __LINE__);
3083 * We need to record the highest inode number for later 'lost+found'
3085 * We must select an ino not used/referred by any existing inode, or
3086 * 'lost+found' ino may be a missing ino in a corrupted leaf,
3087 * this may cause 'lost+found' dir has wrong nlinks.
3089 cache = last_cache_extent(inode_cache);
3091 node = container_of(cache, struct ptr_node, cache);
3093 if (rec->ino > root->highest_inode)
3094 root->highest_inode = rec->ino;
3098 * We need to repair backrefs first because we could change some of the
3099 * errors in the inode recs.
3101 * We also need to go through and delete invalid backrefs first and then
3102 * add the correct ones second. We do this because we may get EEXIST
3103 * when adding back the correct index because we hadn't yet deleted the
3106 * For example, if we were missing a dir index then the directories
3107 * isize would be wrong, so if we fixed the isize to what we thought it
3108 * would be and then fixed the backref we'd still have a invalid fs, so
3109 * we need to add back the dir index and then check to see if the isize
3114 if (stage == 3 && !err)
3117 cache = search_cache_extent(inode_cache, 0);
3118 while (repair && cache) {
3119 node = container_of(cache, struct ptr_node, cache);
3121 cache = next_cache_extent(cache);
3123 /* Need to free everything up and rescan */
3125 remove_cache_extent(inode_cache, &node->cache);
3127 free_inode_rec(rec);
3131 if (list_empty(&rec->backrefs))
3134 ret = repair_inode_backrefs(root, rec, inode_cache,
/* verify the root directory inode (objectid == root_dirid) */
3148 rec = get_inode_rec(inode_cache, root_dirid, 0);
3149 BUG_ON(IS_ERR(rec));
3151 ret = check_root_dir(rec);
3153 fprintf(stderr, "root %llu root dir %llu error\n",
3154 (unsigned long long)root->root_key.objectid,
3155 (unsigned long long)root_dirid);
3156 print_inode_error(root, rec);
3161 struct btrfs_trans_handle *trans;
3163 trans = btrfs_start_transaction(root, 1);
3164 if (IS_ERR(trans)) {
3165 err = PTR_ERR(trans);
3170 "root %llu missing its root dir, recreating\n",
3171 (unsigned long long)root->objectid);
3173 ret = btrfs_make_root_dir(trans, root, root_dirid);
3176 btrfs_commit_transaction(trans, root);
3180 fprintf(stderr, "root %llu root dir %llu not found\n",
3181 (unsigned long long)root->root_key.objectid,
3182 (unsigned long long)root_dirid);
/* final drain: report/repair every remaining inode record */
3186 cache = search_cache_extent(inode_cache, 0);
3189 node = container_of(cache, struct ptr_node, cache);
3191 remove_cache_extent(inode_cache, &node->cache);
3193 if (rec->ino == root_dirid ||
3194 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3195 free_inode_rec(rec);
3199 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3200 ret = check_orphan_item(root, rec->ino);
3202 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3203 if (can_free_inode_rec(rec)) {
3204 free_inode_rec(rec);
3209 if (!rec->found_inode_item)
3210 rec->errors |= I_ERR_NO_INODE_ITEM;
3211 if (rec->found_link != rec->nlink)
3212 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3214 ret = try_repair_inode(root, rec);
3215 if (ret == 0 && can_free_inode_rec(rec)) {
3216 free_inode_rec(rec);
3222 if (!(repair && ret == 0))
3224 print_inode_error(root, rec);
3225 list_for_each_entry(backref, &rec->backrefs, list) {
3226 if (!backref->found_dir_item)
3227 backref->errors |= REF_ERR_NO_DIR_ITEM;
3228 if (!backref->found_dir_index)
3229 backref->errors |= REF_ERR_NO_DIR_INDEX;
3230 if (!backref->found_inode_ref)
3231 backref->errors |= REF_ERR_NO_INODE_REF;
3232 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3233 " namelen %u name %s filetype %d errors %x",
3234 (unsigned long long)backref->dir,
3235 (unsigned long long)backref->index,
3236 backref->namelen, backref->name,
3237 backref->filetype, backref->errors);
3238 print_ref_error(backref->errors);
3240 free_inode_rec(rec);
3242 return (error > 0) ? -1 : 0;
/*
 * get_root_rec - look up (or lazily create) the root_record for
 * 'objectid' in 'root_cache'.
 *
 * Returns the cached record, a newly inserted zeroed record, or an
 * ERR_PTR (-ENOMEM on allocation failure, -EEXIST if the cache insert
 * races/collides).  Records use a 1-byte cache_extent keyed by objectid.
 */
3245 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3248 struct cache_extent *cache;
3249 struct root_record *rec = NULL;
3252 cache = lookup_cache_extent(root_cache, objectid, 1);
3254 rec = container_of(cache, struct root_record, cache);
/* not cached yet: build a fresh record */
3256 rec = calloc(1, sizeof(*rec));
3258 return ERR_PTR(-ENOMEM);
3259 rec->objectid = objectid;
3260 INIT_LIST_HEAD(&rec->backrefs);
3261 rec->cache.start = objectid;
3262 rec->cache.size = 1;
3264 ret = insert_cache_extent(root_cache, &rec->cache);
3266 return ERR_PTR(-EEXIST);
/*
 * get_root_backref - find or create the backref entry on 'rec' that
 * matches (ref_root, dir, name).
 *
 * An existing entry matches on ref_root, dir, namelen and the name
 * bytes.  Otherwise a new entry is allocated with the name stored
 * inline (calloc of sizeof(*backref) + namelen + 1) and appended to
 * rec->backrefs.
 *
 * NOTE(review): allocation-failure handling lines appear elided here.
 */
3271 static struct root_backref *get_root_backref(struct root_record *rec,
3272 u64 ref_root, u64 dir, u64 index,
3273 const char *name, int namelen)
3275 struct root_backref *backref;
3277 list_for_each_entry(backref, &rec->backrefs, list) {
3278 if (backref->ref_root != ref_root || backref->dir != dir ||
3279 backref->namelen != namelen)
3281 if (memcmp(name, backref->name, namelen))
/* no match: allocate with room for the inline name + NUL */
3286 backref = calloc(1, sizeof(*backref) + namelen + 1);
3289 backref->ref_root = ref_root;
3291 backref->index = index;
3292 backref->namelen = namelen;
3293 memcpy(backref->name, name, namelen);
3294 backref->name[namelen] = '\0';
3295 list_add_tail(&backref->list, &rec->backrefs);
/*
 * free_root_record - cache_tree destructor callback: release a
 * root_record and every backref hanging off it.  Registered below via
 * FREE_EXTENT_CACHE_BASED_TREE for the root_recs tree.
 */
3299 static void free_root_record(struct cache_extent *cache)
3301 struct root_record *rec;
3302 struct root_backref *backref;
3304 rec = container_of(cache, struct root_record, cache);
3305 while (!list_empty(&rec->backrefs)) {
3306 backref = to_root_backref(rec->backrefs.next);
3307 list_del(&backref->list);
3314 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * add_root_backref - record one piece of evidence (dir item/index, root
 * ref/backref) linking root 'root_id' under directory 'dir' of
 * 'ref_root'.
 *
 * Each item type sets the matching found_* flag on the backref and
 * flags duplicates/mismatches (index mismatch, duplicate root ref or
 * backref).  A root is considered reachable once both the forward root
 * ref and the dir item were seen.
 */
3316 static int add_root_backref(struct cache_tree *root_cache,
3317 u64 root_id, u64 ref_root, u64 dir, u64 index,
3318 const char *name, int namelen,
3319 int item_type, int errors)
3321 struct root_record *rec;
3322 struct root_backref *backref;
3324 rec = get_root_rec(root_cache, root_id);
3325 BUG_ON(IS_ERR(rec));
3326 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3329 backref->errors |= errors;
/* DIR_ITEM carries no index; everything else must agree on it */
3331 if (item_type != BTRFS_DIR_ITEM_KEY) {
3332 if (backref->found_dir_index || backref->found_back_ref ||
3333 backref->found_forward_ref) {
3334 if (backref->index != index)
3335 backref->errors |= REF_ERR_INDEX_UNMATCH;
3337 backref->index = index;
3341 if (item_type == BTRFS_DIR_ITEM_KEY) {
3342 if (backref->found_forward_ref)
3344 backref->found_dir_item = 1;
3345 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3346 backref->found_dir_index = 1;
3347 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3348 if (backref->found_forward_ref)
3349 backref->errors |= REF_ERR_DUP_ROOT_REF;
3350 else if (backref->found_dir_item)
3352 backref->found_forward_ref = 1;
3353 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3354 if (backref->found_back_ref)
3355 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3356 backref->found_back_ref = 1;
3361 if (backref->found_forward_ref && backref->found_dir_item)
3362 backref->reachable = 1;
/*
 * merge_root_recs - fold the per-subvolume root cache 'src_cache' into
 * the global root cache 'dst_cache'.
 *
 * Reloc-tree records are discarded outright.  Each remaining inode
 * record that is a child root (is_child_root()) contributes its dir
 * item/index backrefs to the global cache as root backrefs; the inode
 * record itself is then freed.
 */
3366 static int merge_root_recs(struct btrfs_root *root,
3367 struct cache_tree *src_cache,
3368 struct cache_tree *dst_cache)
3370 struct cache_extent *cache;
3371 struct ptr_node *node;
3372 struct inode_record *rec;
3373 struct inode_backref *backref;
3376 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3377 free_inode_recs_tree(src_cache);
3382 cache = search_cache_extent(src_cache, 0);
3385 node = container_of(cache, struct ptr_node, cache);
3387 remove_cache_extent(src_cache, &node->cache);
3390 ret = is_child_root(root, root->objectid, rec->ino);
3396 list_for_each_entry(backref, &rec->backrefs, list) {
3397 BUG_ON(backref->found_inode_ref);
3398 if (backref->found_dir_item)
3399 add_root_backref(dst_cache, rec->ino,
3400 root->root_key.objectid, backref->dir,
3401 backref->index, backref->name,
3402 backref->namelen, BTRFS_DIR_ITEM_KEY,
3404 if (backref->found_dir_index)
3405 add_root_backref(dst_cache, rec->ino,
3406 root->root_key.objectid, backref->dir,
3407 backref->index, backref->name,
3408 backref->namelen, BTRFS_DIR_INDEX_KEY,
3412 free_inode_rec(rec);
/*
 * check_root_refs - verify reachability of every fs/subvol root.
 *
 * Two phases are visible: first, iteratively clear 'reachable' on
 * backrefs whose referencing root is itself unreferenced (the fixme
 * below notes circular references are not detected); second, report
 * every root that ended up unreferenced or with inconsistent
 * ref/backref/dir-item evidence.  Roots in the FIRST..LAST free
 * objectid range with an orphan item are tolerated.
 *
 * Returns 1 if any error was found, 0 otherwise.
 */
3419 static int check_root_refs(struct btrfs_root *root,
3420 struct cache_tree *root_cache)
3422 struct root_record *rec;
3423 struct root_record *ref_root;
3424 struct root_backref *backref;
3425 struct cache_extent *cache;
/* the top-level fs tree is implicitly referenced */
3431 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3432 BUG_ON(IS_ERR(rec));
3435 /* fixme: this can not detect circular references */
3438 cache = search_cache_extent(root_cache, 0);
3442 rec = container_of(cache, struct root_record, cache);
3443 cache = next_cache_extent(cache);
3445 if (rec->found_ref == 0)
3448 list_for_each_entry(backref, &rec->backrefs, list) {
3449 if (!backref->reachable)
3452 ref_root = get_root_rec(root_cache,
3454 BUG_ON(IS_ERR(ref_root));
3455 if (ref_root->found_ref > 0)
/* referencing root is dead: this backref no longer counts */
3458 backref->reachable = 0;
3460 if (rec->found_ref == 0)
/* reporting pass */
3466 cache = search_cache_extent(root_cache, 0);
3470 rec = container_of(cache, struct root_record, cache);
3471 cache = next_cache_extent(cache);
3473 if (rec->found_ref == 0 &&
3474 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3475 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3476 ret = check_orphan_item(root->fs_info->tree_root,
3482 * If we don't have a root item then we likely just have
3483 * a dir item in a snapshot for this root but no actual
3484 * ref key or anything so it's meaningless.
3486 if (!rec->found_root_item)
3489 fprintf(stderr, "fs tree %llu not referenced\n",
3490 (unsigned long long)rec->objectid);
3494 if (rec->found_ref > 0 && !rec->found_root_item)
3496 list_for_each_entry(backref, &rec->backrefs, list) {
3497 if (!backref->found_dir_item)
3498 backref->errors |= REF_ERR_NO_DIR_ITEM;
3499 if (!backref->found_dir_index)
3500 backref->errors |= REF_ERR_NO_DIR_INDEX;
3501 if (!backref->found_back_ref)
3502 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3503 if (!backref->found_forward_ref)
3504 backref->errors |= REF_ERR_NO_ROOT_REF;
3505 if (backref->reachable && backref->errors)
3512 fprintf(stderr, "fs tree %llu refs %u %s\n",
3513 (unsigned long long)rec->objectid, rec->found_ref,
3514 rec->found_root_item ? "" : "not found");
3516 list_for_each_entry(backref, &rec->backrefs, list) {
3517 if (!backref->reachable)
3519 if (!backref->errors && rec->found_root_item)
3521 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3522 " index %llu namelen %u name %s errors %x\n",
3523 (unsigned long long)backref->ref_root,
3524 (unsigned long long)backref->dir,
3525 (unsigned long long)backref->index,
3526 backref->namelen, backref->name,
3528 print_ref_error(backref->errors);
3531 return errors > 0 ? 1 : 0;
/*
 * process_root_ref - decode one ROOT_REF/ROOT_BACKREF item at 'slot' of
 * 'eb' and feed it into the root cache.
 *
 * The embedded name is truncated to BTRFS_NAME_LEN with
 * REF_ERR_NAME_TOO_LONG noted.  For a ROOT_REF the key is
 * (parent, REF, child); for a ROOT_BACKREF it is (child, BACKREF,
 * parent) — hence the swapped objectid/offset in the two calls below.
 */
3534 static int process_root_ref(struct extent_buffer *eb, int slot,
3535 struct btrfs_key *key,
3536 struct cache_tree *root_cache)
3542 struct btrfs_root_ref *ref;
3543 char namebuf[BTRFS_NAME_LEN];
3546 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3548 dirid = btrfs_root_ref_dirid(eb, ref);
3549 index = btrfs_root_ref_sequence(eb, ref);
3550 name_len = btrfs_root_ref_name_len(eb, ref);
3552 if (name_len <= BTRFS_NAME_LEN) {
3556 len = BTRFS_NAME_LEN;
3557 error = REF_ERR_NAME_TOO_LONG;
3559 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3561 if (key->type == BTRFS_ROOT_REF_KEY) {
3562 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3563 index, namebuf, len, key->type, error);
3565 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3566 index, namebuf, len, key->type, error);
/*
 * free_corrupt_block - cache_tree destructor callback for
 * btrfs_corrupt_block records; registered for the corrupt_blocks tree
 * via FREE_EXTENT_CACHE_BASED_TREE below.
 */
3571 static void free_corrupt_block(struct cache_extent *cache)
3573 struct btrfs_corrupt_block *corrupt;
3575 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3579 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
 * repair_btree - drop corrupted tree blocks recorded in
 * 'corrupt_blocks' from 'root', then rebalance.
 *
 * Pass 1: for each corrupt block, search down to its parent level with
 * ins_len == 0 (no balancing — see the comment below about not
 * balancing against corrupted siblings), delete the node pointer and
 * free the block's extent.  Pass 2: re-search each key with ins_len ==
 * -1 so btrfs_search_slot() performs the deferred balancing.
 */
3582 * Repair the btree of the given root.
3584 * The fix is to remove the node key in corrupt_blocks cache_tree.
3585 * and rebalance the tree.
3586 * After the fix, the btree should be writeable.
3588 static int repair_btree(struct btrfs_root *root,
3589 struct cache_tree *corrupt_blocks)
3591 struct btrfs_trans_handle *trans;
3592 struct btrfs_path *path;
3593 struct btrfs_corrupt_block *corrupt;
3594 struct cache_extent *cache;
3595 struct btrfs_key key;
3600 if (cache_tree_empty(corrupt_blocks))
3603 path = btrfs_alloc_path();
3607 trans = btrfs_start_transaction(root, 1);
3608 if (IS_ERR(trans)) {
3609 ret = PTR_ERR(trans);
3610 fprintf(stderr, "Error starting transaction: %s\n",
3614 cache = first_cache_extent(corrupt_blocks);
3616 corrupt = container_of(cache, struct btrfs_corrupt_block,
3618 level = corrupt->level;
3619 path->lowest_level = level;
3620 key.objectid = corrupt->key.objectid;
3621 key.type = corrupt->key.type;
3622 key.offset = corrupt->key.offset;
3625 * Here we don't want to do any tree balance, since it may
3626 * cause a balance with corrupted brother leaf/node,
3627 * so ins_len set to 0 here.
3628 * Balance will be done after all corrupt node/leaf is deleted.
3630 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3633 offset = btrfs_node_blockptr(path->nodes[level],
3634 path->slots[level]);
3636 /* Remove the ptr */
3637 ret = btrfs_del_ptr(trans, root, path, level,
3638 path->slots[level]);
3642 * Remove the corresponding extent
3643 * return value is not concerned.
3645 btrfs_release_path(path);
3646 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3647 0, root->root_key.objectid,
3649 cache = next_cache_extent(cache);
3652 /* Balance the btree using btrfs_search_slot() */
3653 cache = first_cache_extent(corrupt_blocks);
3655 corrupt = container_of(cache, struct btrfs_corrupt_block,
3657 memcpy(&key, &corrupt->key, sizeof(key));
3658 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3661 /* return will always >0 since it won't find the item */
3663 btrfs_release_path(path);
3664 cache = next_cache_extent(cache);
3667 btrfs_commit_transaction(trans, root);
3669 btrfs_free_path(path);
/*
 * check_fs_root - fully check one fs/subvol tree.
 *
 * Visible steps:
 *   - set up a per-root corrupt_blocks cache (also hung off fs_info so
 *     lower layers can record into it);
 *   - register the root in root_cache (skipping reloc trees);
 *   - move pending orphan data extents onto their inode records;
 *   - validate the root node itself, then walk the whole tree with
 *     walk_down_tree()/walk_up_tree(), honoring drop_progress for
 *     partially-dropped snapshots;
 *   - report (and in repair mode attempt repair_btree() on) any
 *     corrupted blocks found;
 *   - merge per-root records into the global caches and run
 *     check_inode_recs().
 *
 * NOTE(review): several declarations ('ret', 'err', 'wret', 'level')
 * and loop braces are elided in this chunk; verify against upstream.
 */
3673 static int check_fs_root(struct btrfs_root *root,
3674 struct cache_tree *root_cache,
3675 struct walk_control *wc)
3681 struct btrfs_path path;
3682 struct shared_node root_node;
3683 struct root_record *rec;
3684 struct btrfs_root_item *root_item = &root->root_item;
3685 struct cache_tree corrupt_blocks;
3686 struct orphan_data_extent *orphan;
3687 struct orphan_data_extent *tmp;
3688 enum btrfs_tree_block_status status;
3689 struct node_refs nrefs;
3692 * Reuse the corrupt_block cache tree to record corrupted tree block
3694 * Unlike the usage in extent tree check, here we do it in a per
3695 * fs/subvol tree base.
3697 cache_tree_init(&corrupt_blocks);
3698 root->fs_info->corrupt_blocks = &corrupt_blocks;
3700 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3701 rec = get_root_rec(root_cache, root->root_key.objectid);
3702 BUG_ON(IS_ERR(rec));
3703 if (btrfs_root_refs(root_item) > 0)
3704 rec->found_root_item = 1;
3707 btrfs_init_path(&path);
3708 memset(&root_node, 0, sizeof(root_node));
3709 cache_tree_init(&root_node.root_cache);
3710 cache_tree_init(&root_node.inode_cache);
3711 memset(&nrefs, 0, sizeof(nrefs));
3713 /* Move the orphan extent record to corresponding inode_record */
3714 list_for_each_entry_safe(orphan, tmp,
3715 &root->orphan_data_extents, list) {
3716 struct inode_record *inode;
3718 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3720 BUG_ON(IS_ERR(inode));
3721 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3722 list_move(&orphan->list, &inode->orphan_extents);
3725 level = btrfs_header_level(root->node);
3726 memset(wc->nodes, 0, sizeof(wc->nodes));
3727 wc->nodes[level] = &root_node;
3728 wc->active_node = level;
3729 wc->root_level = level;
3731 /* We may not have checked the root block, lets do that now */
3732 if (btrfs_is_leaf(root->node))
3733 status = btrfs_check_leaf(root, NULL, root->node);
3735 status = btrfs_check_node(root, NULL, root->node);
3736 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* start the walk from the root, or resume from drop_progress */
3739 if (btrfs_root_refs(root_item) > 0 ||
3740 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3741 path.nodes[level] = root->node;
3742 extent_buffer_get(root->node);
3743 path.slots[level] = 0;
3745 struct btrfs_key key;
3746 struct btrfs_disk_key found_key;
3748 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3749 level = root_item->drop_level;
3750 path.lowest_level = level;
3751 if (level > btrfs_header_level(root->node) ||
3752 level >= BTRFS_MAX_LEVEL) {
3753 error("ignoring invalid drop level: %u", level);
3756 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3759 btrfs_node_key(path.nodes[level], &found_key,
3761 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3762 sizeof(found_key)));
3766 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3772 wret = walk_up_tree(root, &path, wc, &level);
3779 btrfs_release_path(&path);
3781 if (!cache_tree_empty(&corrupt_blocks)) {
3782 struct cache_extent *cache;
3783 struct btrfs_corrupt_block *corrupt;
3785 printf("The following tree block(s) is corrupted in tree %llu:\n",
3786 root->root_key.objectid);
3787 cache = first_cache_extent(&corrupt_blocks);
3789 corrupt = container_of(cache,
3790 struct btrfs_corrupt_block,
3792 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3793 cache->start, corrupt->level,
3794 corrupt->key.objectid, corrupt->key.type,
3795 corrupt->key.offset);
3796 cache = next_cache_extent(cache);
3799 printf("Try to repair the btree for root %llu\n",
3800 root->root_key.objectid);
3801 ret = repair_btree(root, &corrupt_blocks);
3803 fprintf(stderr, "Failed to repair btree: %s\n",
3806 printf("Btree for root %llu is fixed\n",
3807 root->root_key.objectid);
3811 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3815 if (root_node.current) {
3816 root_node.current->checked = 1;
3817 maybe_free_inode_rec(&root_node.inode_cache,
3821 err = check_inode_recs(root, &root_node.inode_cache);
/* tear down the per-root scratch state */
3825 free_corrupt_blocks_tree(&corrupt_blocks);
3826 root->fs_info->corrupt_blocks = NULL;
3827 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * fs_root_objectid - does 'objectid' denote a tree that should be
 * checked as an fs tree?  Reloc and data-reloc trees count in addition
 * to the normal is_fstree() range.
 */
3831 static int fs_root_objectid(u64 objectid)
3833 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3834 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3836 return is_fstree(objectid);
/*
 * check_fs_roots - iterate every ROOT_ITEM in the tree root and run
 * check_fs_root() on each fs/subvol tree; ROOT_REF/ROOT_BACKREF items
 * are fed to process_root_ref() along the way.
 *
 * If the tree root node changes underneath us (repairs rewrote it) or a
 * check returns -EAGAIN, the accumulated root cache is discarded and
 * the whole scan restarts.  Reloc trees are read uncached and freed
 * after checking.  Progress reporting hooks into the global ctx.
 */
3839 static int check_fs_roots(struct btrfs_root *root,
3840 struct cache_tree *root_cache)
3842 struct btrfs_path path;
3843 struct btrfs_key key;
3844 struct walk_control wc;
3845 struct extent_buffer *leaf, *tree_node;
3846 struct btrfs_root *tmp_root;
3847 struct btrfs_root *tree_root = root->fs_info->tree_root;
3851 if (ctx.progress_enabled) {
3852 ctx.tp = TASK_FS_ROOTS;
3853 task_start(ctx.info);
3857 * Just in case we made any changes to the extent tree that weren't
3858 * reflected into the free space cache yet.
3861 reset_cached_block_groups(root->fs_info);
3862 memset(&wc, 0, sizeof(wc));
3863 cache_tree_init(&wc.shared);
3864 btrfs_init_path(&path);
/* scan all ROOT_ITEM keys in the tree root */
3869 key.type = BTRFS_ROOT_ITEM_KEY;
3870 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3875 tree_node = tree_root->node;
/* tree root was rewritten (by repair): restart from scratch */
3877 if (tree_node != tree_root->node) {
3878 free_root_recs_tree(root_cache);
3879 btrfs_release_path(&path);
3882 leaf = path.nodes[0];
3883 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3884 ret = btrfs_next_leaf(tree_root, &path);
3890 leaf = path.nodes[0];
3892 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3893 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3894 fs_root_objectid(key.objectid)) {
3895 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3896 tmp_root = btrfs_read_fs_root_no_cache(
3897 root->fs_info, &key);
3899 key.offset = (u64)-1;
3900 tmp_root = btrfs_read_fs_root(
3901 root->fs_info, &key);
3903 if (IS_ERR(tmp_root)) {
3907 ret = check_fs_root(tmp_root, root_cache, &wc);
3908 if (ret == -EAGAIN) {
3909 free_root_recs_tree(root_cache);
3910 btrfs_release_path(&path);
3915 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3916 btrfs_free_fs_root(tmp_root);
3917 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3918 key.type == BTRFS_ROOT_BACKREF_KEY) {
3919 process_root_ref(leaf, path.slots[0], &key,
3926 btrfs_release_path(&path);
3928 free_extent_cache_tree(&wc.shared);
3929 if (!cache_tree_empty(&wc.shared))
3930 fprintf(stderr, "warning line %d\n", __LINE__);
3932 task_stop(ctx.info);
/*
 * all_backpointers_checked - audit every backref of extent record
 * 'rec' against the extent tree.
 *
 * Checks per backref: it was found in the extent tree; tree backrefs
 * were actually referenced; data backrefs have matching found/expected
 * ref counts, matching disk bytenr and byte length.  Finally the sum of
 * data found_refs (plus one per tree backref, by the visible
 * structure) must equal rec->refs.  When 'print_errs' is set each
 * mismatch is reported to stderr.
 *
 * NOTE(review): 'err'/'found' declarations and several 'goto out'/brace
 * lines are elided here; the return value is not visible — presumably
 * non-zero on any mismatch.  Verify against upstream.
 */
3937 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3939 struct list_head *cur = rec->backrefs.next;
3940 struct extent_backref *back;
3941 struct tree_backref *tback;
3942 struct data_backref *dback;
3946 while(cur != &rec->backrefs) {
3947 back = to_extent_backref(cur);
3949 if (!back->found_extent_tree) {
3953 if (back->is_data) {
3954 dback = to_data_backref(back);
3955 fprintf(stderr, "Backref %llu %s %llu"
3956 " owner %llu offset %llu num_refs %lu"
3957 " not found in extent tree\n",
3958 (unsigned long long)rec->start,
3959 back->full_backref ?
3961 back->full_backref ?
3962 (unsigned long long)dback->parent:
3963 (unsigned long long)dback->root,
3964 (unsigned long long)dback->owner,
3965 (unsigned long long)dback->offset,
3966 (unsigned long)dback->num_refs);
3968 tback = to_tree_backref(back);
3969 fprintf(stderr, "Backref %llu parent %llu"
3970 " root %llu not found in extent tree\n",
3971 (unsigned long long)rec->start,
3972 (unsigned long long)tback->parent,
3973 (unsigned long long)tback->root);
3976 if (!back->is_data && !back->found_ref) {
3980 tback = to_tree_backref(back);
3981 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3982 (unsigned long long)rec->start,
3983 back->full_backref ? "parent" : "root",
3984 back->full_backref ?
3985 (unsigned long long)tback->parent :
3986 (unsigned long long)tback->root, back);
3988 if (back->is_data) {
3989 dback = to_data_backref(back);
3990 if (dback->found_ref != dback->num_refs) {
3994 fprintf(stderr, "Incorrect local backref count"
3995 " on %llu %s %llu owner %llu"
3996 " offset %llu found %u wanted %u back %p\n",
3997 (unsigned long long)rec->start,
3998 back->full_backref ?
4000 back->full_backref ?
4001 (unsigned long long)dback->parent:
4002 (unsigned long long)dback->root,
4003 (unsigned long long)dback->owner,
4004 (unsigned long long)dback->offset,
4005 dback->found_ref, dback->num_refs, back);
4007 if (dback->disk_bytenr != rec->start) {
4011 fprintf(stderr, "Backref disk bytenr does not"
4012 " match extent record, bytenr=%llu, "
4013 "ref bytenr=%llu\n",
4014 (unsigned long long)rec->start,
4015 (unsigned long long)dback->disk_bytenr);
4018 if (dback->bytes != rec->nr) {
4022 fprintf(stderr, "Backref bytes do not match "
4023 "extent backref, bytenr=%llu, ref "
4024 "bytes=%llu, backref bytes=%llu\n",
4025 (unsigned long long)rec->start,
4026 (unsigned long long)rec->nr,
4027 (unsigned long long)dback->bytes);
4030 if (!back->is_data) {
4033 dback = to_data_backref(back);
4034 found += dback->found_ref;
4037 if (found != rec->refs) {
4041 fprintf(stderr, "Incorrect global backref count "
4042 "on %llu found %llu wanted %llu\n",
4043 (unsigned long long)rec->start,
4044 (unsigned long long)found,
4045 (unsigned long long)rec->refs);
/*
 * free_all_extent_backrefs - unlink and release every backref queued
 * on extent record 'rec'.
 */
4051 static int free_all_extent_backrefs(struct extent_record *rec)
4053 struct extent_backref *back;
4054 struct list_head *cur;
4055 while (!list_empty(&rec->backrefs)) {
4056 cur = rec->backrefs.next;
4057 back = to_extent_backref(cur);
/*
 * free_extent_record_cache - drain 'extent_cache', freeing each extent
 * record together with all of its backrefs.
 */
4064 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4065 struct cache_tree *extent_cache)
4067 struct cache_extent *cache;
4068 struct extent_record *rec;
4071 cache = first_cache_extent(extent_cache);
4074 rec = container_of(cache, struct extent_record, cache);
4075 remove_cache_extent(extent_cache, cache);
4076 free_all_extent_backrefs(rec);
/*
 * maybe_free_extent_rec - release 'rec' from 'extent_cache' if it has
 * been fully verified: content and owner checked, ref counts matching
 * and positive, no duplicates, all backpointers satisfied, and none of
 * the bad-full-backref / crossing-stripes / wrong-chunk-type flags set.
 */
4081 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4082 struct extent_record *rec)
4084 if (rec->content_checked && rec->owner_ref_checked &&
4085 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4086 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4087 !rec->bad_full_backref && !rec->crossing_stripes &&
4088 !rec->wrong_chunk_type) {
4089 remove_cache_extent(extent_cache, &rec->cache);
4090 free_all_extent_backrefs(rec);
4091 list_del_init(&rec->list);
/*
 * check_owner_ref - verify that the owner recorded in tree block 'buf'
 * really references it.
 *
 * First scan rec's non-full tree backrefs for one whose root matches
 * btrfs_header_owner(buf).  Failing that, read the owner's fs tree and
 * search for buf's first key one level above buf; the block is
 * confirmed if the parent's slot points back at buf->start.
 *
 * Returns 0 when an owner reference is found, 1 otherwise.
 */
4097 static int check_owner_ref(struct btrfs_root *root,
4098 struct extent_record *rec,
4099 struct extent_buffer *buf)
4101 struct extent_backref *node;
4102 struct tree_backref *back;
4103 struct btrfs_root *ref_root;
4104 struct btrfs_key key;
4105 struct btrfs_path path;
4106 struct extent_buffer *parent;
4111 list_for_each_entry(node, &rec->backrefs, list) {
4114 if (!node->found_ref)
4116 if (node->full_backref)
4118 back = to_tree_backref(node);
4119 if (btrfs_header_owner(buf) == back->root)
4122 BUG_ON(rec->is_root);
4124 /* try to find the block by search corresponding fs tree */
4125 key.objectid = btrfs_header_owner(buf);
4126 key.type = BTRFS_ROOT_ITEM_KEY;
4127 key.offset = (u64)-1;
4129 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4130 if (IS_ERR(ref_root))
4133 level = btrfs_header_level(buf);
/* first key of the block: item key for leaves, node key otherwise */
4135 btrfs_item_key_to_cpu(buf, &key, 0);
4137 btrfs_node_key_to_cpu(buf, &key, 0);
4139 btrfs_init_path(&path);
4140 path.lowest_level = level + 1;
4141 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4145 parent = path.nodes[level + 1];
4146 if (parent && buf->start == btrfs_node_blockptr(parent,
4147 path.slots[level + 1]))
4150 btrfs_release_path(&path);
4151 return found ? 0 : 1;
/*
 * Walk @rec's backrefs and report whether any non-full tree backref
 * belongs to the extent tree (BTRFS_EXTENT_TREE_OBJECTID).
 * NOTE(review): the return statements and data-backref skip are elided
 * in this excerpt.
 */
4154 static int is_extent_tree_record(struct extent_record *rec)
4156 struct list_head *cur = rec->backrefs.next;
4157 struct extent_backref *node;
4158 struct tree_backref *back;
4161 while(cur != &rec->backrefs) {
4162 node = to_extent_backref(cur);
4166 back = to_tree_backref(node);
4167 if (node->full_backref)
4169 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record a read/IO failure on [start, start+len): if the failing range
 * is an extent-tree block, register it as a corrupt extent record keyed
 * by the parent key so a later repair pass can deal with it.
 */
4176 static int record_bad_block_io(struct btrfs_fs_info *info,
4177 struct cache_tree *extent_cache,
4180 struct extent_record *rec;
4181 struct cache_extent *cache;
4182 struct btrfs_key key;
4184 cache = lookup_cache_extent(extent_cache, start, len);
4188 rec = container_of(cache, struct extent_record, cache);
/* Only extent-tree blocks get queued for corruption repair. */
4189 if (!is_extent_tree_record(rec))
4192 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4193 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot+1 in @buf to restore key order.
 * For interior nodes the whole key_ptr pair is exchanged; for leaves the
 * item headers, item data, and cpu keys are all swapped, and the keys
 * are rewritten via btrfs_set_item_key_unsafe().  When slot 0 changes,
 * the low keys up the path are fixed up.
 * NOTE(review): malloc failure handling and some braces are elided in
 * this excerpt.
 */
4196 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4197 struct extent_buffer *buf, int slot)
4199 if (btrfs_header_level(buf)) {
4200 struct btrfs_key_ptr ptr1, ptr2;
/* Node case: exchange the two key_ptr structures wholesale. */
4202 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4203 sizeof(struct btrfs_key_ptr));
4204 read_extent_buffer(buf, &ptr2,
4205 btrfs_node_key_ptr_offset(slot + 1),
4206 sizeof(struct btrfs_key_ptr));
4207 write_extent_buffer(buf, &ptr1,
4208 btrfs_node_key_ptr_offset(slot + 1),
4209 sizeof(struct btrfs_key_ptr));
4210 write_extent_buffer(buf, &ptr2,
4211 btrfs_node_key_ptr_offset(slot),
4212 sizeof(struct btrfs_key_ptr));
4214 struct btrfs_disk_key key;
/* Slot 0 changed, so propagate the new first key to ancestors. */
4215 btrfs_node_key(buf, &key, 0);
4216 btrfs_fixup_low_keys(root, path, &key,
4217 btrfs_header_level(buf) + 1);
4220 struct btrfs_item *item1, *item2;
4221 struct btrfs_key k1, k2;
4222 char *item1_data, *item2_data;
4223 u32 item1_offset, item2_offset, item1_size, item2_size;
4225 item1 = btrfs_item_nr(slot);
4226 item2 = btrfs_item_nr(slot + 1);
4227 btrfs_item_key_to_cpu(buf, &k1, slot);
4228 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4229 item1_offset = btrfs_item_offset(buf, item1);
4230 item2_offset = btrfs_item_offset(buf, item2);
4231 item1_size = btrfs_item_size(buf, item1);
4232 item2_size = btrfs_item_size(buf, item2);
4234 item1_data = malloc(item1_size);
4237 item2_data = malloc(item2_size);
/* Stage both payloads, then write each into the other's slot. */
4243 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4244 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4246 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4247 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Swap the header bookkeeping to match the exchanged payloads. */
4251 btrfs_set_item_offset(buf, item1, item2_offset);
4252 btrfs_set_item_offset(buf, item2, item1_offset);
4253 btrfs_set_item_size(buf, item1, item2_size);
4254 btrfs_set_item_size(buf, item2, item1_size);
4256 path->slots[0] = slot;
4257 btrfs_set_item_key_unsafe(root, path, &k2);
4258 path->slots[0] = slot + 1;
4259 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair key ordering in the block at path->lowest_level: scan adjacent
 * key pairs (node keys for interior blocks, item keys for leaves) and
 * swap any pair that is out of order, then mark the buffer dirty.
 */
4264 static int fix_key_order(struct btrfs_trans_handle *trans,
4265 struct btrfs_root *root,
4266 struct btrfs_path *path)
4268 struct extent_buffer *buf;
4269 struct btrfs_key k1, k2;
4271 int level = path->lowest_level;
4274 buf = path->nodes[level];
4275 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4277 btrfs_node_key_to_cpu(buf, &k1, i);
4278 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4280 btrfs_item_key_to_cpu(buf, &k1, i);
4281 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* Pair already sorted — nothing to fix for this index. */
4283 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4285 ret = swap_values(root, path, buf, i);
4288 btrfs_mark_buffer_dirty(buf);
/*
 * Remove the item at @slot from leaf @buf when it is of a type whose
 * loss a later pass can reconstruct (dir index, extent item/backref
 * variants); any other key type is refused.  The item header array is
 * compacted with memmove, nritems decremented, and low keys fixed up
 * when slot 0 was removed.
 */
4294 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4295 struct btrfs_root *root,
4296 struct btrfs_path *path,
4297 struct extent_buffer *buf, int slot)
4299 struct btrfs_key key;
4300 int nritems = btrfs_header_nritems(buf);
4302 btrfs_item_key_to_cpu(buf, &key, slot);
4304 /* These are all the keys we can deal with missing. */
4305 if (key.type != BTRFS_DIR_INDEX_KEY &&
4306 key.type != BTRFS_EXTENT_ITEM_KEY &&
4307 key.type != BTRFS_METADATA_ITEM_KEY &&
4308 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4309 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4312 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4313 (unsigned long long)key.objectid, key.type,
4314 (unsigned long long)key.offset, slot, buf->start);
/* Shift the trailing item headers left over the deleted slot. */
4315 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4316 btrfs_item_nr_offset(slot + 1),
4317 sizeof(struct btrfs_item) *
4318 (nritems - slot - 1));
4319 btrfs_set_header_nritems(buf, nritems - 1);
4321 struct btrfs_disk_key disk_key;
/* First key changed; push the new low key up the path. */
4323 btrfs_item_key(buf, &disk_key, 0);
4324 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4326 btrfs_mark_buffer_dirty(buf);
/*
 * Repair leaf item offsets: each item's data must end where the previous
 * item's data starts (or at BTRFS_LEAF_DATA_SIZE for slot 0).  Items
 * that overshoot those bounds are deleted via delete_bogus_item() when
 * possible; items that merely leave a gap are shifted right and their
 * offsets rewritten.  Any shift forces callers to restart the scan.
 */
4330 static int fix_item_offset(struct btrfs_trans_handle *trans,
4331 struct btrfs_root *root,
4332 struct btrfs_path *path)
4334 struct extent_buffer *buf;
4338 /* We should only get this for leaves */
4339 BUG_ON(path->lowest_level);
4340 buf = path->nodes[0];
4342 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4343 unsigned int shift = 0, offset;
4345 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4346 BTRFS_LEAF_DATA_SIZE(root)) {
/* Slot 0 data running past the leaf end is unfixable by shifting. */
4347 if (btrfs_item_end_nr(buf, i) >
4348 BTRFS_LEAF_DATA_SIZE(root)) {
4349 ret = delete_bogus_item(trans, root, path,
4353 fprintf(stderr, "item is off the end of the "
4354 "leaf, can't fix\n");
4358 shift = BTRFS_LEAF_DATA_SIZE(root) -
4359 btrfs_item_end_nr(buf, i);
4360 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4361 btrfs_item_offset_nr(buf, i - 1)) {
/* Overlapping neighbors can only be resolved by deleting one. */
4362 if (btrfs_item_end_nr(buf, i) >
4363 btrfs_item_offset_nr(buf, i - 1)) {
4364 ret = delete_bogus_item(trans, root, path,
4368 fprintf(stderr, "items overlap, can't fix\n");
4372 shift = btrfs_item_offset_nr(buf, i - 1) -
4373 btrfs_item_end_nr(buf, i);
4378 printf("Shifting item nr %d by %u bytes in block %llu\n",
4379 i, shift, (unsigned long long)buf->start);
4380 offset = btrfs_item_offset_nr(buf, i);
4381 memmove_extent_buffer(buf,
4382 btrfs_leaf_data(buf) + offset + shift,
4383 btrfs_leaf_data(buf) + offset,
4384 btrfs_item_size_nr(buf, i));
4385 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4387 btrfs_mark_buffer_dirty(buf);
4391 * We may have moved things, in which case we want to exit so we don't
4392 * write those changes out. Once we have proper abort functionality in
4393 * progs this can be changed to something nicer.
4400 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4401 * then just return -EIO.
/*
 * Only bad key order and invalid offsets are repairable; for each root
 * that references @buf (via btrfs_find_all_roots) a transaction is
 * started, the block is re-searched at its own level with checks
 * skipped, and fix_key_order()/fix_item_offset() is applied, committing
 * the transaction either way.
 */
4403 static int try_to_fix_bad_block(struct btrfs_root *root,
4404 struct extent_buffer *buf,
4405 enum btrfs_tree_block_status status)
4407 struct btrfs_trans_handle *trans;
4408 struct ulist *roots;
4409 struct ulist_node *node;
4410 struct btrfs_root *search_root;
4411 struct btrfs_path *path;
4412 struct ulist_iterator iter;
4413 struct btrfs_key root_key, key;
4416 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4417 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4420 path = btrfs_alloc_path();
4424 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4427 btrfs_free_path(path);
4431 ULIST_ITER_INIT(&iter);
4432 while ((node = ulist_next(roots, &iter))) {
4433 root_key.objectid = node->val;
4434 root_key.type = BTRFS_ROOT_ITEM_KEY;
4435 root_key.offset = (u64)-1;
4437 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4444 trans = btrfs_start_transaction(search_root, 0);
4445 if (IS_ERR(trans)) {
4446 ret = PTR_ERR(trans);
/* Re-walk to this exact block; skip checks that would reject it. */
4450 path->lowest_level = btrfs_header_level(buf);
4451 path->skip_check_block = 1;
4452 if (path->lowest_level)
4453 btrfs_node_key_to_cpu(buf, &key, 0);
4455 btrfs_item_key_to_cpu(buf, &key, 0);
4456 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4459 btrfs_commit_transaction(trans, search_root);
4462 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4463 ret = fix_key_order(trans, search_root, path);
4464 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4465 ret = fix_item_offset(trans, search_root, path);
4467 btrfs_commit_transaction(trans, search_root);
4470 btrfs_release_path(path);
4471 btrfs_commit_transaction(trans, search_root);
4474 btrfs_free_path(path);
/*
 * Validate tree block @buf against its extent record: record its
 * generation/first key/level, run btrfs_check_leaf()/btrfs_check_node(),
 * attempt try_to_fix_bad_block() on failure, then mark content (and,
 * for full-backref blocks or via check_owner_ref(), owner) as checked
 * and let maybe_free_extent_rec() retire the record if complete.
 */
4478 static int check_block(struct btrfs_root *root,
4479 struct cache_tree *extent_cache,
4480 struct extent_buffer *buf, u64 flags)
4482 struct extent_record *rec;
4483 struct cache_extent *cache;
4484 struct btrfs_key key;
4485 enum btrfs_tree_block_status status;
4489 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4492 rec = container_of(cache, struct extent_record, cache);
4493 rec->generation = btrfs_header_generation(buf);
4495 level = btrfs_header_level(buf);
4496 if (btrfs_header_nritems(buf) > 0) {
4499 btrfs_item_key_to_cpu(buf, &key, 0);
4501 btrfs_node_key_to_cpu(buf, &key, 0);
4503 rec->info_objectid = key.objectid;
4505 rec->info_level = level;
4507 if (btrfs_is_leaf(buf))
4508 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4510 status = btrfs_check_node(root, &rec->parent_key, buf);
4512 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4514 status = try_to_fix_bad_block(root, buf, status);
4515 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4517 fprintf(stderr, "bad block %llu\n",
4518 (unsigned long long)buf->start);
4521 * Signal to callers we need to start the scan over
4522 * again since we'll have cowed blocks.
4527 rec->content_checked = 1;
/* Full-backref blocks carry no owner to verify; count as checked. */
4528 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4529 rec->owner_ref_checked = 1;
4531 ret = check_owner_ref(root, rec, buf);
4533 rec->owner_ref_checked = 1;
4537 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look up an existing tree backref on @rec by (parent, root) using the
 * rb-tree keyed by compare_extent_backref; a non-zero @parent selects a
 * full backref match.  Returns NULL when no matching node exists.
 */
4542 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4543 u64 parent, u64 root)
4545 struct rb_node *node;
4546 struct tree_backref *back = NULL;
4547 struct tree_backref match = {
4554 match.parent = parent;
4555 match.node.full_backref = 1;
4560 node = rb_search(&rec->backref_tree, &match.node.node,
4561 (rb_compare_keys)compare_extent_backref, NULL);
4563 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Allocate a fresh tree backref for @rec, keyed either by @parent (full
 * backref) or by @root, and register it on both the backrefs list and
 * the backref rb-tree.  NOTE(review): the malloc-failure return is
 * elided in this excerpt.
 */
4568 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4569 u64 parent, u64 root)
4571 struct tree_backref *ref = malloc(sizeof(*ref));
4575 memset(&ref->node, 0, sizeof(ref->node));
4577 ref->parent = parent;
4578 ref->node.full_backref = 1;
4581 ref->node.full_backref = 0;
4583 list_add_tail(&ref->node.list, &rec->backrefs);
4584 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
/*
 * Look up an existing data backref on @rec matching the full identity
 * tuple (parent-or-root, owner, offset, found_ref, disk_bytenr, bytes)
 * via the backref rb-tree.  Returns NULL when absent.
 */
4589 static struct data_backref *find_data_backref(struct extent_record *rec,
4590 u64 parent, u64 root,
4591 u64 owner, u64 offset,
4593 u64 disk_bytenr, u64 bytes)
4595 struct rb_node *node;
4596 struct data_backref *back = NULL;
4597 struct data_backref match = {
4604 .found_ref = found_ref,
4605 .disk_bytenr = disk_bytenr,
4609 match.parent = parent;
4610 match.node.full_backref = 1;
4615 node = rb_search(&rec->backref_tree, &match.node.node,
4616 (rb_compare_keys)compare_extent_backref, NULL);
4618 back = to_data_backref(rb_node_to_extent_backref(node));
/*
 * Allocate a new data backref for @rec (full backref keyed by @parent,
 * otherwise keyed by root/owner/offset), seed its byte count from
 * @max_size, link it into the list and rb-tree, and widen
 * rec->max_size if this ref is larger.  NOTE(review): the
 * malloc-failure return is elided in this excerpt.
 */
4623 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4624 u64 parent, u64 root,
4625 u64 owner, u64 offset,
4628 struct data_backref *ref = malloc(sizeof(*ref));
4632 memset(&ref->node, 0, sizeof(ref->node));
4633 ref->node.is_data = 1;
4636 ref->parent = parent;
4639 ref->node.full_backref = 1;
4643 ref->offset = offset;
4644 ref->node.full_backref = 0;
4646 ref->bytes = max_size;
4649 list_add_tail(&ref->node.list, &rec->backrefs);
4650 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4651 if (max_size > rec->max_size)
4652 rec->max_size = max_size;
4656 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent's data/metadata/system
 * classification disagrees with the flags of the block group containing
 * it.  SYSTEM vs METADATA is disambiguated via the first tree backref's
 * root (chunk tree => SYSTEM).
 */
4657 static void check_extent_type(struct extent_record *rec)
4659 struct btrfs_block_group_cache *bg_cache;
4661 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4665 /* data extent, check chunk directly*/
4666 if (!rec->metadata) {
4667 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4668 rec->wrong_chunk_type = 1;
4672 /* metadata extent, check the obvious case first */
4673 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4674 BTRFS_BLOCK_GROUP_METADATA))) {
4675 rec->wrong_chunk_type = 1;
4680 * Check SYSTEM extent, as it's also marked as metadata, we can only
4681 * make sure it's a SYSTEM extent by its backref
4683 if (!list_empty(&rec->backrefs)) {
4684 struct extent_backref *node;
4685 struct tree_backref *tback;
4688 node = to_extent_backref(rec->backrefs.next);
4689 if (node->is_data) {
4690 /* tree block shouldn't have data backref */
4691 rec->wrong_chunk_type = 1;
4694 tback = container_of(node, struct tree_backref, node);
4696 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4697 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4699 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4700 if (!(bg_cache->flags & bg_type))
4701 rec->wrong_chunk_type = 1;
4706 * Allocate a new extent record, fill default values from @tmpl and insert int
4707 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4708 * the cache, otherwise it fails.
4710 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4711 struct extent_record *tmpl)
4713 struct extent_record *rec;
4716 rec = malloc(sizeof(*rec));
4719 rec->start = tmpl->start;
4720 rec->max_size = tmpl->max_size;
/* nr never shrinks below max_size so range math stays consistent. */
4721 rec->nr = max(tmpl->nr, tmpl->max_size);
4722 rec->found_rec = tmpl->found_rec;
4723 rec->content_checked = tmpl->content_checked;
4724 rec->owner_ref_checked = tmpl->owner_ref_checked;
4725 rec->num_duplicates = 0;
4726 rec->metadata = tmpl->metadata;
4727 rec->flag_block_full_backref = FLAG_UNSET;
4728 rec->bad_full_backref = 0;
4729 rec->crossing_stripes = 0;
4730 rec->wrong_chunk_type = 0;
4731 rec->is_root = tmpl->is_root;
4732 rec->refs = tmpl->refs;
4733 rec->extent_item_refs = tmpl->extent_item_refs;
4734 rec->parent_generation = tmpl->parent_generation;
4735 INIT_LIST_HEAD(&rec->backrefs);
4736 INIT_LIST_HEAD(&rec->dups);
4737 INIT_LIST_HEAD(&rec->list);
4738 rec->backref_tree = RB_ROOT;
4739 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4740 rec->cache.start = tmpl->start;
4741 rec->cache.size = tmpl->nr;
4742 ret = insert_cache_extent(extent_cache, &rec->cache);
/* Global accounting of bytes covered by recorded extents. */
4744 bytes_used += rec->nr;
/* Metadata must not straddle a stripe boundary; flag for scrub. */
4747 rec->crossing_stripes = check_crossing_stripes(rec->start,
4748 global_info->tree_root->nodesize);
4749 check_extent_type(rec);
4754 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4756 * - refs - if found, increase refs
4757 * - is_root - if found, set
4758 * - content_checked - if found, set
4759 * - owner_ref_checked - if found, set
4761 * If not found, create a new one, initialize and insert.
4763 static int add_extent_rec(struct cache_tree *extent_cache,
4764 struct extent_record *tmpl)
4766 struct extent_record *rec;
4767 struct cache_extent *cache;
4771 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4773 rec = container_of(cache, struct extent_record, cache);
4777 rec->nr = max(tmpl->nr, tmpl->max_size);
4780 * We need to make sure to reset nr to whatever the extent
4781 * record says was the real size, this way we can compare it to
4784 if (tmpl->found_rec) {
/* A second EXTENT_ITEM for the same range => duplicate extent. */
4785 if (tmpl->start != rec->start || rec->found_rec) {
4786 struct extent_record *tmp;
4789 if (list_empty(&rec->list))
4790 list_add_tail(&rec->list,
4791 &duplicate_extents);
4794 * We have to do this song and dance in case we
4795 * find an extent record that falls inside of
4796 * our current extent record but does not have
4797 * the same objectid.
4799 tmp = malloc(sizeof(*tmp));
4802 tmp->start = tmpl->start;
4803 tmp->max_size = tmpl->max_size;
4806 tmp->metadata = tmpl->metadata;
4807 tmp->extent_item_refs = tmpl->extent_item_refs;
4808 INIT_LIST_HEAD(&tmp->list);
4809 list_add_tail(&tmp->list, &rec->dups);
4810 rec->num_duplicates++;
/* Conflicting refcounts from two extent items: report, keep new. */
4817 if (tmpl->extent_item_refs && !dup) {
4818 if (rec->extent_item_refs) {
4819 fprintf(stderr, "block %llu rec "
4820 "extent_item_refs %llu, passed %llu\n",
4821 (unsigned long long)tmpl->start,
4822 (unsigned long long)
4823 rec->extent_item_refs,
4824 (unsigned long long)tmpl->extent_item_refs);
4826 rec->extent_item_refs = tmpl->extent_item_refs;
4830 if (tmpl->content_checked)
4831 rec->content_checked = 1;
4832 if (tmpl->owner_ref_checked)
4833 rec->owner_ref_checked = 1;
4834 memcpy(&rec->parent_key, &tmpl->parent_key,
4835 sizeof(tmpl->parent_key));
4836 if (tmpl->parent_generation)
4837 rec->parent_generation = tmpl->parent_generation;
4838 if (rec->max_size < tmpl->max_size)
4839 rec->max_size = tmpl->max_size;
4842 * A metadata extent can't cross stripe_len boundary, otherwise
4843 * kernel scrub won't be able to handle it.
4844 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4848 rec->crossing_stripes = check_crossing_stripes(
4849 rec->start, global_info->tree_root->nodesize);
4850 check_extent_type(rec);
4851 maybe_free_extent_rec(extent_cache, rec);
/* Not found in the cache: create and insert a fresh record. */
4855 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree-block backref for @bytenr: create a stub extent record
 * if none covers it yet, then find or allocate the (parent, root)
 * tree backref and set found_ref / found_extent_tree depending on
 * @found_ref, warning on duplicates.
 */
4860 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4861 u64 parent, u64 root, int found_ref)
4863 struct extent_record *rec;
4864 struct tree_backref *back;
4865 struct cache_extent *cache;
4867 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4869 struct extent_record tmpl;
4871 memset(&tmpl, 0, sizeof(tmpl));
4872 tmpl.start = bytenr;
4876 add_extent_rec_nolookup(extent_cache, &tmpl);
4878 /* really a bug in cache_extent implement now */
4879 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4884 rec = container_of(cache, struct extent_record, cache);
4885 if (rec->start != bytenr) {
4887 * Several cause, from unaligned bytenr to over lapping extents
4892 back = find_tree_backref(rec, parent, root);
4894 back = alloc_tree_backref(rec, parent, root);
4900 if (back->node.found_ref) {
4901 fprintf(stderr, "Extent back ref already exists "
4902 "for %llu parent %llu root %llu \n",
4903 (unsigned long long)bytenr,
4904 (unsigned long long)parent,
4905 (unsigned long long)root);
4907 back->node.found_ref = 1;
4909 if (back->node.found_extent_tree) {
4910 fprintf(stderr, "Extent back ref already exists "
4911 "for %llu parent %llu root %llu \n",
4912 (unsigned long long)bytenr,
4913 (unsigned long long)parent,
4914 (unsigned long long)root);
4916 back->node.found_extent_tree = 1;
4918 check_extent_type(rec);
4919 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data backref for @bytenr: create a stub extent record when
 * needed, find or allocate the matching data backref, and either mark
 * an actual file-extent reference (found_ref: sizes must agree) or an
 * extent-tree reference (stores num_refs), warning on duplicates.
 */
4923 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4924 u64 parent, u64 root, u64 owner, u64 offset,
4925 u32 num_refs, int found_ref, u64 max_size)
4927 struct extent_record *rec;
4928 struct data_backref *back;
4929 struct cache_extent *cache;
4931 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4933 struct extent_record tmpl;
4935 memset(&tmpl, 0, sizeof(tmpl));
4936 tmpl.start = bytenr;
4938 tmpl.max_size = max_size;
4940 add_extent_rec_nolookup(extent_cache, &tmpl);
4942 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4947 rec = container_of(cache, struct extent_record, cache);
4948 if (rec->max_size < max_size)
4949 rec->max_size = max_size;
4952 * If found_ref is set then max_size is the real size and must match the
4953 * existing refs. So if we have already found a ref then we need to
4954 * make sure that this ref matches the existing one, otherwise we need
4955 * to add a new backref so we can notice that the backrefs don't match
4956 * and we need to figure out who is telling the truth. This is to
4957 * account for that awful fsync bug I introduced where we'd end up with
4958 * a btrfs_file_extent_item that would have its length include multiple
4959 * prealloc extents or point inside of a prealloc extent.
4961 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4964 back = alloc_data_backref(rec, parent, root, owner, offset,
/* A real file-extent reference comes in one at a time. */
4970 BUG_ON(num_refs != 1);
4971 if (back->node.found_ref)
4972 BUG_ON(back->bytes != max_size);
4973 back->node.found_ref = 1;
4974 back->found_ref += 1;
4975 back->bytes = max_size;
4976 back->disk_bytenr = bytenr;
/* Data extents have no tree-block content/owner to verify. */
4978 rec->content_checked = 1;
4979 rec->owner_ref_checked = 1;
4981 if (back->node.found_extent_tree) {
4982 fprintf(stderr, "Extent back ref already exists "
4983 "for %llu parent %llu root %llu "
4984 "owner %llu offset %llu num_refs %lu\n",
4985 (unsigned long long)bytenr,
4986 (unsigned long long)parent,
4987 (unsigned long long)root,
4988 (unsigned long long)owner,
4989 (unsigned long long)offset,
4990 (unsigned long)num_refs);
4992 back->num_refs = num_refs;
4993 back->node.found_extent_tree = 1;
4995 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, bytenr+size) for scanning: insert into @seen first (a
 * failure there means it was already queued) and only then into
 * @pending.
 */
4999 static int add_pending(struct cache_tree *pending,
5000 struct cache_tree *seen, u64 bytenr, u32 size)
5003 ret = add_cache_extent(seen, bytenr, size);
5006 add_cache_extent(pending, bytenr, size);
/*
 * Choose up to @bits_nr block ranges for the next scan batch, in
 * priority order: readahead hits first, then node blocks near @last
 * (rewound 32K to favor sequential reads), then plain pending blocks,
 * topping up from @pending when more than 8 slots remain and the next
 * candidates are within 32K of the batch tail.
 */
5010 static int pick_next_pending(struct cache_tree *pending,
5011 struct cache_tree *reada,
5012 struct cache_tree *nodes,
5013 u64 last, struct block_info *bits, int bits_nr,
5016 unsigned long node_start = last;
5017 struct cache_extent *cache;
5020 cache = search_cache_extent(reada, 0);
5022 bits[0].start = cache->start;
5023 bits[0].size = cache->size;
/* Back up a little so nearby earlier nodes are picked up too. */
5028 if (node_start > 32768)
5029 node_start -= 32768;
5031 cache = search_cache_extent(nodes, node_start);
5033 cache = search_cache_extent(nodes, 0);
5036 cache = search_cache_extent(pending, 0);
5041 bits[ret].start = cache->start;
5042 bits[ret].size = cache->size;
5043 cache = next_cache_extent(cache);
5045 } while (cache && ret < bits_nr);
5051 bits[ret].start = cache->start;
5052 bits[ret].size = cache->size;
5053 cache = next_cache_extent(cache);
5055 } while (cache && ret < bits_nr);
5057 if (bits_nr - ret > 8) {
5058 u64 lookup = bits[0].start + bits[0].size;
5059 struct cache_extent *next;
5060 next = search_cache_extent(pending, lookup);
/* Stop topping up once the gap to the next range exceeds 32K. */
5062 if (next->start - lookup > 32768)
5064 bits[ret].start = next->start;
5065 bits[ret].size = next->size;
5066 lookup = next->start + next->size;
5070 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback: unhook a chunk_record from its
 * lists before it is released.  NOTE(review): the free(rec) call is
 * elided in this excerpt.
 */
5078 static void free_chunk_record(struct cache_extent *cache)
5080 struct chunk_record *rec;
5082 rec = container_of(cache, struct chunk_record, cache);
5083 list_del_init(&rec->list);
5084 list_del_init(&rec->dextents);
/* Free every chunk record held in @chunk_cache. */
5088 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5090 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * rb-tree destructor callback for device records.  NOTE(review): the
 * free(rec) call is elided, and the FREE_RB_BASED_TREE line below
 * belongs to the (elided) free_device_cache() wrapper.
 */
5093 static void free_device_record(struct rb_node *node)
5095 struct device_record *rec;
5097 rec = container_of(node, struct device_record, node);
5101 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into @tree's extent cache and, on success, append it
 * to the tree's block_groups list.
 */
5103 int insert_block_group_record(struct block_group_tree *tree,
5104 struct block_group_record *bg_rec)
5108 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5112 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback: unhook a block_group_record from
 * its list before release.  NOTE(review): the free(rec) call is elided
 * in this excerpt.
 */
5116 static void free_block_group_record(struct cache_extent *cache)
5118 struct block_group_record *rec;
5120 rec = container_of(cache, struct block_group_record, cache);
5121 list_del_init(&rec->list);
/* Free every block-group record held in @tree. */
5125 void free_block_group_tree(struct block_group_tree *tree)
5127 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device-extent tree and put it on both orphan
 * lists (no chunk / no device) until its owners are matched up.
 */
5130 int insert_device_extent_record(struct device_extent_tree *tree,
5131 struct device_extent_record *de_rec)
5136 * Device extent is a bit different from the other extents, because
5137 * the extents which belong to the different devices may have the
5138 * same start and size, so we need use the special extent cache
5139 * search/insert functions.
5141 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5145 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5146 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback: detach a device_extent_record
 * from whichever orphan lists it still sits on.  NOTE(review): the
 * free(rec) call is elided in this excerpt.
 */
5150 static void free_device_extent_record(struct cache_extent *cache)
5152 struct device_extent_record *rec;
5154 rec = container_of(cache, struct device_extent_record, cache);
5155 if (!list_empty(&rec->chunk_list))
5156 list_del_init(&rec->chunk_list);
5157 if (!list_empty(&rec->device_list))
5158 list_del_init(&rec->device_list);
/* Free every device-extent record held in @tree. */
5162 void free_device_extent_tree(struct device_extent_tree *tree)
5164 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5167 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Translate a v0 extent ref item into a backref record: owners below
 * BTRFS_FIRST_FREE_OBJECTID are tree roots (tree backref), anything
 * else is file data (data backref with the v0 refcount).
 */
5168 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5169 struct extent_buffer *leaf, int slot)
5171 struct btrfs_extent_ref_v0 *ref0;
5172 struct btrfs_key key;
5175 btrfs_item_key_to_cpu(leaf, &key, slot);
5176 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5177 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5178 ret = add_tree_backref(extent_cache, key.objectid, key.offset,
5181 ret = add_data_backref(extent_cache, key.objectid, key.offset,
5182 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the CHUNK_ITEM at @slot,
 * including the per-stripe devid/offset/uuid table sized by
 * num_stripes.  Allocation failure is fatal (exit via the elided path
 * after the error message).
 */
5188 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5189 struct btrfs_key *key,
5192 struct btrfs_chunk *ptr;
5193 struct chunk_record *rec;
5196 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5197 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* Record size includes the flexible per-stripe array. */
5199 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5201 fprintf(stderr, "memory allocation failed\n");
5205 INIT_LIST_HEAD(&rec->list);
5206 INIT_LIST_HEAD(&rec->dextents);
/* Chunks are cached by logical offset/length. */
5209 rec->cache.start = key->offset;
5210 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5212 rec->generation = btrfs_header_generation(leaf);
5214 rec->objectid = key->objectid;
5215 rec->type = key->type;
5216 rec->offset = key->offset;
5218 rec->length = rec->cache.size;
5219 rec->owner = btrfs_chunk_owner(leaf, ptr);
5220 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5221 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5222 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5223 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5224 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5225 rec->num_stripes = num_stripes;
5226 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5228 for (i = 0; i < rec->num_stripes; ++i) {
5229 rec->stripes[i].devid =
5230 btrfs_stripe_devid_nr(leaf, ptr, i);
5231 rec->stripes[i].offset =
5232 btrfs_stripe_offset_nr(leaf, ptr, i);
5233 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5234 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Validate the CHUNK_ITEM at @slot with btrfs_check_chunk_valid()
 * (ignoring invalid chunks with an error message), then build a
 * chunk_record and insert it into @chunk_cache, reporting duplicates.
 */
5241 static int process_chunk_item(struct cache_tree *chunk_cache,
5242 struct btrfs_key *key, struct extent_buffer *eb,
5245 struct chunk_record *rec;
5246 struct btrfs_chunk *chunk;
5249 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
5251 * Do extra check for this chunk item,
5253 * It's still possible one can craft a leaf with CHUNK_ITEM, with
5254 * wrong onwer(3) out of chunk tree, to pass both chunk tree check
5255 * and owner<->key_type check.
5257 ret = btrfs_check_chunk_valid(global_info->tree_root, eb, chunk, slot,
5260 error("chunk(%llu, %llu) is not valid, ignore it",
5261 key->offset, btrfs_chunk_length(eb, chunk));
5264 rec = btrfs_new_chunk_record(eb, key, slot);
5265 ret = insert_cache_extent(chunk_cache, &rec->cache);
5267 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5268 rec->offset, rec->length);
/*
 * Build a device_record from the DEV_ITEM at @slot and insert it into
 * @dev_cache (rb-tree keyed by device_record_compare), reporting
 * duplicates.  Allocation failure is fatal after the error message.
 */
5275 static int process_device_item(struct rb_root *dev_cache,
5276 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5278 struct btrfs_dev_item *ptr;
5279 struct device_record *rec;
5282 ptr = btrfs_item_ptr(eb,
5283 slot, struct btrfs_dev_item);
5285 rec = malloc(sizeof(*rec));
5287 fprintf(stderr, "memory allocation failed\n");
/* devid from the key first; overwritten below from the item body. */
5291 rec->devid = key->offset;
5292 rec->generation = btrfs_header_generation(eb);
5294 rec->objectid = key->objectid;
5295 rec->type = key->type;
5296 rec->offset = key->offset;
5298 rec->devid = btrfs_device_id(eb, ptr);
5299 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5300 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5302 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5304 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the BLOCK_GROUP_ITEM at
 * @slot; the cache range is [objectid, objectid+offset).  Allocation
 * failure is fatal after the error message.
 */
5311 struct block_group_record *
5312 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5315 struct btrfs_block_group_item *ptr;
5316 struct block_group_record *rec;
5318 rec = calloc(1, sizeof(*rec));
5320 fprintf(stderr, "memory allocation failed\n");
5324 rec->cache.start = key->objectid;
5325 rec->cache.size = key->offset;
5327 rec->generation = btrfs_header_generation(leaf);
5329 rec->objectid = key->objectid;
5330 rec->type = key->type;
5331 rec->offset = key->offset;
5333 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5334 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5336 INIT_LIST_HEAD(&rec->list);
/*
 * Build a block_group_record from the item at @slot and insert it into
 * @block_group_cache, reporting duplicates.
 */
5341 static int process_block_group_item(struct block_group_tree *block_group_cache,
5342 struct btrfs_key *key,
5343 struct extent_buffer *eb, int slot)
5345 struct block_group_record *rec;
5348 rec = btrfs_new_block_group_record(eb, key, slot);
5349 ret = insert_block_group_record(block_group_cache, rec);
5351 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5352 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the DEV_EXTENT item at
 * @slot; cache key is (devid=objectid, start=offset, size=length).
 * Allocation failure is fatal after the error message.
 */
5359 struct device_extent_record *
5360 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5361 struct btrfs_key *key, int slot)
5363 struct device_extent_record *rec;
5364 struct btrfs_dev_extent *ptr;
5366 rec = calloc(1, sizeof(*rec));
5368 fprintf(stderr, "memory allocation failed\n");
5372 rec->cache.objectid = key->objectid;
5373 rec->cache.start = key->offset;
5375 rec->generation = btrfs_header_generation(leaf);
5377 rec->objectid = key->objectid;
5378 rec->type = key->type;
5379 rec->offset = key->offset;
5381 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5382 rec->chunk_objecteid =
5383 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5385 btrfs_dev_extent_chunk_offset(leaf, ptr);
5386 rec->length = btrfs_dev_extent_length(leaf, ptr);
5387 rec->cache.size = rec->length;
5389 INIT_LIST_HEAD(&rec->chunk_list);
5390 INIT_LIST_HEAD(&rec->device_list);
/*
 * Build a device_extent_record from the item at @slot and insert it
 * into @dev_extent_cache, reporting duplicates.
 */
5396 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5397 struct btrfs_key *key, struct extent_buffer *eb,
5400 struct device_extent_record *rec;
5403 rec = btrfs_new_device_extent_record(eb, key, slot);
5404 ret = insert_device_extent_record(dev_extent_cache, rec);
5407 "Device extent[%llu, %llu, %llu] existed.\n",
5408 rec->objectid, rec->offset, rec->length);
/*
 * Parse an EXTENT_ITEM or METADATA_ITEM at @slot of @eb and record it,
 * together with all of its inline backrefs, into @extent_cache.
 *
 * For METADATA_ITEM the length is implicitly the nodesize; for
 * EXTENT_ITEM it is key.offset.  Extents with misaligned bytenr/length
 * are reported and ignored.  V0 (pre-backref-rev) items are converted
 * via the BTRFS_COMPAT_EXTENT_TREE_V0 path.
 * NOTE(review): several original lines are elided in this excerpt
 * (variable declarations, some braces, switch header, loop header).
 */
5415 static int process_extent_item(struct btrfs_root *root,
5416 struct cache_tree *extent_cache,
5417 struct extent_buffer *eb, int slot)
5419 struct btrfs_extent_item *ei;
5420 struct btrfs_extent_inline_ref *iref;
5421 struct btrfs_extent_data_ref *dref;
5422 struct btrfs_shared_data_ref *sref;
5423 struct btrfs_key key;
5424 struct extent_record tmpl;
5429 u32 item_size = btrfs_item_size_nr(eb, slot);
5435 btrfs_item_key_to_cpu(eb, &key, slot);
/* metadata items encode the level in key.offset, not the length */
5437 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5439 num_bytes = root->nodesize;
5441 num_bytes = key.offset;
5444 if (!IS_ALIGNED(key.objectid, root->sectorsize)) {
5445 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
5446 key.objectid, root->sectorsize);
/* smaller than the current item: must be an old v0 extent item */
5449 if (item_size < sizeof(*ei)) {
5450 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5451 struct btrfs_extent_item_v0 *ei0;
5452 BUG_ON(item_size != sizeof(*ei0));
5453 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5454 refs = btrfs_extent_refs_v0(eb, ei0);
5458 memset(&tmpl, 0, sizeof(tmpl));
5459 tmpl.start = key.objectid;
5460 tmpl.nr = num_bytes;
5461 tmpl.extent_item_refs = refs;
5462 tmpl.metadata = metadata;
5464 tmpl.max_size = num_bytes;
/* v0 items carry no inline refs, so we are done after recording */
5466 return add_extent_rec(extent_cache, &tmpl);
5469 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5470 refs = btrfs_extent_refs(eb, ei);
5471 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5475 if (metadata && num_bytes != root->nodesize) {
5476 error("ignore invalid metadata extent, length %llu does not equal to %u",
5477 num_bytes, root->nodesize);
5480 if (!metadata && !IS_ALIGNED(num_bytes, root->sectorsize)) {
5481 error("ignore invalid data extent, length %llu is not aligned to %u",
5482 num_bytes, root->sectorsize);
5486 memset(&tmpl, 0, sizeof(tmpl));
5487 tmpl.start = key.objectid;
5488 tmpl.nr = num_bytes;
5489 tmpl.extent_item_refs = refs;
5490 tmpl.metadata = metadata;
5492 tmpl.max_size = num_bytes;
5493 add_extent_rec(extent_cache, &tmpl);
/* walk the inline refs that follow the extent item */
5495 ptr = (unsigned long)(ei + 1);
/* EXTENT_ITEM tree blocks have a btrfs_tree_block_info before the refs */
5496 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5497 key.type == BTRFS_EXTENT_ITEM_KEY)
5498 ptr += sizeof(struct btrfs_tree_block_info);
5500 end = (unsigned long)ei + item_size;
5502 iref = (struct btrfs_extent_inline_ref *)ptr;
5503 type = btrfs_extent_inline_ref_type(eb, iref);
5504 offset = btrfs_extent_inline_ref_offset(eb, iref);
5506 case BTRFS_TREE_BLOCK_REF_KEY:
5507 ret = add_tree_backref(extent_cache, key.objectid,
5510 error("add_tree_backref failed: %s",
5513 case BTRFS_SHARED_BLOCK_REF_KEY:
5514 ret = add_tree_backref(extent_cache, key.objectid,
5517 error("add_tree_backref failed: %s",
5520 case BTRFS_EXTENT_DATA_REF_KEY:
/* data ref payload starts at the inline ref's offset field */
5521 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5522 add_data_backref(extent_cache, key.objectid, 0,
5523 btrfs_extent_data_ref_root(eb, dref),
5524 btrfs_extent_data_ref_objectid(eb,
5526 btrfs_extent_data_ref_offset(eb, dref),
5527 btrfs_extent_data_ref_count(eb, dref),
5530 case BTRFS_SHARED_DATA_REF_KEY:
5531 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5532 add_data_backref(extent_cache, key.objectid, offset,
5534 btrfs_shared_data_ref_count(eb, sref),
/* unknown inline ref type: the extent record is corrupt */
5538 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5539 key.objectid, key.type, num_bytes);
5542 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that [@offset, @offset + @bytes) inside block group @cache is
 * represented by a matching entry in the in-memory free space cache.
 *
 * Superblock mirror copies physically overlap block groups but are never
 * listed as free space, so the requested range is first trimmed (or split,
 * via recursion) around any superblock stripe that maps into this block
 * group; only then is the remaining range matched against a free space
 * entry, which must agree in both offset and length.
 * NOTE(review): some original lines (declarations, returns, closing
 * braces) are elided in this excerpt.
 */
5549 static int check_cache_range(struct btrfs_root *root,
5550 struct btrfs_block_group_cache *cache,
5551 u64 offset, u64 bytes)
5553 struct btrfs_free_space *entry;
5559 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5560 bytenr = btrfs_sb_offset(i);
/* map the superblock copy back to logical addresses in this group */
5561 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5562 cache->key.objectid, bytenr, 0,
5563 &logical, &nr, &stripe_len);
5568 if (logical[nr] + stripe_len <= offset)
5570 if (offset + bytes <= logical[nr])
/* super stripe starts exactly at our range: trim from the front */
5572 if (logical[nr] == offset) {
5573 if (stripe_len >= bytes) {
5577 bytes -= stripe_len;
5578 offset += stripe_len;
5579 } else if (logical[nr] < offset) {
5580 if (logical[nr] + stripe_len >=
5585 bytes = (offset + bytes) -
5586 (logical[nr] + stripe_len);
5587 offset = logical[nr] + stripe_len;
5590 * Could be tricky, the super may land in the
5591 * middle of the area we're checking. First
5592 * check the easiest case, it's at the end.
5594 if (logical[nr] + stripe_len >=
5596 bytes = logical[nr] - offset;
5600 /* Check the left side */
5601 ret = check_cache_range(root, cache,
5603 logical[nr] - offset);
5609 /* Now we continue with the right side */
5610 bytes = (offset + bytes) -
5611 (logical[nr] + stripe_len);
5612 offset = logical[nr] + stripe_len;
/* the surviving range must have an exactly matching free space entry */
5619 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5621 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5622 offset, offset+bytes);
5626 if (entry->offset != offset) {
5627 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5632 if (entry->bytes != bytes) {
5633 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5634 bytes, entry->bytes, offset);
/* consume the entry so leftovers can be detected by the caller */
5638 unlink_free_space(cache->free_space_ctl, entry);
5643 static int verify_space_cache(struct btrfs_root *root,
5644 struct btrfs_block_group_cache *cache)
5646 struct btrfs_path *path;
5647 struct extent_buffer *leaf;
5648 struct btrfs_key key;
5652 path = btrfs_alloc_path();
5656 root = root->fs_info->extent_root;
5658 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5660 key.objectid = last;
5662 key.type = BTRFS_EXTENT_ITEM_KEY;
5664 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5669 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5670 ret = btrfs_next_leaf(root, path);
5678 leaf = path->nodes[0];
5679 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5680 if (key.objectid >= cache->key.offset + cache->key.objectid)
5682 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5683 key.type != BTRFS_METADATA_ITEM_KEY) {
5688 if (last == key.objectid) {
5689 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5690 last = key.objectid + key.offset;
5692 last = key.objectid + root->nodesize;
5697 ret = check_cache_range(root, cache, last,
5698 key.objectid - last);
5701 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5702 last = key.objectid + key.offset;
5704 last = key.objectid + root->nodesize;
5708 if (last < cache->key.objectid + cache->key.offset)
5709 ret = check_cache_range(root, cache, last,
5710 cache->key.objectid +
5711 cache->key.offset - last);
5714 btrfs_free_path(path);
5717 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5718 fprintf(stderr, "There are still entries left in the space "
/*
 * Validate the free space cache (v1 file-based or v2 free space tree)
 * for every block group in the filesystem.
 *
 * Returns -EINVAL if any block group's cached free space disagrees with
 * the extent tree, 0 otherwise.
 * NOTE(review): some original lines are elided in this excerpt.
 */
5726 static int check_space_cache(struct btrfs_root *root)
5728 struct btrfs_block_group_cache *cache;
5729 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/* a stale cache generation means the kernel will rebuild it anyway */
5733 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5734 btrfs_super_generation(root->fs_info->super_copy) !=
5735 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5736 printf("cache and super generation don't match, space cache "
5737 "will be invalidated\n");
5741 if (ctx.progress_enabled) {
5742 ctx.tp = TASK_FREE_SPACE;
5743 task_start(ctx.info);
/* iterate block groups in address order */
5747 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5751 start = cache->key.objectid + cache->key.offset;
5752 if (!cache->free_space_ctl) {
5753 if (btrfs_init_free_space_ctl(cache,
5754 root->sectorsize)) {
5759 btrfs_remove_free_space_cache(cache);
/* free-space-tree (v2) filesystems load from the tree instead */
5762 if (btrfs_fs_compat_ro(root->fs_info,
5763 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5764 ret = exclude_super_stripes(root, cache);
5766 fprintf(stderr, "could not exclude super stripes: %s\n",
5771 ret = load_free_space_tree(root->fs_info, cache);
5772 free_excluded_extents(root, cache);
5774 fprintf(stderr, "could not load free space tree: %s\n",
5781 ret = load_free_space_cache(root->fs_info, cache);
5786 ret = verify_space_cache(root, cache);
5788 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5789 cache->key.objectid);
5794 task_stop(ctx.info);
5796 return error ? -EINVAL : 0;
/*
 * Read the data extent at [@bytenr, @bytenr + @num_bytes) and verify the
 * checksum of every sectorsize chunk against the csum items stored at
 * @leaf_offset inside @eb.  On a mismatch, retry the sector from the
 * next mirror before giving up.
 * NOTE(review): some original lines (mirror-retry bookkeeping, returns,
 * cleanup) are elided in this excerpt.
 */
5799 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5800 u64 num_bytes, unsigned long leaf_offset,
5801 struct extent_buffer *eb) {
5804 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5806 unsigned long csum_offset;
5810 u64 data_checked = 0;
/* csums only cover whole sectors */
5816 if (num_bytes % root->sectorsize)
5819 data = malloc(num_bytes);
5823 while (offset < num_bytes) {
5826 read_len = num_bytes - offset;
/* read as much of the remaining extent as possible in one call */
5828 ret = read_extent_data(root, data + offset,
5829 bytenr + offset, &read_len, mirror);
/* verify each sectorsize chunk of the data just read */
5834 while (data_checked < read_len) {
5836 tmp = offset + data_checked;
5838 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5839 csum, root->sectorsize);
5840 btrfs_csum_final(csum, (char *)&csum);
/* locate the on-disk csum for this sector inside the csum item */
5842 csum_offset = leaf_offset +
5843 tmp / root->sectorsize * csum_size;
5844 read_extent_buffer(eb, (char *)&csum_expected,
5845 csum_offset, csum_size);
5846 /* try another mirror */
5847 if (csum != csum_expected) {
5848 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5849 mirror, bytenr + tmp,
5850 csum, csum_expected);
5851 num_copies = btrfs_num_copies(
5852 &root->fs_info->mapping_tree,
5854 if (mirror < num_copies - 1) {
5859 data_checked += root->sectorsize;
5868 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5871 struct btrfs_path *path;
5872 struct extent_buffer *leaf;
5873 struct btrfs_key key;
5876 path = btrfs_alloc_path();
5878 fprintf(stderr, "Error allocating path\n");
5882 key.objectid = bytenr;
5883 key.type = BTRFS_EXTENT_ITEM_KEY;
5884 key.offset = (u64)-1;
5887 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5890 fprintf(stderr, "Error looking up extent record %d\n", ret);
5891 btrfs_free_path(path);
5894 if (path->slots[0] > 0) {
5897 ret = btrfs_prev_leaf(root, path);
5900 } else if (ret > 0) {
5907 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5910 * Block group items come before extent items if they have the same
5911 * bytenr, so walk back one more just in case. Dear future traveller,
5912 * first congrats on mastering time travel. Now if it's not too much
5913 * trouble could you go back to 2006 and tell Chris to make the
5914 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5915 * EXTENT_ITEM_KEY please?
5917 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5918 if (path->slots[0] > 0) {
5921 ret = btrfs_prev_leaf(root, path);
5924 } else if (ret > 0) {
5929 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5933 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5934 ret = btrfs_next_leaf(root, path);
5936 fprintf(stderr, "Error going to next leaf "
5938 btrfs_free_path(path);
5944 leaf = path->nodes[0];
5945 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5946 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5950 if (key.objectid + key.offset < bytenr) {
5954 if (key.objectid > bytenr + num_bytes)
5957 if (key.objectid == bytenr) {
5958 if (key.offset >= num_bytes) {
5962 num_bytes -= key.offset;
5963 bytenr += key.offset;
5964 } else if (key.objectid < bytenr) {
5965 if (key.objectid + key.offset >= bytenr + num_bytes) {
5969 num_bytes = (bytenr + num_bytes) -
5970 (key.objectid + key.offset);
5971 bytenr = key.objectid + key.offset;
5973 if (key.objectid + key.offset < bytenr + num_bytes) {
5974 u64 new_start = key.objectid + key.offset;
5975 u64 new_bytes = bytenr + num_bytes - new_start;
5978 * Weird case, the extent is in the middle of
5979 * our range, we'll have to search one side
5980 * and then the other. Not sure if this happens
5981 * in real life, but no harm in coding it up
5982 * anyway just in case.
5984 btrfs_release_path(path);
5985 ret = check_extent_exists(root, new_start,
5988 fprintf(stderr, "Right section didn't "
5992 num_bytes = key.objectid - bytenr;
5995 num_bytes = key.objectid - bytenr;
6002 if (num_bytes && !ret) {
6003 fprintf(stderr, "There are no extents for csum range "
6004 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
6008 btrfs_free_path(path);
/*
 * Walk the whole csum tree.  For each run of contiguous csum items:
 *  - if --check-data-csum was given, re-read the data and verify every
 *    sector's checksum (check_extent_csums());
 *  - verify that the checksummed range is actually covered by extent
 *    items (check_extent_exists()), i.e. no csums for freed extents.
 * NOTE(review): some original lines are elided in this excerpt.
 */
6012 static int check_csums(struct btrfs_root *root)
6014 struct btrfs_path *path;
6015 struct extent_buffer *leaf;
6016 struct btrfs_key key;
6017 u64 offset = 0, num_bytes = 0;
6018 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
6022 unsigned long leaf_offset;
6024 root = root->fs_info->csum_root;
6025 if (!extent_buffer_uptodate(root->node)) {
6026 fprintf(stderr, "No valid csum tree found\n");
/* all csum items share one objectid; iterate from offset 0 */
6030 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
6031 key.type = BTRFS_EXTENT_CSUM_KEY;
6034 path = btrfs_alloc_path();
6038 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6040 fprintf(stderr, "Error searching csum tree %d\n", ret);
6041 btrfs_free_path(path);
6045 if (ret > 0 && path->slots[0])
6050 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6051 ret = btrfs_next_leaf(root, path);
6053 fprintf(stderr, "Error going to next leaf "
6060 leaf = path->nodes[0];
6062 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6063 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* number of data bytes this csum item covers */
6068 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
6069 csum_size) * root->sectorsize;
6070 if (!check_data_csum)
6071 goto skip_csum_check;
6072 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
6073 ret = check_extent_csums(root, key.offset, data_len,
6079 offset = key.offset;
/* a gap in csum coverage: verify the previous contiguous run */
6080 } else if (key.offset != offset + num_bytes) {
6081 ret = check_extent_exists(root, offset, num_bytes);
6083 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6084 "there is no extent record\n",
6085 offset, offset+num_bytes);
6088 offset = key.offset;
6091 num_bytes += data_len;
6095 btrfs_free_path(path);
/*
 * Lexicographic (objectid, type, offset) comparison against a snapshot
 * drop-progress key: true when @key sorts before @drop_key, i.e. the
 * subtree under @key has already been dropped and should be skipped.
 * NOTE(review): the return statements are elided in this excerpt.
 */
6099 static int is_dropped_key(struct btrfs_key *key,
6100 struct btrfs_key *drop_key) {
6101 if (key->objectid < drop_key->objectid)
6103 else if (key->objectid == drop_key->objectid) {
6104 if (key->type < drop_key->type)
6106 else if (key->type == drop_key->type) {
6107 if (key->offset < drop_key->offset)
6115 * Here are the rules for FULL_BACKREF.
6117 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6118 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6120 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6121 * if it happened after the relocation occurred since we'll have dropped the
6122 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6123 * have no real way to know for sure.
6125 * We process the blocks one root at a time, and we start from the lowest root
6126 * objectid and go to the highest. So we can just lookup the owner backref for
6127 * the record and if we don't find it then we know it doesn't exist and we have
6130 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6131 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6132 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether tree block @buf needs BTRFS_BLOCK_FLAG_FULL_BACKREF in
 * @flags, based on the rules documented above this function, and mark
 * the cached extent record inconsistent (bad_full_backref) when its
 * previously-recorded full-backref state disagrees with the decision.
 * NOTE(review): some original lines (labels, returns, declarations)
 * are elided in this excerpt.
 */
6134 static int calc_extent_flag(struct btrfs_root *root,
6135 struct cache_tree *extent_cache,
6136 struct extent_buffer *buf,
6137 struct root_item_record *ri,
6140 struct extent_record *rec;
6141 struct cache_extent *cache;
6142 struct tree_backref *tback;
6145 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6146 /* we have added this extent before */
6148 rec = container_of(cache, struct extent_record, cache);
6151 * Except file/reloc tree, we can not have
6154 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* tree root blocks can never carry FULL_BACKREF */
6159 if (buf->start == ri->bytenr)
/* relocated blocks always have FULL_BACKREF (rule 1) */
6162 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6165 owner = btrfs_header_owner(buf);
6166 if (owner == ri->objectid)
/* no owner backref recorded -> owner root no longer refs it (rule 2) */
6169 tback = find_tree_backref(rec, 0, owner);
6174 if (rec->flag_block_full_backref != FLAG_UNSET &&
6175 rec->flag_block_full_backref != 0)
6176 rec->bad_full_backref = 1;
6179 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6180 if (rec->flag_block_full_backref != FLAG_UNSET &&
6181 rec->flag_block_full_backref != 1)
6182 rec->bad_full_backref = 1;
/*
 * Print "Invalid key type(<type>) found in root(<rootid>)" to stderr,
 * using the pretty-printers for key types and root objectids.
 * NOTE(review): the function's braces are elided in this excerpt.
 */
6186 static void report_mismatch_key_root(u8 key_type, u64 rootid)
6188 fprintf(stderr, "Invalid key type(");
6189 print_key_type(stderr, 0, key_type);
6190 fprintf(stderr, ") found in root(");
6191 print_objectid(stderr, rootid, 0);
6192 fprintf(stderr, ")\n");
6196 * Check if the key is valid with its extent buffer.
6198 * This is an early check in case an invalid key exists in an extent buffer.
6199 * This is not comprehensive yet, but should prevent a wrong key/item being passed
/*
 * Sanity-check that key type @key_type may legally appear in the tree
 * identified by @rootid (e.g. DEV_ITEM/CHUNK_ITEM only in the chunk
 * tree, extent items only in the extent tree).  Mismatches are reported
 * via report_mismatch_key_root().
 * NOTE(review): the switch header, returns and braces are elided in
 * this excerpt; the "case BTRFS_CSUM_TREE_OBJECTID" label below appears
 * to belong to a csum-key case whose label line is among the elided
 * lines — TODO confirm against the full source.
 */
6202 static int check_type_with_root(u64 rootid, u8 key_type)
6205 /* Only valid in chunk tree */
6206 case BTRFS_DEV_ITEM_KEY:
6207 case BTRFS_CHUNK_ITEM_KEY:
6208 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
6211 /* valid in csum and log tree */
6212 case BTRFS_CSUM_TREE_OBJECTID:
6213 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
6217 case BTRFS_EXTENT_ITEM_KEY:
6218 case BTRFS_METADATA_ITEM_KEY:
6219 case BTRFS_BLOCK_GROUP_ITEM_KEY:
6220 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
6223 case BTRFS_ROOT_ITEM_KEY:
6224 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
6227 case BTRFS_DEV_EXTENT_KEY:
6228 if (rootid != BTRFS_DEV_TREE_OBJECTID)
6234 report_mismatch_key_root(key_type, rootid);
/*
 * Core of the metadata scan: pick the next pending tree block, read it,
 * classify every item in it into the various caches (extents, chunks,
 * devices, block groups, device extents, backrefs), queue its children,
 * and accumulate global statistics (btree/fs-tree byte counters, wasted
 * space, csum bytes).
 *
 * Also determines and sanity-checks the block's FULL_BACKREF state,
 * cross-checking the extent tree (btrfs_lookup_extent_info) against
 * what was observed (calc_extent_flag).
 * NOTE(review): this excerpt elides many original lines (declarations,
 * braces, some error paths); the statement order below is partial.
 */
6238 static int run_next_block(struct btrfs_root *root,
6239 struct block_info *bits,
6242 struct cache_tree *pending,
6243 struct cache_tree *seen,
6244 struct cache_tree *reada,
6245 struct cache_tree *nodes,
6246 struct cache_tree *extent_cache,
6247 struct cache_tree *chunk_cache,
6248 struct rb_root *dev_cache,
6249 struct block_group_tree *block_group_cache,
6250 struct device_extent_tree *dev_extent_cache,
6251 struct root_item_record *ri)
6253 struct extent_buffer *buf;
6254 struct extent_record *rec = NULL;
6265 struct btrfs_key key;
6266 struct cache_extent *cache;
/* choose the next batch of blocks and kick off readahead for them */
6269 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6270 bits_nr, &reada_bits);
6275 for(i = 0; i < nritems; i++) {
6276 ret = add_cache_extent(reada, bits[i].start,
6281 /* fixme, get the parent transid */
6282 readahead_tree_block(root, bits[i].start,
6286 *last = bits[0].start;
6287 bytenr = bits[0].start;
6288 size = bits[0].size;
/* drop this block from all pending/readahead/node queues */
6290 cache = lookup_cache_extent(pending, bytenr, size);
6292 remove_cache_extent(pending, cache);
6295 cache = lookup_cache_extent(reada, bytenr, size);
6297 remove_cache_extent(reada, cache);
6300 cache = lookup_cache_extent(nodes, bytenr, size);
6302 remove_cache_extent(nodes, cache);
6305 cache = lookup_cache_extent(extent_cache, bytenr, size);
6307 rec = container_of(cache, struct extent_record, cache);
6308 gen = rec->parent_generation;
6311 /* fixme, get the real parent transid */
6312 buf = read_tree_block(root, bytenr, size, gen);
6313 if (!extent_buffer_uptodate(buf)) {
6314 record_bad_block_io(root->fs_info,
6315 extent_cache, bytenr, size);
6319 nritems = btrfs_header_nritems(buf);
/* fetch flags from the extent tree, or recompute them if rebuilding */
6322 if (!init_extent_tree) {
6323 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6324 btrfs_header_level(buf), 1, NULL,
6327 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6329 fprintf(stderr, "Couldn't calc extent flags\n");
6330 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6335 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6337 fprintf(stderr, "Couldn't calc extent flags\n");
6338 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* cross-check FULL_BACKREF against ownership/reloc/last_snapshot */
6342 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6344 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6345 ri->objectid == btrfs_header_owner(buf)) {
6347 * Ok we got to this block from it's original owner and
6348 * we have FULL_BACKREF set. Relocation can leave
6349 * converted blocks over so this is altogether possible,
6350 * however it's not possible if the generation > the
6351 * last snapshot, so check for this case.
6353 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6354 btrfs_header_generation(buf) > ri->last_snapshot) {
6355 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6356 rec->bad_full_backref = 1;
6361 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6362 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6363 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6364 rec->bad_full_backref = 1;
6368 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6369 rec->flag_block_full_backref = 1;
6373 rec->flag_block_full_backref = 0;
6375 owner = btrfs_header_owner(buf);
6378 ret = check_block(root, extent_cache, buf, flags);
/* leaves: classify each item; nodes: queue each child pointer */
6382 if (btrfs_is_leaf(buf)) {
6383 btree_space_waste += btrfs_leaf_free_space(root, buf);
6384 for (i = 0; i < nritems; i++) {
6385 struct btrfs_file_extent_item *fi;
6386 btrfs_item_key_to_cpu(buf, &key, i);
6388 * Check key type against the leaf owner.
6389 * Could filter quite a lot of early error if
6392 if (check_type_with_root(btrfs_header_owner(buf),
6394 fprintf(stderr, "ignoring invalid key\n");
6397 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6398 process_extent_item(root, extent_cache, buf,
6402 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6403 process_extent_item(root, extent_cache, buf,
6407 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6409 btrfs_item_size_nr(buf, i);
6412 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6413 process_chunk_item(chunk_cache, &key, buf, i);
6416 if (key.type == BTRFS_DEV_ITEM_KEY) {
6417 process_device_item(dev_cache, &key, buf, i);
6420 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6421 process_block_group_item(block_group_cache,
6425 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6426 process_device_extent_item(dev_extent_cache,
6431 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6432 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6433 process_extent_ref_v0(extent_cache, buf, i);
/* keyed (non-inline) backref items */
6440 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6441 ret = add_tree_backref(extent_cache,
6442 key.objectid, 0, key.offset, 0);
6444 error("add_tree_backref failed: %s",
6448 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6449 ret = add_tree_backref(extent_cache,
6450 key.objectid, key.offset, 0, 0);
6452 error("add_tree_backref failed: %s",
6456 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6457 struct btrfs_extent_data_ref *ref;
6458 ref = btrfs_item_ptr(buf, i,
6459 struct btrfs_extent_data_ref);
6460 add_data_backref(extent_cache,
6462 btrfs_extent_data_ref_root(buf, ref),
6463 btrfs_extent_data_ref_objectid(buf,
6465 btrfs_extent_data_ref_offset(buf, ref),
6466 btrfs_extent_data_ref_count(buf, ref),
6467 0, root->sectorsize);
6470 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6471 struct btrfs_shared_data_ref *ref;
6472 ref = btrfs_item_ptr(buf, i,
6473 struct btrfs_shared_data_ref);
6474 add_data_backref(extent_cache,
6475 key.objectid, key.offset, 0, 0, 0,
6476 btrfs_shared_data_ref_count(buf, ref),
6477 0, root->sectorsize);
/* remember orphans for later deletion during repair */
6480 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6481 struct bad_item *bad;
6483 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6487 bad = malloc(sizeof(struct bad_item));
6490 INIT_LIST_HEAD(&bad->list);
6491 memcpy(&bad->key, &key,
6492 sizeof(struct btrfs_key));
6493 bad->root_id = owner;
6494 list_add_tail(&bad->list, &delete_items);
6497 if (key.type != BTRFS_EXTENT_DATA_KEY)
6499 fi = btrfs_item_ptr(buf, i,
6500 struct btrfs_file_extent_item);
6501 if (btrfs_file_extent_type(buf, fi) ==
6502 BTRFS_FILE_EXTENT_INLINE)
/* holes have no backing extent */
6504 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6507 data_bytes_allocated +=
6508 btrfs_file_extent_disk_num_bytes(buf, fi);
6509 if (data_bytes_allocated < root->sectorsize) {
6512 data_bytes_referenced +=
6513 btrfs_file_extent_num_bytes(buf, fi);
6514 add_data_backref(extent_cache,
6515 btrfs_file_extent_disk_bytenr(buf, fi),
6516 parent, owner, key.objectid, key.offset -
6517 btrfs_file_extent_offset(buf, fi), 1, 1,
6518 btrfs_file_extent_disk_num_bytes(buf, fi));
6522 struct btrfs_key first_key;
6524 first_key.objectid = 0;
6527 btrfs_item_key_to_cpu(buf, &first_key, 0);
6528 level = btrfs_header_level(buf);
6529 for (i = 0; i < nritems; i++) {
6530 struct extent_record tmpl;
6532 ptr = btrfs_node_blockptr(buf, i);
6533 size = root->nodesize;
6534 btrfs_node_key_to_cpu(buf, &key, i);
/* skip subtrees already dropped by snapshot deletion */
6536 if ((level == ri->drop_level)
6537 && is_dropped_key(&key, &ri->drop_key)) {
6542 memset(&tmpl, 0, sizeof(tmpl));
6543 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6544 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6549 tmpl.max_size = size;
6550 ret = add_extent_rec(extent_cache, &tmpl);
6554 ret = add_tree_backref(extent_cache, ptr, parent,
6557 error("add_tree_backref failed: %s",
/* interior children go on 'nodes', leaves on 'pending' */
6563 add_pending(nodes, seen, ptr, size);
6565 add_pending(pending, seen, ptr, size);
6568 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6569 nritems) * sizeof(struct btrfs_key_ptr);
/* global accounting */
6571 total_btree_bytes += buf->len;
6572 if (fs_root_objectid(btrfs_header_owner(buf)))
6573 total_fs_tree_bytes += buf->len;
6574 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6575 total_extent_tree_bytes += buf->len;
6576 if (!found_old_backref &&
6577 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6578 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6579 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6580 found_old_backref = 1;
6582 free_extent_buffer(buf);
/*
 * Seed the scan with a tree root block: queue @buf on the appropriate
 * pending list (nodes for interior blocks, pending for leaves), create
 * its extent record, and add a tree backref — shared (parent-based) for
 * reloc/old-rev trees, owner-based otherwise.
 * NOTE(review): some original lines are elided in this excerpt.
 */
6586 static int add_root_to_pending(struct extent_buffer *buf,
6587 struct cache_tree *extent_cache,
6588 struct cache_tree *pending,
6589 struct cache_tree *seen,
6590 struct cache_tree *nodes,
6593 struct extent_record tmpl;
6596 if (btrfs_header_level(buf) > 0)
6597 add_pending(nodes, seen, buf->start, buf->len);
6599 add_pending(pending, seen, buf->start, buf->len);
6601 memset(&tmpl, 0, sizeof(tmpl));
6602 tmpl.start = buf->start;
6607 tmpl.max_size = buf->len;
6608 add_extent_rec(extent_cache, &tmpl);
/* reloc trees and pre-mixed-backref blocks use shared backrefs */
6610 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6611 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6612 ret = add_tree_backref(extent_cache, buf->start, buf->start,
6615 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
6620 /* as we fix the tree, we might be deleting blocks that
6621 * we're tracking for repair. This hook makes sure we
6622 * remove any backrefs for blocks as we are fixing them.
/*
 * Called when the repair code frees an extent: decrement/clear the
 * matching cached backref (data or tree) and ref counts so the fsck
 * bookkeeping stays consistent with the tree we are mutating, then
 * release the extent record if nothing references it any more.
 * NOTE(review): some original lines are elided in this excerpt.
 */
6624 static int free_extent_hook(struct btrfs_trans_handle *trans,
6625 struct btrfs_root *root,
6626 u64 bytenr, u64 num_bytes, u64 parent,
6627 u64 root_objectid, u64 owner, u64 offset,
6630 struct extent_record *rec;
6631 struct cache_extent *cache;
6633 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* owners >= FIRST_FREE_OBJECTID are file data, below are tree blocks */
6635 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6636 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6640 rec = container_of(cache, struct extent_record, cache);
6642 struct data_backref *back;
6643 back = find_data_backref(rec, parent, root_objectid, owner,
6644 offset, 1, bytenr, num_bytes);
6647 if (back->node.found_ref) {
6648 back->found_ref -= refs_to_drop;
6650 rec->refs -= refs_to_drop;
6652 if (back->node.found_extent_tree) {
6653 back->num_refs -= refs_to_drop;
6654 if (rec->extent_item_refs)
6655 rec->extent_item_refs -= refs_to_drop;
6657 if (back->found_ref == 0)
6658 back->node.found_ref = 0;
6659 if (back->num_refs == 0)
6660 back->node.found_extent_tree = 0;
6662 if (!back->node.found_extent_tree && back->node.found_ref) {
6663 list_del(&back->node.list);
6667 struct tree_backref *back;
6668 back = find_tree_backref(rec, parent, root_objectid);
6671 if (back->node.found_ref) {
6674 back->node.found_ref = 0;
6676 if (back->node.found_extent_tree) {
6677 if (rec->extent_item_refs)
6678 rec->extent_item_refs--;
6679 back->node.found_extent_tree = 0;
6681 if (!back->node.found_extent_tree && back->node.found_ref) {
6682 list_del(&back->node.list);
/* drop the record entirely if it is now unreferenced */
6686 maybe_free_extent_rec(extent_cache, rec);
/*
 * Repair helper: delete every extent-tree item (extent items, metadata
 * items, and all backref item types) whose objectid equals @bytenr,
 * searching downward from (bytenr, max, max) and stepping back through
 * the key space.  Block group usage is updated for each extent removed.
 * NOTE(review): some original lines (loop header, returns, braces) are
 * elided in this excerpt.
 */
6691 static int delete_extent_records(struct btrfs_trans_handle *trans,
6692 struct btrfs_root *root,
6693 struct btrfs_path *path,
6694 u64 bytenr, u64 new_len)
6696 struct btrfs_key key;
6697 struct btrfs_key found_key;
6698 struct extent_buffer *leaf;
6703 key.objectid = bytenr;
6705 key.offset = (u64)-1;
6708 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6715 if (path->slots[0] == 0)
6721 leaf = path->nodes[0];
6722 slot = path->slots[0];
6724 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6725 if (found_key.objectid != bytenr)
/* only extent/backref item types are deleted; skip anything else */
6728 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6729 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6730 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6731 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6732 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6733 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6734 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6735 btrfs_release_path(path);
/* step the search key backwards past the skipped item */
6736 if (found_key.type == 0) {
6737 if (found_key.offset == 0)
6739 key.offset = found_key.offset - 1;
6740 key.type = found_key.type;
6742 key.type = found_key.type - 1;
6743 key.offset = (u64)-1;
6747 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6748 found_key.objectid, found_key.type, found_key.offset);
6750 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6753 btrfs_release_path(path);
/* keep block group accounting in sync with the deleted extent */
6755 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6756 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6757 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6758 found_key.offset : root->nodesize;
6760 ret = btrfs_update_block_group(trans, root, bytenr,
6767 btrfs_release_path(path);
6772 * for a single backref, this will allocate a new extent
6773 * and add the backref to it.
/*
 * Re-create a missing extent item for @rec and attach backref @back to
 * it (allocating the item first when @allocated is false), then insert
 * the actual backref via btrfs_inc_extent_ref().
 *
 * FIX(review): the five references to the local 'copy_key' had been
 * mangled by HTML-entity corruption ("&copy" rendered as "©"), turning
 * "&copy_key" into "©_key" — restored to "&copy_key".  Also removed a
 * stray double semicolon after the copy_key declaration.
 * NOTE(review): some original lines are elided in this excerpt.
 */
6775 static int record_extent(struct btrfs_trans_handle *trans,
6776 struct btrfs_fs_info *info,
6777 struct btrfs_path *path,
6778 struct extent_record *rec,
6779 struct extent_backref *back,
6780 int allocated, u64 flags)
6783 struct btrfs_root *extent_root = info->extent_root;
6784 struct extent_buffer *leaf;
6785 struct btrfs_key ins_key;
6786 struct btrfs_extent_item *ei;
6787 struct tree_backref *tback;
6788 struct data_backref *dback;
6789 struct btrfs_tree_block_info *bi;
/* tree blocks are at least nodesize even if the record says less */
6792 rec->max_size = max_t(u64, rec->max_size,
6793 info->extent_root->nodesize);
6796 u32 item_size = sizeof(*ei);
6799 item_size += sizeof(*bi);
6801 ins_key.objectid = rec->start;
6802 ins_key.offset = rec->max_size;
6803 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6805 ret = btrfs_insert_empty_item(trans, extent_root, path,
6806 &ins_key, item_size);
6810 leaf = path->nodes[0];
6811 ei = btrfs_item_ptr(leaf, path->slots[0],
6812 struct btrfs_extent_item);
/* refs start at 0; btrfs_inc_extent_ref() below bumps them */
6814 btrfs_set_extent_refs(leaf, ei, 0);
6815 btrfs_set_extent_generation(leaf, ei, rec->generation);
6817 if (back->is_data) {
6818 btrfs_set_extent_flags(leaf, ei,
6819 BTRFS_EXTENT_FLAG_DATA);
6821 struct btrfs_disk_key copy_key;
6823 tback = to_tree_backref(back);
6824 bi = (struct btrfs_tree_block_info *)(ei + 1);
6825 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* we don't know the original first key, so record a zero key */
6828 btrfs_set_disk_key_objectid(&copy_key,
6829 rec->info_objectid);
6830 btrfs_set_disk_key_type(&copy_key, 0);
6831 btrfs_set_disk_key_offset(&copy_key, 0);
6833 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6834 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6836 btrfs_set_extent_flags(leaf, ei,
6837 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6840 btrfs_mark_buffer_dirty(leaf);
6841 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6842 rec->max_size, 1, 0);
6845 btrfs_release_path(path);
6848 if (back->is_data) {
6852 dback = to_data_backref(back);
6853 if (back->full_backref)
6854 parent = dback->parent;
/* add one ref per reference we actually observed in the fs trees */
6858 for (i = 0; i < dback->found_ref; i++) {
6859 /* if parent != 0, we're doing a full backref
6860 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6861 * just makes the backref allocator create a data
6864 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6865 rec->start, rec->max_size,
6869 BTRFS_FIRST_FREE_OBJECTID :
6875 fprintf(stderr, "adding new data backref"
6876 " on %llu %s %llu owner %llu"
6877 " offset %llu found %d\n",
6878 (unsigned long long)rec->start,
6879 back->full_backref ?
6881 back->full_backref ?
6882 (unsigned long long)parent :
6883 (unsigned long long)dback->root,
6884 (unsigned long long)dback->owner,
6885 (unsigned long long)dback->offset,
6890 tback = to_tree_backref(back);
6891 if (back->full_backref)
6892 parent = tback->parent;
6896 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6897 rec->start, rec->max_size,
6898 parent, tback->root, 0, 0);
6899 fprintf(stderr, "adding new tree backref on "
6900 "start %llu len %llu parent %llu root %llu\n",
6901 rec->start, rec->max_size, parent, tback->root);
6904 btrfs_release_path(path);
/*
 * Linear search of an extent_entry list for an exact (bytenr, bytes)
 * match.
 * NOTE(review): the return statements are elided in this excerpt.
 */
6908 static struct extent_entry *find_entry(struct list_head *entries,
6909 u64 bytenr, u64 bytes)
6911 struct extent_entry *entry = NULL;
6913 list_for_each_entry(entry, entries, list) {
6914 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the most trustworthy extent_entry from a list of conflicting
 * candidates: prefer the highest reference count, distrust entries
 * whose references are all broken, and refuse to choose when the top
 * candidates are tied.
 * NOTE(review): some original lines (returns, braces) are elided in
 * this excerpt.
 */
6921 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6923 struct extent_entry *entry, *best = NULL, *prev = NULL;
6925 list_for_each_entry(entry, entries, list) {
6932 * If there are as many broken entries as entries then we know
6933 * not to trust this particular entry.
6935 if (entry->broken == entry->count)
6939 * If our current entry == best then we can't be sure our best
6940 * is really the best, so we need to keep searching.
6942 if (best && best->count == entry->count) {
6948 /* Prev == entry, not good enough, have to keep searching */
6949 if (!prev->broken && prev->count == entry->count)
6953 best = (prev->count > entry->count) ? prev : entry;
6954 else if (best->count < entry->count)
/*
 * repair_ref: rewrite one file extent item so that its disk_bytenr /
 * disk_num_bytes agree with @entry, the (bytenr, bytes) pair the backrefs
 * voted for in verify_backrefs().
 *
 * @info:  fs info, used to look up the subvolume root owning the ref
 * @path:  caller-supplied scratch path; released before returning
 * @dback: data backref identifying the file extent (root/owner/offset)
 * @entry: the agreed-upon extent start/size the ref should point at
 *
 * Returns 0 on success or a negative error; a failed transaction commit
 * is reported even when the fixup itself succeeded.
 *
 * NOTE(review): interior lines are elided in this dump; comments describe
 * only the visible statements.
 */
6962 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6963 struct data_backref *dback, struct extent_entry *entry)
6965 struct btrfs_trans_handle *trans;
6966 struct btrfs_root *root;
6967 struct btrfs_file_extent_item *fi;
6968 struct extent_buffer *leaf;
6969 struct btrfs_key key;
/* Look up the subvolume root that owns this backref. */
6973 key.objectid = dback->root;
6974 key.type = BTRFS_ROOT_ITEM_KEY;
6975 key.offset = (u64)-1;
6976 root = btrfs_read_fs_root(info, &key);
6978 fprintf(stderr, "Couldn't find root for our ref\n");
6983 * The backref points to the original offset of the extent if it was
6984 * split, so we need to search down to the offset we have and then walk
6985 * forward until we find the backref we're looking for.
6987 key.objectid = dback->owner;
6988 key.type = BTRFS_EXTENT_DATA_KEY;
6989 key.offset = dback->offset;
/* Read-only search first: locate the file extent without cowing. */
6990 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6992 fprintf(stderr, "Error looking up ref %d\n", ret);
/* Step leaf by leaf until we hit the matching file extent item. */
6997 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6998 ret = btrfs_next_leaf(root, path);
7000 fprintf(stderr, "Couldn't find our ref, next\n");
7004 leaf = path->nodes[0];
7005 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7006 if (key.objectid != dback->owner ||
7007 key.type != BTRFS_EXTENT_DATA_KEY) {
7008 fprintf(stderr, "Couldn't find our ref, search\n");
7011 fi = btrfs_item_ptr(leaf, path->slots[0],
7012 struct btrfs_file_extent_item);
7013 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7014 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* Stop once the on-disk values match the backref we are repairing. */
7016 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
7021 btrfs_release_path(path);
7023 trans = btrfs_start_transaction(root, 1);
7025 return PTR_ERR(trans);
7028 * Ok we have the key of the file extent we want to fix, now we can cow
7029 * down to the thing and fix it.
7031 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7033 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
7034 key.objectid, key.type, key.offset, ret);
7038 fprintf(stderr, "Well that's odd, we just found this key "
7039 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
7044 leaf = path->nodes[0];
7045 fi = btrfs_item_ptr(leaf, path->slots[0],
7046 struct btrfs_file_extent_item);
/* Compressed extents cannot be repointed safely; bail with advice. */
7048 if (btrfs_file_extent_compression(leaf, fi) &&
7049 dback->disk_bytenr != entry->bytenr) {
7050 fprintf(stderr, "Ref doesn't match the record start and is "
7051 "compressed, please take a btrfs-image of this file "
7052 "system and send it to a btrfs developer so they can "
7053 "complete this functionality for bytenr %Lu\n",
7054 dback->disk_bytenr);
/* Broken ref: simply repoint disk_bytenr at the agreed extent start. */
7059 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
7060 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
/* Ref starts past the entry start: the delta is folded into the file
 * extent offset (adjustment lines elided in this dump). */
7061 } else if (dback->disk_bytenr > entry->bytenr) {
7062 u64 off_diff, offset;
7064 off_diff = dback->disk_bytenr - entry->bytenr;
7065 offset = btrfs_file_extent_offset(leaf, fi);
7066 if (dback->disk_bytenr + offset +
7067 btrfs_file_extent_num_bytes(leaf, fi) >
7068 entry->bytenr + entry->bytes) {
7069 fprintf(stderr, "Ref is past the entry end, please "
7070 "take a btrfs-image of this file system and "
7071 "send it to a btrfs developer, ref %Lu\n",
7072 dback->disk_bytenr);
7077 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7078 btrfs_set_file_extent_offset(leaf, fi, offset);
/* Ref starts before the entry start: shrink offset accordingly. */
7079 } else if (dback->disk_bytenr < entry->bytenr) {
7082 offset = btrfs_file_extent_offset(leaf, fi);
7083 if (dback->disk_bytenr + offset < entry->bytenr) {
7084 fprintf(stderr, "Ref is before the entry start, please"
7085 " take a btrfs-image of this file system and "
7086 "send it to a btrfs developer, ref %Lu\n",
7087 dback->disk_bytenr);
7092 offset += dback->disk_bytenr;
7093 offset -= entry->bytenr;
7094 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
7095 btrfs_set_file_extent_offset(leaf, fi, offset);
/* The length is always fixed up to the agreed size. */
7098 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
7101 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
7102 * only do this if we aren't using compression, otherwise it's a
7105 if (!btrfs_file_extent_compression(leaf, fi))
7106 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
7108 printf("ram bytes may be wrong?\n");
7109 btrfs_mark_buffer_dirty(leaf);
7111 err = btrfs_commit_transaction(trans, root);
7112 btrfs_release_path(path);
7113 return ret ? ret : err;
/*
 * verify_backrefs: make all data backrefs of @rec agree on one
 * (bytenr, bytes) extent location.
 *
 * Each distinct (disk_bytenr, bytes) seen among the data backrefs becomes
 * a voting extent_entry; the winner (or, on a tie, the extent record
 * itself) is chosen and every disagreeing ref is rewritten via
 * repair_ref().  Full backrefs and metadata backrefs are skipped.
 *
 * Returns 0 when nothing needed fixing, a negative error on failure.
 * NOTE(review): interior lines are elided in this dump.
 */
7116 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
7117 struct extent_record *rec)
7119 struct extent_backref *back;
7120 struct data_backref *dback;
7121 struct extent_entry *entry, *best = NULL;
7124 int broken_entries = 0;
7129 * Metadata is easy and the backrefs should always agree on bytenr and
7130 * size, if not we've got bigger issues.
/* Pass 1: collect one voting entry per distinct (bytenr, bytes) pair. */
7135 list_for_each_entry(back, &rec->backrefs, list) {
7136 if (back->full_backref || !back->is_data)
7139 dback = to_data_backref(back);
7142 * We only pay attention to backrefs that we found a real
7145 if (dback->found_ref == 0)
7149 * For now we only catch when the bytes don't match, not the
7150 * bytenr. We can easily do this at the same time, but I want
7151 * to have a fs image to test on before we just add repair
7152 * functionality willy-nilly so we know we won't screw up the
7156 entry = find_entry(&entries, dback->disk_bytenr,
7159 entry = malloc(sizeof(struct extent_entry));
7164 memset(entry, 0, sizeof(*entry));
7165 entry->bytenr = dback->disk_bytenr;
7166 entry->bytes = dback->bytes;
7167 list_add_tail(&entry->list, &entries);
7172 * If we only have one entry we may think the entries agree when
7173 * in reality they don't so we have to do some extra checking.
7175 if (dback->disk_bytenr != rec->start ||
7176 dback->bytes != rec->nr || back->broken)
7187 /* Yay all the backrefs agree, carry on good sir */
7188 if (nr_entries <= 1 && !mismatch)
7191 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7192 "%Lu\n", rec->start);
7195 * First we want to see if the backrefs can agree amongst themselves who
7196 * is right, so figure out which one of the entries has the highest
7199 best = find_most_right_entry(&entries);
7202 * Ok so we may have an even split between what the backrefs think, so
7203 * this is where we use the extent ref to see what it thinks.
7206 entry = find_entry(&entries, rec->start, rec->nr);
7207 if (!entry && (!broken_entries || !rec->found_rec)) {
7208 fprintf(stderr, "Backrefs don't agree with each other "
7209 "and extent record doesn't agree with anybody,"
7210 " so we can't fix bytenr %Lu bytes %Lu\n",
7211 rec->start, rec->nr);
7214 } else if (!entry) {
7216 * Ok our backrefs were broken, we'll assume this is the
7217 * correct value and add an entry for this range.
7219 entry = malloc(sizeof(struct extent_entry));
7224 memset(entry, 0, sizeof(*entry));
7225 entry->bytenr = rec->start;
7226 entry->bytes = rec->nr;
7227 list_add_tail(&entry->list, &entries);
/* Re-vote with the extent record's own entry acting as tie breaker. */
7231 best = find_most_right_entry(&entries);
7233 fprintf(stderr, "Backrefs and extent record evenly "
7234 "split on who is right, this is going to "
7235 "require user input to fix bytenr %Lu bytes "
7236 "%Lu\n", rec->start, rec->nr);
7243 * I don't think this can happen currently as we'll abort() if we catch
7244 * this case higher up, but in case somebody removes that we still can't
7245 * deal with it properly here yet, so just bail out if that's the case.
7247 if (best->bytenr != rec->start) {
7248 fprintf(stderr, "Extent start and backref starts don't match, "
7249 "please use btrfs-image on this file system and send "
7250 "it to a btrfs developer so they can make fsck fix "
7251 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7252 rec->start, rec->nr);
7258 * Ok great we all agreed on an extent record, let's go find the real
7259 * references and fix up the ones that don't match.
/* Pass 2: rewrite every data ref that disagrees with the winner. */
7261 list_for_each_entry(back, &rec->backrefs, list) {
7262 if (back->full_backref || !back->is_data)
7265 dback = to_data_backref(back);
7268 * Still ignoring backrefs that don't have a real ref attached
7271 if (dback->found_ref == 0)
7274 if (dback->bytes == best->bytes &&
7275 dback->disk_bytenr == best->bytenr)
7278 ret = repair_ref(info, path, dback, best);
7284 * Ok we messed with the actual refs, which means we need to drop our
7285 * entire cache and go back and rescan. I know this is a huge pain and
7286 * adds a lot of extra work, but it's the only way to be safe. Once all
7287 * the backrefs agree we may not need to do anything to the extent
/* Free the voting entries before returning. */
7292 while (!list_empty(&entries)) {
7293 entry = list_entry(entries.next, struct extent_entry, list);
7294 list_del_init(&entry->list);
/*
 * process_duplicates: decide whether a duplicate extent record needs
 * deletion or is just a backref mixup.
 *
 * When @rec has no real extent item but exactly one duplicate that does,
 * the duplicate ("good") replaces @rec in @extent_cache, absorbing its
 * backrefs and merging any further overlapping records it finds.
 *
 * Returns 0 when records remain to be deleted by the caller, 1 when the
 * situation has been resolved in-cache.
 * NOTE(review): interior lines are elided in this dump.
 */
7300 static int process_duplicates(struct btrfs_root *root,
7301 struct cache_tree *extent_cache,
7302 struct extent_record *rec)
7304 struct extent_record *good, *tmp;
7305 struct cache_extent *cache;
7309 * If we found a extent record for this extent then return, or if we
7310 * have more than one duplicate we are likely going to need to delete
7313 if (rec->found_rec || rec->num_duplicates > 1)
7316 /* Shouldn't happen but just in case */
7317 BUG_ON(!rec->num_duplicates);
7320 * So this happens if we end up with a backref that doesn't match the
7321 * actual extent entry. So either the backref is bad or the extent
7322 * entry is bad. Either way we want to have the extent_record actually
7323 * reflect what we found in the extent_tree, so we need to take the
7324 * duplicate out and use that as the extent_record since the only way we
7325 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7327 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the single duplicate to be the canonical record. */
7329 good = to_extent_record(rec->dups.next);
7330 list_del_init(&good->list);
7331 INIT_LIST_HEAD(&good->backrefs);
7332 INIT_LIST_HEAD(&good->dups);
7333 good->cache.start = good->start;
7334 good->cache.size = good->nr;
7335 good->content_checked = 0;
7336 good->owner_ref_checked = 0;
7337 good->num_duplicates = 0;
7338 good->refs = rec->refs;
7339 list_splice_init(&rec->backrefs, &good->backrefs);
/* Merge any records overlapping the promoted range. */
7341 cache = lookup_cache_extent(extent_cache, good->start,
7345 tmp = container_of(cache, struct extent_record, cache);
7348 * If we find another overlapping extent and it's found_rec is
7349 * set then it's a duplicate and we need to try and delete
7352 if (tmp->found_rec || tmp->num_duplicates > 0) {
7353 if (list_empty(&good->list))
7354 list_add_tail(&good->list,
7355 &duplicate_extents);
7356 good->num_duplicates += tmp->num_duplicates + 1;
7357 list_splice_init(&tmp->dups, &good->dups);
7358 list_del_init(&tmp->list);
7359 list_add_tail(&tmp->list, &good->dups);
7360 remove_cache_extent(extent_cache, &tmp->cache);
7365 * Ok we have another non extent item backed extent rec, so lets
7366 * just add it to this extent and carry on like we did above.
7368 good->refs += tmp->refs;
7369 list_splice_init(&tmp->backrefs, &good->backrefs);
7370 remove_cache_extent(extent_cache, &tmp->cache);
7373 ret = insert_cache_extent(extent_cache, &good->cache);
/* 0 => caller must still delete duplicates; 1 => fully resolved here. */
7376 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records: remove redundant EXTENT_ITEMs that overlap
 * the record covering all of @rec's duplicates.
 *
 * The widest record (the one that fully covers the others) is kept; every
 * other record with a real extent item gets its item deleted from the
 * extent tree in one transaction.  Overlaps that do not nest cleanly are
 * reported and refused.
 *
 * Returns the number of items deleted on success, or a negative error.
 * NOTE(review): interior lines are elided in this dump.
 */
7379 static int delete_duplicate_records(struct btrfs_root *root,
7380 struct extent_record *rec)
7382 struct btrfs_trans_handle *trans;
7383 LIST_HEAD(delete_list);
7384 struct btrfs_path *path;
7385 struct extent_record *tmp, *good, *n;
7388 struct btrfs_key key;
7390 path = btrfs_alloc_path();
7397 /* Find the record that covers all of the duplicates. */
7398 list_for_each_entry(tmp, &rec->dups, list) {
7399 if (good->start < tmp->start)
7401 if (good->nr > tmp->nr)
/* Partial overlap (not fully nested) cannot be auto-repaired. */
7404 if (tmp->start + tmp->nr < good->start + good->nr) {
7405 fprintf(stderr, "Ok we have overlapping extents that "
7406 "aren't completely covered by each other, this "
7407 "is going to require more careful thought. "
7408 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7409 tmp->start, tmp->nr, good->start, good->nr);
/* Queue everything except the kept record for deletion. */
7416 list_add_tail(&rec->list, &delete_list);
7418 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7421 list_move_tail(&tmp->list, &delete_list);
7424 root = root->fs_info->extent_root;
7425 trans = btrfs_start_transaction(root, 1);
7426 if (IS_ERR(trans)) {
7427 ret = PTR_ERR(trans);
7431 list_for_each_entry(tmp, &delete_list, list) {
7432 if (tmp->found_rec == 0)
7434 key.objectid = tmp->start;
7435 key.type = BTRFS_EXTENT_ITEM_KEY;
7436 key.offset = tmp->nr;
7438 /* Shouldn't happen but just in case */
7439 if (tmp->metadata) {
7440 fprintf(stderr, "Well this shouldn't happen, extent "
7441 "record overlaps but is metadata? "
7442 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7446 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7452 ret = btrfs_del_item(trans, root, path);
7455 btrfs_release_path(path);
7458 err = btrfs_commit_transaction(trans, root);
/* Free the queued records (and any remaining dups) before returning. */
7462 while (!list_empty(&delete_list)) {
7463 tmp = to_extent_record(delete_list.next);
7464 list_del_init(&tmp->list);
7470 while (!list_empty(&rec->dups)) {
7471 tmp = to_extent_record(rec->dups.next);
7472 list_del_init(&tmp->list);
7476 btrfs_free_path(path);
7478 if (!ret && !nr_del)
7479 rec->num_duplicates = 0;
7481 return ret ? ret : nr_del;
/*
 * find_possible_backrefs: resolve data backrefs of @rec that were seen in
 * the extent tree but never matched to a real file extent.
 *
 * For each unmatched data backref, look up the referenced file extent in
 * its subvolume and, when the pointed-to bytenr has no extent record of
 * its own in @extent_cache, adopt the on-disk (bytenr, bytes) values so
 * verify_backrefs() can vote on them.
 *
 * Returns 0 on success or a negative error.
 * NOTE(review): interior lines are elided in this dump.
 */
7484 static int find_possible_backrefs(struct btrfs_fs_info *info,
7485 struct btrfs_path *path,
7486 struct cache_tree *extent_cache,
7487 struct extent_record *rec)
7489 struct btrfs_root *root;
7490 struct extent_backref *back;
7491 struct data_backref *dback;
7492 struct cache_extent *cache;
7493 struct btrfs_file_extent_item *fi;
7494 struct btrfs_key key;
7498 list_for_each_entry(back, &rec->backrefs, list) {
7499 /* Don't care about full backrefs (poor unloved backrefs) */
7500 if (back->full_backref || !back->is_data)
7503 dback = to_data_backref(back);
7505 /* We found this one, we don't need to do a lookup */
7506 if (dback->found_ref)
7509 key.objectid = dback->root;
7510 key.type = BTRFS_ROOT_ITEM_KEY;
7511 key.offset = (u64)-1;
7513 root = btrfs_read_fs_root(info, &key);
7515 /* No root, definitely a bad ref, skip */
7516 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7518 /* Other err, exit */
7520 return PTR_ERR(root);
/* Look the referenced file extent up in its fs tree. */
7522 key.objectid = dback->owner;
7523 key.type = BTRFS_EXTENT_DATA_KEY;
7524 key.offset = dback->offset;
7525 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7527 btrfs_release_path(path);
7530 /* Didn't find it, we can carry on */
7535 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7536 struct btrfs_file_extent_item);
7537 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7538 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7539 btrfs_release_path(path);
7540 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7542 struct extent_record *tmp;
7543 tmp = container_of(cache, struct extent_record, cache);
7546 * If we found an extent record for the bytenr for this
7547 * particular backref then we can't add it to our
7548 * current extent record. We only want to add backrefs
7549 * that don't have a corresponding extent item in the
7550 * extent tree since they likely belong to this record
7551 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the on-disk location/size for the vote in verify_backrefs. */
7557 dback->found_ref += 1;
7558 dback->disk_bytenr = bytenr;
7559 dback->bytes = bytes;
7562 * Set this so the verify backref code knows not to trust the
7563 * values in this backref.
7572 * Record orphan data ref into corresponding root.
7574 * Return 0 if the extent item contains data ref and recorded.
7575 * Return 1 if the extent item contains no useful data ref
7576 * In that case, it may contain only shared_dataref or metadata backref
7577 * or the file extent exists (this should be handled by the extent bytenr
7579 * Return <0 if something goes wrong.
/*
 * record_orphan_data_extents: queue orphan data extents onto their
 * owning subvolume root for later inode/file-extent rebuild.
 *
 * For each data backref of @rec found in the extent tree but lacking a
 * matching file extent (ret != 0 from the fs-tree search), allocate an
 * orphan_data_extent describing root/objectid/offset plus the extent's
 * disk location, and add it to dest_root->orphan_data_extents.
 *
 * Returns 0 when at least one ref was recorded, 1 when none were
 * (nothing useful to rebuild from), or a negative error.
 * NOTE(review): interior lines are elided in this dump.
 */
7581 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7582 struct extent_record *rec)
7584 struct btrfs_key key;
7585 struct btrfs_root *dest_root;
7586 struct extent_backref *back;
7587 struct data_backref *dback;
7588 struct orphan_data_extent *orphan;
7589 struct btrfs_path *path;
7590 int recorded_data_ref = 0;
7595 path = btrfs_alloc_path();
7598 list_for_each_entry(back, &rec->backrefs, list) {
7599 if (back->full_backref || !back->is_data ||
7600 !back->found_extent_tree)
7602 dback = to_data_backref(back);
7603 if (dback->found_ref)
7605 key.objectid = dback->root;
7606 key.type = BTRFS_ROOT_ITEM_KEY;
7607 key.offset = (u64)-1;
7609 dest_root = btrfs_read_fs_root(fs_info, &key);
7611 /* For non-exist root we just skip it */
7612 if (IS_ERR(dest_root) || !dest_root)
7615 key.objectid = dback->owner;
7616 key.type = BTRFS_EXTENT_DATA_KEY;
7617 key.offset = dback->offset;
7619 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7621 * For ret < 0, it's OK since the fs-tree may be corrupted,
7622 * we need to record it for inode/file extent rebuild.
7623 * For ret > 0, we record it only for file extent rebuild.
7624 * For ret == 0, the file extent exists but only bytenr
7625 * mismatch, let the original bytenr fix routine to handle,
/* Record the orphan on its destination root's list. */
7631 orphan = malloc(sizeof(*orphan));
7636 INIT_LIST_HEAD(&orphan->list);
7637 orphan->root = dback->root;
7638 orphan->objectid = dback->owner;
7639 orphan->offset = dback->offset;
7640 orphan->disk_bytenr = rec->cache.start;
7641 orphan->disk_len = rec->cache.size;
7642 list_add(&dest_root->orphan_data_extents, &orphan->list);
7643 recorded_data_ref = 1;
7646 btrfs_free_path(path);
7648 return !recorded_data_ref;
7654 * when an incorrect extent item is found, this will delete
7655 * all of the existing entries for it and recreate them
7656 * based on what the tree scan found.
/*
 * fixup_extent_refs: delete-and-recreate repair for one extent record.
 *
 * Step 0: for non-metadata mismatches, run find_possible_backrefs() and
 * verify_backrefs() so the backrefs agree before touching the tree.
 * Step 1: delete all existing extent items for [start, start+max_size).
 * Step 2: unless the block is in the corrupt-blocks cache, re-insert one
 * item per backref that was actually found (record_extent()).
 *
 * Returns 0 on success or a negative error; commit errors override ret.
 * NOTE(review): interior lines are elided in this dump.
 */
7658 static int fixup_extent_refs(struct btrfs_fs_info *info,
7659 struct cache_tree *extent_cache,
7660 struct extent_record *rec)
7662 struct btrfs_trans_handle *trans = NULL;
7664 struct btrfs_path *path;
7665 struct list_head *cur = rec->backrefs.next;
7666 struct cache_extent *cache;
7667 struct extent_backref *back;
7671 if (rec->flag_block_full_backref)
7672 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7674 path = btrfs_alloc_path();
7678 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7680 * Sometimes the backrefs themselves are so broken they don't
7681 * get attached to any meaningful rec, so first go back and
7682 * check any of our backrefs that we couldn't find and throw
7683 * them into the list if we find the backref so that
7684 * verify_backrefs can figure out what to do.
7686 ret = find_possible_backrefs(info, path, extent_cache, rec);
7691 /* step one, make sure all of the backrefs agree */
7692 ret = verify_backrefs(info, path, rec);
7696 trans = btrfs_start_transaction(info->extent_root, 1);
7697 if (IS_ERR(trans)) {
7698 ret = PTR_ERR(trans);
7702 /* step two, delete all the existing records */
7703 ret = delete_extent_records(trans, info->extent_root, path,
7704 rec->start, rec->max_size);
7709 /* was this block corrupt? If so, don't add references to it */
7710 cache = lookup_cache_extent(info->corrupt_blocks,
7711 rec->start, rec->max_size);
7717 /* step three, recreate all the refs we did find */
7718 while(cur != &rec->backrefs) {
7719 back = to_extent_backref(cur);
7723 * if we didn't find any references, don't create a
7726 if (!back->found_ref)
7729 rec->bad_full_backref = 0;
7730 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7738 int err = btrfs_commit_transaction(trans, info->extent_root);
7743 btrfs_free_path(path);
/*
 * fixup_extent_flags: set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the
 * extent item for @rec, according to rec->flag_block_full_backref.
 *
 * Looks the item up by METADATA_ITEM_KEY (skinny metadata) or
 * EXTENT_ITEM_KEY, rewrites its flags in place, and commits.
 *
 * Returns 0 on success or a negative error.
 * NOTE(review): interior lines are elided in this dump.
 */
7747 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7748 struct extent_record *rec)
7750 struct btrfs_trans_handle *trans;
7751 struct btrfs_root *root = fs_info->extent_root;
7752 struct btrfs_path *path;
7753 struct btrfs_extent_item *ei;
7754 struct btrfs_key key;
/* Skinny metadata items are keyed by level, data by size. */
7758 key.objectid = rec->start;
7759 if (rec->metadata) {
7760 key.type = BTRFS_METADATA_ITEM_KEY;
7761 key.offset = rec->info_level;
7763 key.type = BTRFS_EXTENT_ITEM_KEY;
7764 key.offset = rec->max_size;
7767 path = btrfs_alloc_path();
7771 trans = btrfs_start_transaction(root, 0);
7772 if (IS_ERR(trans)) {
7773 btrfs_free_path(path);
7774 return PTR_ERR(trans);
7777 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7779 btrfs_free_path(path);
7780 btrfs_commit_transaction(trans, root);
7783 fprintf(stderr, "Didn't find extent for %llu\n",
7784 (unsigned long long)rec->start);
7785 btrfs_free_path(path);
7786 btrfs_commit_transaction(trans, root);
/* Rewrite only the FULL_BACKREF bit, preserving the other flags. */
7790 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7791 struct btrfs_extent_item);
7792 flags = btrfs_extent_flags(path->nodes[0], ei);
7793 if (rec->flag_block_full_backref) {
7794 fprintf(stderr, "setting full backref on %llu\n",
7795 (unsigned long long)key.objectid);
7796 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7798 fprintf(stderr, "clearing full backref on %llu\n",
7799 (unsigned long long)key.objectid);
7800 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7802 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7803 btrfs_mark_buffer_dirty(path->nodes[0]);
7804 btrfs_free_path(path);
7805 return btrfs_commit_transaction(trans, root);
7808 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block: remove the parent pointer to one corrupt block in the
 * extent allocation tree.
 *
 * Searches down to the parent level of @corrupt, finds the slot whose
 * blockptr matches the corrupt block (first at the search slot, then by
 * scanning the whole node), and deletes that pointer with btrfs_del_ptr.
 *
 * Returns 0 on success or a negative error.
 * NOTE(review): interior lines are elided in this dump.
 */
7809 static int prune_one_block(struct btrfs_trans_handle *trans,
7810 struct btrfs_fs_info *info,
7811 struct btrfs_corrupt_block *corrupt)
7814 struct btrfs_path path;
7815 struct extent_buffer *eb;
7819 int level = corrupt->level + 1;
7821 btrfs_init_path(&path);
7823 /* we want to stop at the parent to our busted block */
7824 path.lowest_level = level;
7826 ret = btrfs_search_slot(trans, info->extent_root,
7827 &corrupt->key, &path, -1, 1);
7832 eb = path.nodes[level];
7839 * hopefully the search gave us the block we want to prune,
7840 * lets try that first
7842 slot = path.slots[level];
7843 found = btrfs_node_blockptr(eb, slot);
7844 if (found == corrupt->cache.start)
7847 nritems = btrfs_header_nritems(eb);
7849 /* the search failed, lets scan this node and hope we find it */
7850 for (slot = 0; slot < nritems; slot++) {
7851 found = btrfs_node_blockptr(eb, slot);
7852 if (found == corrupt->cache.start)
7856 * we couldn't find the bad block. TODO, search all the nodes for pointers
7859 if (eb == info->extent_root->node) {
7864 btrfs_release_path(&path);
7869 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7870 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7873 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks: walk the corrupt-blocks cache and delete the
 * parent pointer to each corrupt block via prune_one_block(), removing
 * each entry from the cache as it is handled.
 *
 * The transaction is started lazily on the first corrupt block found and
 * committed once at the end.  Returns 0 or a negative error.
 * NOTE(review): interior loop lines are elided in this dump.
 */
7877 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7879 struct btrfs_trans_handle *trans = NULL;
7880 struct cache_extent *cache;
7881 struct btrfs_corrupt_block *corrupt;
7884 cache = search_cache_extent(info->corrupt_blocks, 0);
7888 trans = btrfs_start_transaction(info->extent_root, 1);
7890 return PTR_ERR(trans);
7892 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7893 prune_one_block(trans, info, corrupt);
7894 remove_cache_extent(info->corrupt_blocks, cache);
7897 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups: clear the dirty free-space-cache ranges and
 * step through every block group so its cached state is dropped.
 *
 * Iterates find_first_extent_bit()/clear_extent_dirty() over the
 * free_space_cache tree, then advances block group by block group using
 * each group's key to find the next start offset.
 * NOTE(review): loop-control lines are elided in this dump.
 */
7901 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7903 struct btrfs_block_group_cache *cache;
7908 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7909 &start, &end, EXTENT_DIRTY);
7912 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7918 cache = btrfs_lookup_first_block_group(fs_info, start);
7923 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs: main verification/repair driver over every extent
 * record collected in @extent_cache.
 *
 * In repair mode it first pins all problem extents (and corrupt blocks)
 * as excluded so new allocations avoid them, prunes corrupt blocks, and
 * resolves duplicate records.  It then walks every record reporting and
 * optionally repairing: duplicate items, ref-count mismatches, backref
 * mismatches, owner-ref failures, bad full-backref flags, stripe-crossing
 * metadata and chunk-type mismatches.  Finally block accounting is fixed
 * in a closing transaction.
 *
 * Returns 0 on success, -EAGAIN when a rescan is required, or another
 * negative error.  NOTE(review): many lines are elided in this dump.
 */
7927 static int check_extent_refs(struct btrfs_root *root,
7928 struct cache_tree *extent_cache)
7930 struct extent_record *rec;
7931 struct cache_extent *cache;
7940 * if we're doing a repair, we have to make sure
7941 * we don't allocate from the problem extents.
7942 * In the worst case, this will be all the
7945 cache = search_cache_extent(extent_cache, 0);
7947 rec = container_of(cache, struct extent_record, cache);
7948 set_extent_dirty(root->fs_info->excluded_extents,
7950 rec->start + rec->max_size - 1,
7952 cache = next_cache_extent(cache);
7955 /* pin down all the corrupted blocks too */
7956 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7958 set_extent_dirty(root->fs_info->excluded_extents,
7960 cache->start + cache->size - 1,
7962 cache = next_cache_extent(cache);
7964 prune_corrupt_blocks(root->fs_info);
7965 reset_cached_block_groups(root->fs_info);
7968 reset_cached_block_groups(root->fs_info);
7971 * We need to delete any duplicate entries we find first otherwise we
7972 * could mess up the extent tree when we have backrefs that actually
7973 * belong to a different extent item and not the weird duplicate one.
7975 while (repair && !list_empty(&duplicate_extents)) {
7976 rec = to_extent_record(duplicate_extents.next);
7977 list_del_init(&rec->list);
7979 /* Sometimes we can find a backref before we find an actual
7980 * extent, so we need to process it a little bit to see if there
7981 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7982 * if this is a backref screwup. If we need to delete stuff
7983 * process_duplicates() will return 0, otherwise it will return
7986 if (process_duplicates(root, extent_cache, rec))
7988 ret = delete_duplicate_records(root, rec);
7992 * delete_duplicate_records will return the number of entries
7993 * deleted, so if it's greater than 0 then we know we actually
7994 * did something and we need to remove.
/* Main per-record verification loop. */
8008 cache = search_cache_extent(extent_cache, 0);
8011 rec = container_of(cache, struct extent_record, cache);
8012 if (rec->num_duplicates) {
8013 fprintf(stderr, "extent item %llu has multiple extent "
8014 "items\n", (unsigned long long)rec->start);
/* Ref count on the item disagrees with the refs we counted. */
8019 if (rec->refs != rec->extent_item_refs) {
8020 fprintf(stderr, "ref mismatch on [%llu %llu] ",
8021 (unsigned long long)rec->start,
8022 (unsigned long long)rec->nr);
8023 fprintf(stderr, "extent item %llu, found %llu\n",
8024 (unsigned long long)rec->extent_item_refs,
8025 (unsigned long long)rec->refs);
8026 ret = record_orphan_data_extents(root->fs_info, rec);
8033 * we can't use the extent to repair file
8034 * extent, let the fallback method handle it.
8036 if (!fixed && repair) {
8037 ret = fixup_extent_refs(
/* Some backrefs were never matched against real references. */
8048 if (all_backpointers_checked(rec, 1)) {
8049 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
8050 (unsigned long long)rec->start,
8051 (unsigned long long)rec->nr);
8053 if (!fixed && !recorded && repair) {
8054 ret = fixup_extent_refs(root->fs_info,
8063 if (!rec->owner_ref_checked) {
8064 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
8065 (unsigned long long)rec->start,
8066 (unsigned long long)rec->nr);
8067 if (!fixed && !recorded && repair) {
8068 ret = fixup_extent_refs(root->fs_info,
8077 if (rec->bad_full_backref) {
8078 fprintf(stderr, "bad full backref, on [%llu]\n",
8079 (unsigned long long)rec->start);
8081 ret = fixup_extent_flags(root->fs_info, rec);
8090 * Although it's not a extent ref's problem, we reuse this
8091 * routine for error reporting.
8092 * No repair function yet.
8094 if (rec->crossing_stripes) {
8096 "bad metadata [%llu, %llu) crossing stripe boundary\n",
8097 rec->start, rec->start + rec->max_size);
8102 if (rec->wrong_chunk_type) {
8104 "bad extent [%llu, %llu), type mismatch with chunk\n",
8105 rec->start, rec->start + rec->max_size);
/* Record fully processed: drop it and un-pin its range if clean. */
8110 remove_cache_extent(extent_cache, cache);
8111 free_all_extent_backrefs(rec);
8112 if (!init_extent_tree && repair && (!cur_err || fixed))
8113 clear_extent_dirty(root->fs_info->excluded_extents,
8115 rec->start + rec->max_size - 1,
8121 if (ret && ret != -EAGAIN) {
8122 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
/* Close out with a block-accounting fixup transaction. */
8125 struct btrfs_trans_handle *trans;
8127 root = root->fs_info->extent_root;
8128 trans = btrfs_start_transaction(root, 1);
8129 if (IS_ERR(trans)) {
8130 ret = PTR_ERR(trans);
8134 btrfs_fix_block_accounting(trans, root);
8135 ret = btrfs_commit_transaction(trans, root);
8140 fprintf(stderr, "repaired damaged extent references\n");
8146 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
8150 if (type & BTRFS_BLOCK_GROUP_RAID0) {
8151 stripe_size = length;
8152 stripe_size /= num_stripes;
8153 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
8154 stripe_size = length * 2;
8155 stripe_size /= num_stripes;
8156 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
8157 stripe_size = length;
8158 stripe_size /= (num_stripes - 1);
8159 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8160 stripe_size = length;
8161 stripe_size /= (num_stripes - 2);
8163 stripe_size = length;
8169 * Check the chunk with its block group/dev list ref:
8170 * Return 0 if all refs seem valid.
8171 * Return 1 if some refs seem valid; a later check is needed to rebuild refs,
8172 * like missing block group and needs to search extent tree to rebuild them.
8173 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * check_chunk_refs: cross-check one chunk against its block group item
 * and the device extents of each stripe.
 *
 * The block group must exist and agree on offset/length/flags (flags are
 * skipped for metadump_v2 images); each stripe must have a matching
 * device extent of the profile-adjusted stripe length.  Mismatches are
 * printed unless @silent.
 *
 * Returns 0 when all refs look valid, >0 when refs are rebuildable,
 * <0 when essential refs are missing.
 * NOTE(review): interior lines are elided in this dump.
 */
8175 static int check_chunk_refs(struct chunk_record *chunk_rec,
8176 struct block_group_tree *block_group_cache,
8177 struct device_extent_tree *dev_extent_cache,
8180 struct cache_extent *block_group_item;
8181 struct block_group_record *block_group_rec;
8182 struct cache_extent *dev_extent_item;
8183 struct device_extent_record *dev_extent_rec;
8187 int metadump_v2 = 0;
/* Chunk <-> block group item consistency. */
8191 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8194 if (block_group_item) {
8195 block_group_rec = container_of(block_group_item,
8196 struct block_group_record,
8198 if (chunk_rec->length != block_group_rec->offset ||
8199 chunk_rec->offset != block_group_rec->objectid ||
8201 chunk_rec->type_flags != block_group_rec->flags)) {
8204 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8205 chunk_rec->objectid,
8210 chunk_rec->type_flags,
8211 block_group_rec->objectid,
8212 block_group_rec->type,
8213 block_group_rec->offset,
8214 block_group_rec->offset,
8215 block_group_rec->objectid,
8216 block_group_rec->flags);
8219 list_del_init(&block_group_rec->list);
8220 chunk_rec->bg_rec = block_group_rec;
8225 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8226 chunk_rec->objectid,
8231 chunk_rec->type_flags);
/* Chunk stripe <-> device extent consistency. */
8238 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8239 chunk_rec->num_stripes);
8240 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8241 devid = chunk_rec->stripes[i].devid;
8242 offset = chunk_rec->stripes[i].offset;
8243 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8244 devid, offset, length);
8245 if (dev_extent_item) {
8246 dev_extent_rec = container_of(dev_extent_item,
8247 struct device_extent_record,
8249 if (dev_extent_rec->objectid != devid ||
8250 dev_extent_rec->offset != offset ||
8251 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8252 dev_extent_rec->length != length) {
8255 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8256 chunk_rec->objectid,
8259 chunk_rec->stripes[i].devid,
8260 chunk_rec->stripes[i].offset,
8261 dev_extent_rec->objectid,
8262 dev_extent_rec->offset,
8263 dev_extent_rec->length);
8266 list_move(&dev_extent_rec->chunk_list,
8267 &chunk_rec->dextents);
8272 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8273 chunk_rec->objectid,
8276 chunk_rec->stripes[i].devid,
8277 chunk_rec->stripes[i].offset);
8284 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * check_chunks: validate every chunk in @chunk_cache with
 * check_chunk_refs() and sort chunks onto the caller's @good / @rebuild /
 * @bad lists by the result (0 / >0 / <0 respectively).
 *
 * Afterwards, block groups and device extents left unclaimed by any
 * chunk are reported (unless @silent) as orphans.
 *
 * Returns 0 when everything checked out, otherwise an error indicator.
 * NOTE(review): interior lines are elided in this dump.
 */
8285 int check_chunks(struct cache_tree *chunk_cache,
8286 struct block_group_tree *block_group_cache,
8287 struct device_extent_tree *dev_extent_cache,
8288 struct list_head *good, struct list_head *bad,
8289 struct list_head *rebuild, int silent)
8291 struct cache_extent *chunk_item;
8292 struct chunk_record *chunk_rec;
8293 struct block_group_record *bg_rec;
8294 struct device_extent_record *dext_rec;
8298 chunk_item = first_cache_extent(chunk_cache);
8299 while (chunk_item) {
8300 chunk_rec = container_of(chunk_item, struct chunk_record,
8302 err = check_chunk_refs(chunk_rec, block_group_cache,
8303 dev_extent_cache, silent);
8306 if (err == 0 && good)
8307 list_add_tail(&chunk_rec->list, good);
8308 if (err > 0 && rebuild)
8309 list_add_tail(&chunk_rec->list, rebuild);
8311 list_add_tail(&chunk_rec->list, bad);
8312 chunk_item = next_cache_extent(chunk_item);
/* Anything still on these lists has no owning chunk. */
8315 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8318 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8326 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8330 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Verify that the byte_used recorded in a device item matches the sum of
 * all device extent lengths cached for that device.
 *
 * @dev_rec:    device record carrying devid and the on-disk byte_used
 * @dext_cache: device extent tree to sum lengths from
 *
 * Matched extents are unlinked from the orphan device_list so leftovers
 * can later be reported as orphans elsewhere.
 *
 * NOTE(review): the declaration/initialization of total_byte and the
 * loop braces are elided in this excerpt.
 */
8341 static int check_device_used(struct device_record *dev_rec,
8342 struct device_extent_tree *dext_cache)
8344 struct cache_extent *cache;
8345 struct device_extent_record *dev_extent_rec;
/* Start from the first cached extent belonging to this devid. */
8348 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8350 dev_extent_rec = container_of(cache,
8351 struct device_extent_record,
/* Stop once iteration walks past this device's extents. */
8353 if (dev_extent_rec->objectid != dev_rec->devid)
8356 list_del_init(&dev_extent_rec->device_list);
8357 total_byte += dev_extent_rec->length;
8358 cache = next_cache_extent(cache);
/* Accumulated extent length must equal the device item's byte_used. */
8361 if (total_byte != dev_rec->byte_used) {
8363 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8364 total_byte, dev_rec->byte_used, dev_rec->objectid,
8365 dev_rec->type, dev_rec->offset);
/*
 * Validate every cached device against its device extents and then
 * report device extents that never matched any device.
 *
 * @dev_cache:        rb-tree of device_record entries
 * @dev_extent_cache: device extent tree (also holds the
 *                    no_device_orphans list consumed below)
 *
 * NOTE(review): loop braces, error accumulation and the return value
 * are elided in this excerpt.
 */
8372 /* check btrfs_dev_item -> btrfs_dev_extent */
8373 static int check_devices(struct rb_root *dev_cache,
8374 struct device_extent_tree *dev_extent_cache)
8376 struct rb_node *dev_node;
8377 struct device_record *dev_rec;
8378 struct device_extent_record *dext_rec;
/* Check used-byte accounting for each device in the cache. */
8382 dev_node = rb_first(dev_cache);
8384 dev_rec = container_of(dev_node, struct device_record, node);
8385 err = check_device_used(dev_rec, dev_extent_cache);
8389 dev_node = rb_next(dev_node);
/* Any extent still on no_device_orphans has no owning device item. */
8391 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8394 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8395 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root to scan and
 * append it to @head (consumed later by deal_root_from_list()).
 *
 * @head:          list to append to (normal_trees or dropping_trees)
 * @objectid:      root objectid
 * @bytenr:        bytenr of the root node to read
 * @last_snapshot: last_snapshot value from the root item
 * @level/@drop_level/@level_size: tree level info for the read
 * @drop_key:      resume key for partially-dropped roots; copied only
 *                 when non-NULL (the NULL guard is elided here)
 *
 * NOTE(review): the malloc() failure check and the return statement are
 * elided in this excerpt.
 */
8402 static int add_root_item_to_list(struct list_head *head,
8403 u64 objectid, u64 bytenr, u64 last_snapshot,
8404 u8 level, u8 drop_level,
8405 int level_size, struct btrfs_key *drop_key)
8408 struct root_item_record *ri_rec;
8409 ri_rec = malloc(sizeof(*ri_rec));
8412 ri_rec->bytenr = bytenr;
8413 ri_rec->objectid = objectid;
8414 ri_rec->level = level;
8415 ri_rec->level_size = level_size;
8416 ri_rec->drop_level = drop_level;
8417 ri_rec->last_snapshot = last_snapshot;
8419 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8420 list_add_tail(&ri_rec->list, head);
/*
 * Drain @list, unlinking (and, per the standard pattern, freeing — the
 * free() line is elided in this excerpt) every root_item_record on it.
 */
8425 static void free_root_item_list(struct list_head *list)
8427 struct root_item_record *ri_rec;
8429 while (!list_empty(list)) {
8430 ri_rec = list_first_entry(list, struct root_item_record,
8432 list_del_init(&ri_rec->list);
/*
 * Consume the root_item_record list built by check_chunks_and_extents():
 * read each root node, queue it as pending, and drive run_next_block()
 * over the various caches until the scan completes.
 *
 * @list:  root records to process (entries are removed as handled)
 * @root:  any fs root, used to reach fs_info->tree_root for reads
 * @bits/@pending/@seen/@reada/@nodes/@extent_cache/@chunk_cache/
 * @dev_cache/@block_group_cache/@dev_extent_cache: shared scan state
 *         passed straight through to run_next_block()
 *
 * NOTE(review): loop conditions, error handling between the visible
 * calls, and the return value are elided in this excerpt; bits_nr and
 * last come from elided declarations.
 */
8437 static int deal_root_from_list(struct list_head *list,
8438 struct btrfs_root *root,
8439 struct block_info *bits,
8441 struct cache_tree *pending,
8442 struct cache_tree *seen,
8443 struct cache_tree *reada,
8444 struct cache_tree *nodes,
8445 struct cache_tree *extent_cache,
8446 struct cache_tree *chunk_cache,
8447 struct rb_root *dev_cache,
8448 struct block_group_tree *block_group_cache,
8449 struct device_extent_tree *dev_extent_cache)
8454 while (!list_empty(list)) {
8455 struct root_item_record *rec;
8456 struct extent_buffer *buf;
8457 rec = list_entry(list->next,
8458 struct root_item_record, list);
/* Read the root node described by this record. */
8460 buf = read_tree_block(root->fs_info->tree_root,
8461 rec->bytenr, rec->level_size, 0);
8462 if (!extent_buffer_uptodate(buf)) {
8463 free_extent_buffer(buf);
8467 ret = add_root_to_pending(buf, extent_cache, pending,
8468 seen, nodes, rec->objectid);
/*
 * To rebuild extent tree, we need deal with snapshot
 * one by one, otherwise we deal with node firstly which
 * can maximize readahead.
 */
8477 ret = run_next_block(root, bits, bits_nr, &last,
8478 pending, seen, reada, nodes,
8479 extent_cache, chunk_cache,
8480 dev_cache, block_group_cache,
8481 dev_extent_cache, rec);
8485 free_extent_buffer(buf);
8486 list_del(&rec->list);
/* Drain any remaining queued blocks with no specific root record. */
8492 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8493 reada, nodes, extent_cache, chunk_cache,
8494 dev_cache, block_group_cache,
8495 dev_extent_cache, NULL);
/*
 * Top-level pass of the original check mode: scan all tree roots,
 * building caches of chunks, block groups, device extents and extents,
 * then cross-check them against each other and against devices.
 *
 * @root: an fs root used to reach fs_info and its trees
 *
 * Roots with an empty drop_progress key go on normal_trees; roots in the
 * middle of a snapshot drop go on dropping_trees together with their
 * resume key. Both lists are consumed by deal_root_from_list().
 *
 * NOTE(review): error-path labels, loop conditions, several variable
 * declarations (bits_nr, slot, level, level_size, objectid,
 * last_snapshot) and the return statements are elided in this excerpt;
 * the two cleanup sequences near the end appear to be the normal path
 * and an out/error path respectively — confirm against the full file.
 */
8505 static int check_chunks_and_extents(struct btrfs_root *root)
8507 struct rb_root dev_cache;
8508 struct cache_tree chunk_cache;
8509 struct block_group_tree block_group_cache;
8510 struct device_extent_tree dev_extent_cache;
8511 struct cache_tree extent_cache;
8512 struct cache_tree seen;
8513 struct cache_tree pending;
8514 struct cache_tree reada;
8515 struct cache_tree nodes;
8516 struct extent_io_tree excluded_extents;
8517 struct cache_tree corrupt_blocks;
8518 struct btrfs_path path;
8519 struct btrfs_key key;
8520 struct btrfs_key found_key;
8522 struct block_info *bits;
8524 struct extent_buffer *leaf;
8526 struct btrfs_root_item ri;
8527 struct list_head dropping_trees;
8528 struct list_head normal_trees;
8529 struct btrfs_root *root1;
/* Initialize every per-run cache before the scan starts. */
8534 dev_cache = RB_ROOT;
8535 cache_tree_init(&chunk_cache);
8536 block_group_tree_init(&block_group_cache);
8537 device_extent_tree_init(&dev_extent_cache);
8539 cache_tree_init(&extent_cache);
8540 cache_tree_init(&seen);
8541 cache_tree_init(&pending);
8542 cache_tree_init(&nodes);
8543 cache_tree_init(&reada);
8544 cache_tree_init(&corrupt_blocks);
8545 extent_io_tree_init(&excluded_extents);
8546 INIT_LIST_HEAD(&dropping_trees);
8547 INIT_LIST_HEAD(&normal_trees);
/* Hook the caches into fs_info so extent free/corruption events are
 * recorded here during the scan; undone before returning. */
8550 root->fs_info->excluded_extents = &excluded_extents;
8551 root->fs_info->fsck_extent_cache = &extent_cache;
8552 root->fs_info->free_extent_hook = free_extent_hook;
8553 root->fs_info->corrupt_blocks = &corrupt_blocks;
8557 bits = malloc(bits_nr * sizeof(struct block_info));
8563 if (ctx.progress_enabled) {
8564 ctx.tp = TASK_EXTENTS;
8565 task_start(ctx.info);
/* Tree root and chunk root are always scanned as normal trees. */
8569 root1 = root->fs_info->tree_root;
8570 level = btrfs_header_level(root1->node);
8571 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8572 root1->node->start, 0, level, 0,
8573 root1->nodesize, NULL);
8576 root1 = root->fs_info->chunk_root;
8577 level = btrfs_header_level(root1->node);
8578 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8579 root1->node->start, 0, level, 0,
8580 root1->nodesize, NULL);
/* Iterate all ROOT_ITEMs in the tree root to enqueue every subvolume. */
8583 btrfs_init_path(&path);
8586 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8587 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8592 leaf = path.nodes[0];
8593 slot = path.slots[0];
8594 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8595 ret = btrfs_next_leaf(root, &path);
8598 leaf = path.nodes[0];
8599 slot = path.slots[0];
8601 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8602 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8603 unsigned long offset;
8606 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8607 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8608 last_snapshot = btrfs_root_last_snapshot(&ri);
/* Empty drop_progress: a live root; scan it normally. */
8609 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8610 level = btrfs_root_level(&ri);
8611 level_size = root->nodesize;
8612 ret = add_root_item_to_list(&normal_trees,
8614 btrfs_root_bytenr(&ri),
8615 last_snapshot, level,
8616 0, level_size, NULL);
/* Otherwise the root is mid-drop; keep its resume key. */
8620 level = btrfs_root_level(&ri);
8621 level_size = root->nodesize;
8622 objectid = found_key.objectid;
8623 btrfs_disk_key_to_cpu(&found_key,
8625 ret = add_root_item_to_list(&dropping_trees,
8627 btrfs_root_bytenr(&ri),
8628 last_snapshot, level,
8630 level_size, &found_key);
8637 btrfs_release_path(&path);
/*
 * check_block can return -EAGAIN if it fixes something, please keep
 * this in mind when dealing with return values from these functions, if
 * we get -EAGAIN we want to fall through and restart the loop.
 */
8644 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8645 &seen, &reada, &nodes, &extent_cache,
8646 &chunk_cache, &dev_cache, &block_group_cache,
8653 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8654 &pending, &seen, &reada, &nodes,
8655 &extent_cache, &chunk_cache, &dev_cache,
8656 &block_group_cache, &dev_extent_cache);
/* Cross-check the populated caches against each other. */
8663 ret = check_chunks(&chunk_cache, &block_group_cache,
8664 &dev_extent_cache, NULL, NULL, NULL, 0);
8671 ret = check_extent_refs(root, &extent_cache);
8678 ret = check_devices(&dev_cache, &dev_extent_cache);
8683 task_stop(ctx.info);
/* Detach the fsck hooks from fs_info and release all caches. */
8685 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8686 extent_io_tree_cleanup(&excluded_extents);
8687 root->fs_info->fsck_extent_cache = NULL;
8688 root->fs_info->free_extent_hook = NULL;
8689 root->fs_info->corrupt_blocks = NULL;
8690 root->fs_info->excluded_extents = NULL;
8693 free_chunk_cache_tree(&chunk_cache);
8694 free_device_cache_tree(&dev_cache);
8695 free_block_group_tree(&block_group_cache);
8696 free_device_extent_tree(&dev_extent_cache);
8697 free_extent_cache_tree(&seen);
8698 free_extent_cache_tree(&pending);
8699 free_extent_cache_tree(&reada);
8700 free_extent_cache_tree(&nodes);
/* Second cleanup sequence — presumably the error/out path label target;
 * confirm against the full file. */
8703 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8704 free_extent_cache_tree(&seen);
8705 free_extent_cache_tree(&pending);
8706 free_extent_cache_tree(&reada);
8707 free_extent_cache_tree(&nodes);
8708 free_chunk_cache_tree(&chunk_cache);
8709 free_block_group_tree(&block_group_cache);
8710 free_device_cache_tree(&dev_cache);
8711 free_device_extent_tree(&dev_extent_cache);
8712 free_extent_record_cache(root->fs_info, &extent_cache);
8713 free_root_item_list(&normal_trees);
8714 free_root_item_list(&dropping_trees);
8715 extent_io_tree_cleanup(&excluded_extents);
/*
 * Check backrefs of a tree block given by @bytenr or @eb.
 *
 * @root: the root containing the @bytenr or @eb
 * @eb: tree block extent buffer, can be NULL
 * @bytenr: bytenr of the tree block to search
 * @level: tree level of the tree block
 * @owner: owner of the tree block
 *
 * Return >0 for any error found and output error message
 * Return 0 for no error found
 *
 * NOTE(review): interior lines (err/found_ref declarations, loop
 * bounds, several closing braces) are elided in this excerpt.
 */
8731 static int check_tree_block_ref(struct btrfs_root *root,
8732 struct extent_buffer *eb, u64 bytenr,
8733 int level, u64 owner)
8735 struct btrfs_key key;
8736 struct btrfs_root *extent_root = root->fs_info->extent_root;
8737 struct btrfs_path path;
8738 struct btrfs_extent_item *ei;
8739 struct btrfs_extent_inline_ref *iref;
8740 struct extent_buffer *leaf;
8746 u32 nodesize = root->nodesize;
/* Key type depends on whether the fs uses skinny metadata items. */
8753 btrfs_init_path(&path);
8754 key.objectid = bytenr;
8755 if (btrfs_fs_incompat(root->fs_info,
8756 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8757 key.type = BTRFS_METADATA_ITEM_KEY;
8759 key.type = BTRFS_EXTENT_ITEM_KEY;
8760 key.offset = (u64)-1;
8762 /* Search for the backref in extent tree */
8763 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8765 err |= BACKREF_MISSING;
/* offset = -1 lands after the item; step back to the extent item. */
8768 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8770 err |= BACKREF_MISSING;
8774 leaf = path.nodes[0];
8775 slot = path.slots[0];
8776 btrfs_item_key_to_cpu(leaf, &key, slot);
8778 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* Skinny metadata encodes the level in key.offset; the old layout
 * carries a btrfs_tree_block_info before the inline refs. */
8780 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8781 skinny_level = (int)key.offset;
8782 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8784 struct btrfs_tree_block_info *info;
8786 info = (struct btrfs_tree_block_info *)(ei + 1);
8787 skinny_level = btrfs_tree_block_level(leaf, info);
8788 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* The extent item must be flagged as a tree block. */
8795 if (!(btrfs_extent_flags(leaf, ei) &
8796 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8798 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8799 key.objectid, nodesize,
8800 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8801 err = BACKREF_MISMATCH;
/* When the caller supplied @eb, compare generations too. */
8803 header_gen = btrfs_header_generation(eb);
8804 extent_gen = btrfs_extent_generation(leaf, ei);
8805 if (header_gen != extent_gen) {
8807 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8808 key.objectid, nodesize, header_gen,
8810 err = BACKREF_MISMATCH;
8812 if (level != skinny_level) {
8814 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8815 key.objectid, nodesize, level, skinny_level);
8816 err = BACKREF_MISMATCH;
/* Non-fs trees must have exactly one reference. */
8818 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8820 "extent[%llu %u] is referred by other roots than %llu",
8821 key.objectid, nodesize, root->objectid);
8822 err = BACKREF_MISMATCH;
/*
 * Iterate the extent/metadata item to find the exact backref
 */
8829 item_size = btrfs_item_size_nr(leaf, slot);
8830 ptr = (unsigned long)iref;
8831 end = (unsigned long)ei + item_size;
8833 iref = (struct btrfs_extent_inline_ref *)ptr;
8834 type = btrfs_extent_inline_ref_type(leaf, iref);
8835 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8837 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8838 (offset == root->objectid || offset == owner)) {
8840 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8841 /* Check if the backref points to valid referencer */
8842 found_ref = !check_tree_block_ref(root, NULL, offset,
8848 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Inlined extent item doesn't have what we need, check
 * keyed TREE_BLOCK_REF_KEY as a fallback.
 */
8856 btrfs_release_path(&path);
8857 key.objectid = bytenr;
8858 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8859 key.offset = root->objectid;
8861 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8866 err |= BACKREF_MISSING;
8868 btrfs_release_path(&path);
8869 if (eb && (err & BACKREF_MISSING))
8870 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8871 bytenr, nodesize, owner, level);
/*
 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
 *
 * @root: root owning the leaf @eb
 * @eb:   leaf containing the EXTENT_DATA item
 * @slot: slot of the item in @eb
 *
 * Also validates sector alignment of disk_num_bytes/num_bytes and
 * updates the global data_bytes_allocated/data_bytes_referenced
 * accounting counters.
 *
 * Return >0 any error found and output error message
 * Return 0 for no error found
 *
 * NOTE(review): several declarations (disk_bytenr, owner, err, etc.)
 * and loop/branch closers are elided in this excerpt.
 */
8881 static int check_extent_data_item(struct btrfs_root *root,
8882 struct extent_buffer *eb, int slot)
8884 struct btrfs_file_extent_item *fi;
8885 struct btrfs_path path;
8886 struct btrfs_root *extent_root = root->fs_info->extent_root;
8887 struct btrfs_key fi_key;
8888 struct btrfs_key dbref_key;
8889 struct extent_buffer *leaf;
8890 struct btrfs_extent_item *ei;
8891 struct btrfs_extent_inline_ref *iref;
8892 struct btrfs_extent_data_ref *dref;
8894 u64 file_extent_gen;
8897 u64 extent_num_bytes;
8905 int found_dbackref = 0;
8909 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8910 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8911 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8913 /* Nothing to check for hole and inline data extents */
8914 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8915 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8918 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8919 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8920 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8922 /* Check unaligned disk_num_bytes and num_bytes */
8923 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8925 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8926 fi_key.objectid, fi_key.offset, disk_num_bytes,
8928 err |= BYTES_UNALIGNED;
8930 data_bytes_allocated += disk_num_bytes;
8932 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8934 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8935 fi_key.objectid, fi_key.offset, extent_num_bytes,
8937 err |= BYTES_UNALIGNED;
8939 data_bytes_referenced += extent_num_bytes;
8941 owner = btrfs_header_owner(eb);
8943 /* Check the extent item of the file extent in extent tree */
8944 btrfs_init_path(&path);
8945 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8946 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8947 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8949 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8951 err |= BACKREF_MISSING;
8955 leaf = path.nodes[0];
8956 slot = path.slots[0];
8957 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8959 extent_flags = btrfs_extent_flags(leaf, ei);
8960 extent_gen = btrfs_extent_generation(leaf, ei);
/* The referenced extent item must be flagged as data. */
8962 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8964 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8965 disk_bytenr, disk_num_bytes,
8966 BTRFS_EXTENT_FLAG_DATA);
8967 err |= BACKREF_MISMATCH;
/* A file extent can never predate the data extent it points to. */
8970 if (file_extent_gen < extent_gen) {
8972 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8973 disk_bytenr, disk_num_bytes, file_extent_gen,
8975 err |= BACKREF_MISMATCH;
8978 /* Check data backref inside that extent item */
8979 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8980 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8981 ptr = (unsigned long)iref;
8982 end = (unsigned long)ei + item_size;
8984 iref = (struct btrfs_extent_inline_ref *)ptr;
8985 type = btrfs_extent_inline_ref_type(leaf, iref);
8986 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8988 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8989 ref_root = btrfs_extent_data_ref_root(leaf, dref);
8990 if (ref_root == owner || ref_root == root->objectid)
8992 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
/* Shared ref: the offset is the parent tree block bytenr. */
8993 found_dbackref = !check_tree_block_ref(root, NULL,
8994 btrfs_extent_inline_ref_offset(leaf, iref),
9000 ptr += btrfs_extent_inline_ref_size(type);
9003 /* Didn't found inlined data backref, try EXTENT_DATA_REF_KEY */
9004 if (!found_dbackref) {
9005 btrfs_release_path(&path);
9007 btrfs_init_path(&path);
9008 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
9009 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
/* Keyed data refs are addressed by a hash of (root, ino, offset). */
9010 dbref_key.offset = hash_extent_data_ref(root->objectid,
9011 fi_key.objectid, fi_key.offset);
9013 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
9014 &dbref_key, &path, 0, 0);
9019 if (!found_dbackref)
9020 err |= BACKREF_MISSING;
9022 btrfs_release_path(&path);
9023 if (err & BACKREF_MISSING) {
9024 error("data extent[%llu %llu] backref lost",
9025 disk_bytenr, disk_num_bytes);
/*
 * Get real tree block level for the case like shared block
 *
 * Determines the level from two independent sources — the backref item
 * in the extent tree and the tree block header itself — and only trusts
 * the result when both agree.
 *
 * Return >= 0 as tree level
 * Return <0 for error
 *
 * NOTE(review): error-path returns between the visible statements are
 * elided in this excerpt.
 */
9035 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
9037 struct extent_buffer *eb;
9038 struct btrfs_path path;
9039 struct btrfs_key key;
9040 struct btrfs_extent_item *ei;
9043 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9048 /* Search extent tree for extent generation and level */
9049 key.objectid = bytenr;
9050 key.type = BTRFS_METADATA_ITEM_KEY;
9051 key.offset = (u64)-1;
9053 btrfs_init_path(&path);
9054 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
9057 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
9065 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9066 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
9067 struct btrfs_extent_item);
/* Only tree blocks are meaningful here; data extents are an error. */
9068 flags = btrfs_extent_flags(path.nodes[0], ei);
9069 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
9074 /* Get transid for later read_tree_block() check */
9075 transid = btrfs_extent_generation(path.nodes[0], ei);
9077 /* Get backref level as one source */
9078 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9079 backref_level = key.offset;
9081 struct btrfs_tree_block_info *info;
9083 info = (struct btrfs_tree_block_info *)(ei + 1);
9084 backref_level = btrfs_tree_block_level(path.nodes[0], info);
9086 btrfs_release_path(&path);
9088 /* Get level from tree block as an alternative source */
9089 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
9090 if (!extent_buffer_uptodate(eb)) {
9091 free_extent_buffer(eb);
9094 header_level = btrfs_header_level(eb);
9095 free_extent_buffer(eb);
/* Both sources must agree before the level is trusted. */
9097 if (header_level != backref_level)
9099 return header_level;
/* Error-path cleanup label target (label itself elided). */
9102 btrfs_release_path(&path);
/*
 * Check if a tree block backref is valid (points to a valid tree block)
 * if level == -1, level will be resolved
 * Return >0 for any error found and print error message
 *
 * Strategy: read the claimed tree block, take its first key, search for
 * that key from the owning root, and verify the node reached at @level
 * is exactly the block at @bytenr.
 *
 * NOTE(review): err/ret declarations, some branch closers and the
 * return statement are elided in this excerpt.
 */
9111 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
9112 u64 bytenr, int level)
9114 struct btrfs_root *root;
9115 struct btrfs_key key;
9116 struct btrfs_path path;
9117 struct extent_buffer *eb;
9118 struct extent_buffer *node;
9119 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9123 /* Query level for level == -1 special case */
9125 level = query_tree_block_level(fs_info, bytenr);
9127 err |= REFERENCER_MISSING;
/* Look up the root claimed by the backref. */
9131 key.objectid = root_id;
9132 key.type = BTRFS_ROOT_ITEM_KEY;
9133 key.offset = (u64)-1;
9135 root = btrfs_read_fs_root(fs_info, &key);
9137 err |= REFERENCER_MISSING;
9141 /* Read out the tree block to get item/node key */
9142 eb = read_tree_block(root, bytenr, root->nodesize, 0);
9143 if (!extent_buffer_uptodate(eb)) {
9144 err |= REFERENCER_MISSING;
9145 free_extent_buffer(eb);
9149 /* Empty tree, no need to check key */
9150 if (!btrfs_header_nritems(eb) && !level) {
9151 free_extent_buffer(eb);
/* First key differs between node (level > 0) and leaf. */
9156 btrfs_node_key_to_cpu(eb, &key, 0);
9158 btrfs_item_key_to_cpu(eb, &key, 0);
9160 free_extent_buffer(eb);
9162 btrfs_init_path(&path);
9163 /* Search with the first key, to ensure we can reach it */
9164 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9166 err |= REFERENCER_MISSING;
/* The node reached at @level must be the very block we checked. */
9170 node = path.nodes[level];
9171 if (btrfs_header_bytenr(node) != bytenr) {
9173 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9174 bytenr, nodesize, bytenr,
9175 btrfs_header_bytenr(node));
9176 err |= REFERENCER_MISMATCH;
9178 if (btrfs_header_level(node) != level) {
9180 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9181 bytenr, nodesize, level,
9182 btrfs_header_level(node));
9183 err |= REFERENCER_MISMATCH;
9187 btrfs_release_path(&path);
9189 if (err & REFERENCER_MISSING) {
/* Second message variant includes the resolved level. */
9191 error("extent [%llu %d] lost referencer (owner: %llu)",
9192 bytenr, nodesize, root_id);
9195 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9196 bytenr, nodesize, root_id, level);
/*
 * Check referencer for shared block backref
 * If level == -1, this function will resolve the level.
 *
 * Reads the parent node at @parent and verifies one of its block
 * pointers really points at @bytenr; the parent must sit exactly one
 * level above the child.
 *
 * NOTE(review): loop-variable declarations, early returns and the
 * success return are elided in this excerpt.
 */
9206 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9207 u64 parent, u64 bytenr, int level)
9209 struct extent_buffer *eb;
9210 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9212 int found_parent = 0;
9215 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9216 if (!extent_buffer_uptodate(eb))
/* Resolve level when the caller passed -1. */
9220 level = query_tree_block_level(fs_info, bytenr);
/* Parent must be exactly one level above the referenced block. */
9224 if (level + 1 != btrfs_header_level(eb))
9227 nr = btrfs_header_nritems(eb);
9228 for (i = 0; i < nr; i++) {
9229 if (bytenr == btrfs_node_blockptr(eb, i)) {
9235 free_extent_buffer(eb);
9236 if (!found_parent) {
9238 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9239 bytenr, nodesize, parent, level);
9240 return REFERENCER_MISSING;
/*
 * Check referencer for normal (inlined) data ref
 * If len == 0, it will be resolved by searching in extent tree
 *
 * Walks all EXTENT_DATA items of inode @objectid in root @root_id and
 * counts how many reference (bytenr, len) with matching backref offset;
 * the count must equal the @count recorded in the backref.
 *
 * NOTE(review): error returns, loop bounds and found_count increments
 * are elided in this excerpt.
 */
9249 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9250 u64 root_id, u64 objectid, u64 offset,
9251 u64 bytenr, u64 len, u32 count)
9253 struct btrfs_root *root;
9254 struct btrfs_root *extent_root = fs_info->extent_root;
9255 struct btrfs_key key;
9256 struct btrfs_path path;
9257 struct extent_buffer *leaf;
9258 struct btrfs_file_extent_item *fi;
9259 u32 found_count = 0;
/* len == 0: resolve the extent length from the extent tree first. */
9264 key.objectid = bytenr;
9265 key.type = BTRFS_EXTENT_ITEM_KEY;
9266 key.offset = (u64)-1;
9268 btrfs_init_path(&path);
9269 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9272 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9275 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9276 if (key.objectid != bytenr ||
9277 key.type != BTRFS_EXTENT_ITEM_KEY)
9280 btrfs_release_path(&path);
/* Open the fs root named by the backref. */
9282 key.objectid = root_id;
9283 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9284 key.offset = (u64)-1;
9285 btrfs_init_path(&path);
9287 root = btrfs_read_fs_root(fs_info, &key);
9291 key.objectid = objectid;
9292 key.type = BTRFS_EXTENT_DATA_KEY;
/*
 * It can be nasty as data backref offset is
 * file offset - file extent offset, which is smaller or
 * equal to original backref offset. The only special case is
 * overflow. So we need to special check and do further search.
 */
9299 key.offset = offset & (1ULL << 63) ? 0 : offset;
9301 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
/*
 * Search afterwards to get correct one
 * NOTE: As we must do a comprehensive check on the data backref to
 * make sure the dref count also matches, we must iterate all file
 * extents for that inode.
 */
9312 leaf = path.nodes[0];
9313 slot = path.slots[0];
9315 btrfs_item_key_to_cpu(leaf, &key, slot);
9316 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9318 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
/*
 * Except normal disk bytenr and disk num bytes, we still
 * need to do extra check on dbackref offset as
 * dbackref offset = file_offset - file_extent_offset
 */
9324 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9325 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9326 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9330 ret = btrfs_next_item(root, &path);
9335 btrfs_release_path(&path);
9336 if (found_count != count) {
9338 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9339 bytenr, len, root_id, objectid, offset, count, found_count);
9340 return REFERENCER_MISSING;
/*
 * Check if the referencer of a shared data backref exists
 *
 * Reads the parent leaf at @parent and scans its EXTENT_DATA items for
 * one whose disk_bytenr equals @bytenr (inline extents are skipped as
 * they have no disk extent).
 *
 * NOTE(review): loop-variable declarations, early returns and the
 * success return are elided in this excerpt.
 */
9348 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9349 u64 parent, u64 bytenr)
9351 struct extent_buffer *eb;
9352 struct btrfs_key key;
9353 struct btrfs_file_extent_item *fi;
9354 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9356 int found_parent = 0;
9359 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9360 if (!extent_buffer_uptodate(eb))
9363 nr = btrfs_header_nritems(eb);
9364 for (i = 0; i < nr; i++) {
9365 btrfs_item_key_to_cpu(eb, &key, i);
9366 if (key.type != BTRFS_EXTENT_DATA_KEY)
/* Inline extents carry no disk bytenr; skip them. */
9369 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9370 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9373 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9380 free_extent_buffer(eb);
9381 if (!found_parent) {
9382 error("shared extent %llu referencer lost (parent: %llu)",
9384 return REFERENCER_MISSING;
/*
 * This function will check a given extent item, including its backref and
 * itself (like crossing stripe boundary and type)
 *
 * Since we don't use extent_record anymore, introduce new error bit
 *
 * @fs_info: filesystem info
 * @eb:      extent tree leaf holding the item
 * @slot:    slot of the EXTENT_ITEM/METADATA_ITEM in @eb
 *
 * Also folds the extent size into the global bytes_used counter.
 *
 * NOTE(review): err/ret declarations, the switch statement header, loop
 * condition and the return are elided in this excerpt.
 */
9395 static int check_extent_item(struct btrfs_fs_info *fs_info,
9396 struct extent_buffer *eb, int slot)
9398 struct btrfs_extent_item *ei;
9399 struct btrfs_extent_inline_ref *iref;
9400 struct btrfs_extent_data_ref *dref;
9404 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9405 u32 item_size = btrfs_item_size_nr(eb, slot);
9410 struct btrfs_key key;
/* Account the extent: key.offset is the byte size for EXTENT_ITEM,
 * while METADATA_ITEM always describes one nodesize block. */
9414 btrfs_item_key_to_cpu(eb, &key, slot);
9415 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9416 bytes_used += key.offset;
9418 bytes_used += nodesize;
9420 if (item_size < sizeof(*ei)) {
/*
 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
 * old thing when on disk format is still un-determined.
 * No need to care about it anymore
 */
9426 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9430 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9431 flags = btrfs_extent_flags(eb, ei);
9433 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9435 if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9436 error("bad metadata [%llu, %llu) crossing stripe boundary",
9437 key.objectid, key.objectid + nodesize);
9438 err |= CROSSING_STRIPE_BOUNDARY;
9441 ptr = (unsigned long)(ei + 1);
9443 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9444 /* Old EXTENT_ITEM metadata */
9445 struct btrfs_tree_block_info *info;
9447 info = (struct btrfs_tree_block_info *)ptr;
9448 level = btrfs_tree_block_level(eb, info);
9449 ptr += sizeof(struct btrfs_tree_block_info);
9451 /* New METADATA_ITEM */
9454 end = (unsigned long)ei + item_size;
9457 err |= ITEM_SIZE_MISMATCH;
9461 /* Now check every backref in this extent item */
9463 iref = (struct btrfs_extent_inline_ref *)ptr;
9464 type = btrfs_extent_inline_ref_type(eb, iref);
9465 offset = btrfs_extent_inline_ref_offset(eb, iref);
/* Dispatch each inline ref type to its dedicated referencer check. */
9467 case BTRFS_TREE_BLOCK_REF_KEY:
9468 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9472 case BTRFS_SHARED_BLOCK_REF_KEY:
9473 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9477 case BTRFS_EXTENT_DATA_REF_KEY:
9478 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9479 ret = check_extent_data_backref(fs_info,
9480 btrfs_extent_data_ref_root(eb, dref),
9481 btrfs_extent_data_ref_objectid(eb, dref),
9482 btrfs_extent_data_ref_offset(eb, dref),
9483 key.objectid, key.offset,
9484 btrfs_extent_data_ref_count(eb, dref));
9487 case BTRFS_SHARED_DATA_REF_KEY:
9488 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9492 error("extent[%llu %d %llu] has unknown ref type: %d",
9493 key.objectid, key.type, key.offset, type);
9494 err |= UNKNOWN_TYPE;
9498 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Check if a dev extent item is referred correctly by its chunk
 *
 * Looks up the chunk named by the dev extent's chunk_objectid/offset
 * and verifies both the length and that one of the chunk's stripes
 * points back at this (devid, offset).
 *
 * NOTE(review): variable declarations (length, i, num_stripes),
 * error-path handling and the success return are elided in this
 * excerpt; `l` is presumably assigned from path.nodes[0] in an elided
 * line — confirm against the full file.
 */
9509 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9510 struct extent_buffer *eb, int slot)
9512 struct btrfs_root *chunk_root = fs_info->chunk_root;
9513 struct btrfs_dev_extent *ptr;
9514 struct btrfs_path path;
9515 struct btrfs_key chunk_key;
9516 struct btrfs_key devext_key;
9517 struct btrfs_chunk *chunk;
9518 struct extent_buffer *l;
9522 int found_chunk = 0;
9525 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9526 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9527 length = btrfs_dev_extent_length(eb, ptr);
/* Build the key of the chunk this dev extent claims to belong to. */
9529 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9530 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9531 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9533 btrfs_init_path(&path);
9534 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9539 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9540 if (btrfs_chunk_length(l, chunk) != length)
/* One of the chunk's stripes must map back to this dev extent. */
9543 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9544 for (i = 0; i < num_stripes; i++) {
9545 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9546 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9548 if (devid == devext_key.objectid &&
9549 offset == devext_key.offset) {
9555 btrfs_release_path(&path);
9558 "device extent[%llu, %llu, %llu] did not find the related chunk",
9559 devext_key.objectid, devext_key.offset, length);
9560 return REFERENCER_MISSING;
/*
 * Check if the used space is correct with the dev item
 *
 * Sums the lengths of all DEV_EXTENT items belonging to the device and
 * compares the total with bytes_used stored in the DEV_ITEM.
 *
 * NOTE(review): declarations of dev_id/used/total/ret, the iteration
 * loop header and the success return are elided in this excerpt.
 */
9568 static int check_dev_item(struct btrfs_fs_info *fs_info,
9569 struct extent_buffer *eb, int slot)
9571 struct btrfs_root *dev_root = fs_info->dev_root;
9572 struct btrfs_dev_item *dev_item;
9573 struct btrfs_path path;
9574 struct btrfs_key key;
9575 struct btrfs_dev_extent *ptr;
9581 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9582 dev_id = btrfs_device_id(eb, dev_item);
9583 used = btrfs_device_bytes_used(eb, dev_item);
/* Position on the first DEV_EXTENT of this device in the dev tree. */
9585 key.objectid = dev_id;
9586 key.type = BTRFS_DEV_EXTENT_KEY;
9589 btrfs_init_path(&path);
9590 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9592 btrfs_item_key_to_cpu(eb, &key, slot);
9593 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9594 key.objectid, key.type, key.offset);
9595 btrfs_release_path(&path);
9596 return REFERENCER_MISSING;
9599 /* Iterate dev_extents to calculate the used space of a device */
9601 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
/* Stop once iteration leaves this device's DEV_EXTENT range. */
9603 if (key.objectid > dev_id)
9605 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9608 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9609 struct btrfs_dev_extent);
9610 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9612 ret = btrfs_next_item(dev_root, &path);
9616 btrfs_release_path(&path);
9618 if (used != total) {
9619 btrfs_item_key_to_cpu(eb, &key, slot);
9621 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9622 total, used, BTRFS_ROOT_TREE_OBJECTID,
9623 BTRFS_DEV_EXTENT_KEY, dev_id);
9624 return ACCOUNTING_MISMATCH;
/*
 * check_block_group_item - cross-check a BLOCK_GROUP_ITEM.
 *
 * Verifies that the block group has a matching chunk item (same start
 * and length) in the chunk tree, then walks EXTENT_ITEM/METADATA_ITEM
 * entries inside the block group range to re-compute the used bytes and
 * to verify extent type vs. block group flags.
 *
 * Error bits accumulated in err: REFERENCER_MISSING, REFERENCER_MISMATCH,
 * CHUNK_TYPE_MISMATCH, ACCOUNTING_MISMATCH.
 *
 * NOTE(review): original line numbering is non-contiguous; some
 * statements are not visible in this listing.
 */
9630 * Check a block group item with its referencer (chunk) and its used space
9631 * with extent/metadata item
9633 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9634 struct extent_buffer *eb, int slot)
9636 struct btrfs_root *extent_root = fs_info->extent_root;
9637 struct btrfs_root *chunk_root = fs_info->chunk_root;
9638 struct btrfs_block_group_item *bi;
9639 struct btrfs_block_group_item bg_item;
9640 struct btrfs_path path;
9641 struct btrfs_key bg_key;
9642 struct btrfs_key chunk_key;
9643 struct btrfs_key extent_key;
9644 struct btrfs_chunk *chunk;
9645 struct extent_buffer *leaf;
9646 struct btrfs_extent_item *ei;
9647 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
/* Copy the on-disk block group item so we can read used/flags. */
9655 btrfs_item_key_to_cpu(eb, &bg_key, slot);
9656 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9657 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9658 used = btrfs_block_group_used(&bg_item);
9659 bg_flags = btrfs_block_group_flags(&bg_item);
9661 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9662 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9663 chunk_key.offset = bg_key.objectid;
9665 btrfs_init_path(&path);
9666 /* Search for the referencer chunk */
9667 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9670 "block group[%llu %llu] did not find the related chunk item",
9671 bg_key.objectid, bg_key.offset);
9672 err |= REFERENCER_MISSING;
9674 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9675 struct btrfs_chunk);
/* Chunk and block group must describe the same byte range length. */
9676 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9679 "block group[%llu %llu] related chunk item length does not match",
9680 bg_key.objectid, bg_key.offset);
9681 err |= REFERENCER_MISMATCH;
9684 btrfs_release_path(&path);
9686 /* Search from the block group bytenr */
9687 extent_key.objectid = bg_key.objectid;
9688 extent_key.type = 0;
9689 extent_key.offset = 0;
9691 btrfs_init_path(&path);
9692 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9696 /* Iterate extent tree to account used space */
9698 leaf = path.nodes[0];
9699 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9700 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9703 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9704 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9706 if (extent_key.objectid < bg_key.objectid)
/* METADATA_ITEM has no length in the key; EXTENT_ITEM stores it in offset. */
9709 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9712 total += extent_key.offset;
9714 ei = btrfs_item_ptr(leaf, path.slots[0],
9715 struct btrfs_extent_item);
9716 flags = btrfs_extent_flags(leaf, ei);
9717 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9718 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9720 "bad extent[%llu, %llu) type mismatch with chunk",
9721 extent_key.objectid,
9722 extent_key.objectid + extent_key.offset);
9723 err |= CHUNK_TYPE_MISMATCH;
9725 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9726 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9727 BTRFS_BLOCK_GROUP_METADATA))) {
9729 "bad extent[%llu, %llu) type mismatch with chunk",
9730 extent_key.objectid,
9731 extent_key.objectid + nodesize);
9732 err |= CHUNK_TYPE_MISMATCH;
9736 ret = btrfs_next_item(extent_root, &path);
9742 btrfs_release_path(&path);
9744 if (total != used) {
9746 "block group[%llu %llu] used %llu but extent items used %llu",
9747 bg_key.objectid, bg_key.offset, used, total);
9748 err |= ACCOUNTING_MISMATCH;
/*
 * check_chunk_item - validate a CHUNK_ITEM and its referencers.
 *
 * Checks: length alignment to sectorsize, chunk type/profile sanity,
 * existence of the matching BLOCK_GROUP_ITEM (with equal flags), and
 * one DEV_EXTENT per stripe whose chunk objectid/offset/length point
 * back at this chunk.
 *
 * Error bits accumulated in err: BYTES_UNALIGNED, UNKNOWN_TYPE,
 * REFERENCER_MISSING, BACKREF_MISSING.
 *
 * NOTE(review): original line numbering is non-contiguous; some
 * statements are not visible in this listing.
 */
9754 * Check a chunk item.
9755 * Including checking all referred dev_extents and block group
9757 static int check_chunk_item(struct btrfs_fs_info *fs_info,
9758 struct extent_buffer *eb, int slot)
9760 struct btrfs_root *extent_root = fs_info->extent_root;
9761 struct btrfs_root *dev_root = fs_info->dev_root;
9762 struct btrfs_path path;
9763 struct btrfs_key chunk_key;
9764 struct btrfs_key bg_key;
9765 struct btrfs_key devext_key;
9766 struct btrfs_chunk *chunk;
9767 struct extent_buffer *leaf;
9768 struct btrfs_block_group_item *bi;
9769 struct btrfs_block_group_item bg_item;
9770 struct btrfs_dev_extent *ptr;
9771 u32 sectorsize = btrfs_super_sectorsize(fs_info->super_copy);
9783 btrfs_item_key_to_cpu(eb, &chunk_key, slot);
9784 chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
9785 length = btrfs_chunk_length(eb, chunk);
9786 chunk_end = chunk_key.offset + length;
9787 if (!IS_ALIGNED(length, sectorsize)) {
9788 error("chunk[%llu %llu) not aligned to %u",
9789 chunk_key.offset, chunk_end, sectorsize);
9790 err |= BYTES_UNALIGNED;
/* Chunk must carry at least one of DATA/METADATA/SYSTEM type bits. */
9794 type = btrfs_chunk_type(eb, chunk);
9795 profile = type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
9796 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
9797 error("chunk[%llu %llu) has no chunk type",
9798 chunk_key.offset, chunk_end);
9799 err |= UNKNOWN_TYPE;
/* (profile & (profile - 1)) != 0 means more than one profile bit set. */
9801 if (profile && (profile & (profile - 1))) {
9802 error("chunk[%llu %llu) multiple profiles detected: %llx",
9803 chunk_key.offset, chunk_end, profile);
9804 err |= UNKNOWN_TYPE;
/* Look for the matching block group item in the extent tree. */
9807 bg_key.objectid = chunk_key.offset;
9808 bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
9809 bg_key.offset = length;
9811 btrfs_init_path(&path);
9812 ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
9815 "chunk[%llu %llu) did not find the related block group item",
9816 chunk_key.offset, chunk_end);
9817 err |= REFERENCER_MISSING;
9819 leaf = path.nodes[0];
9820 bi = btrfs_item_ptr(leaf, path.slots[0],
9821 struct btrfs_block_group_item);
9822 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
9824 if (btrfs_block_group_flags(&bg_item) != type) {
9826 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
9827 chunk_key.offset, chunk_end, type,
9828 btrfs_block_group_flags(&bg_item));
9829 err |= REFERENCER_MISSING;
/* Every stripe must be backed by a dev extent pointing at this chunk. */
9833 num_stripes = btrfs_chunk_num_stripes(eb, chunk);
9834 for (i = 0; i < num_stripes; i++) {
9835 btrfs_release_path(&path);
9836 btrfs_init_path(&path);
9837 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
9838 devext_key.type = BTRFS_DEV_EXTENT_KEY;
9839 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
9841 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
9846 leaf = path.nodes[0];
9847 ptr = btrfs_item_ptr(leaf, path.slots[0],
9848 struct btrfs_dev_extent);
9849 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
9850 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
9851 if (objectid != chunk_key.objectid ||
9852 offset != chunk_key.offset ||
9853 btrfs_dev_extent_length(leaf, ptr) != length)
9857 err |= BACKREF_MISSING;
9859 "chunk[%llu %llu) stripe %d did not find the related dev extent",
9860 chunk_key.objectid, chunk_end, i);
9863 btrfs_release_path(&path);
/*
 * check_leaf_items - dispatch per-item checks for one leaf.
 *
 * Walks the items of @eb and calls the matching checker for each key
 * type (file extents, block groups, dev items/extents, chunks, extent
 * and metadata items, and the various backref item types). Also
 * accumulates total_csum_bytes for EXTENT_CSUM items.
 *
 * NOTE(review): original line numbering is non-contiguous; the error
 * accumulation and loop scaffolding are partially elided here.
 */
9869 * Main entry function to check known items and update related accounting info
9871 static int check_leaf_items(struct btrfs_root *root, struct extent_buffer *eb)
9873 struct btrfs_fs_info *fs_info = root->fs_info;
9874 struct btrfs_key key;
9877 struct btrfs_extent_data_ref *dref;
9882 btrfs_item_key_to_cpu(eb, &key, slot);
9883 type = btrfs_key_type(&key);
9886 case BTRFS_EXTENT_DATA_KEY:
9887 ret = check_extent_data_item(root, eb, slot);
9890 case BTRFS_BLOCK_GROUP_ITEM_KEY:
9891 ret = check_block_group_item(fs_info, eb, slot);
9894 case BTRFS_DEV_ITEM_KEY:
9895 ret = check_dev_item(fs_info, eb, slot);
9898 case BTRFS_CHUNK_ITEM_KEY:
9899 ret = check_chunk_item(fs_info, eb, slot);
9902 case BTRFS_DEV_EXTENT_KEY:
9903 ret = check_dev_extent_item(fs_info, eb, slot);
9906 case BTRFS_EXTENT_ITEM_KEY:
9907 case BTRFS_METADATA_ITEM_KEY:
9908 ret = check_extent_item(fs_info, eb, slot);
/* Csum items only contribute to accounting, no structural check here. */
9911 case BTRFS_EXTENT_CSUM_KEY:
9912 total_csum_bytes += btrfs_item_size_nr(eb, slot);
9914 case BTRFS_TREE_BLOCK_REF_KEY:
9915 ret = check_tree_block_backref(fs_info, key.offset,
9919 case BTRFS_EXTENT_DATA_REF_KEY:
9920 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
9921 ret = check_extent_data_backref(fs_info,
9922 btrfs_extent_data_ref_root(eb, dref),
9923 btrfs_extent_data_ref_objectid(eb, dref),
9924 btrfs_extent_data_ref_offset(eb, dref),
9926 btrfs_extent_data_ref_count(eb, dref));
9929 case BTRFS_SHARED_BLOCK_REF_KEY:
9930 ret = check_shared_block_backref(fs_info, key.offset,
9934 case BTRFS_SHARED_DATA_REF_KEY:
9935 ret = check_shared_data_backref(fs_info, key.offset,
/* Advance to the next slot until the leaf is exhausted. */
9943 if (++slot < btrfs_header_nritems(eb))
/*
 * should_check - decide whether this root should check a shared block.
 *
 * Looks up the tree block's extent item and scans its inline refs; if
 * an inline TREE_BLOCK_REF with a rootid lower than @root's exists, the
 * block will be checked by that lower root instead, so this root skips
 * it. Keyed tree block refs are deliberately not searched (see comment
 * near the end).
 *
 * NOTE(review): original line numbering is non-contiguous; the return
 * statements are among the elided lines.
 */
9950 * Helper function for later fs/subvol tree check. To determine if a tree
9951 * block should be checked.
9952 * This function will ensure only the direct referencer with lowest rootid to
9953 * check a fs/subvolume tree block.
9955 * Backref check at extent tree would detect errors like missing subvolume
9956 * tree, so we can do aggressive check to reduce duplicated checks.
9958 static int should_check(struct btrfs_root *root, struct extent_buffer *eb)
9960 struct btrfs_root *extent_root = root->fs_info->extent_root;
9961 struct btrfs_key key;
9962 struct btrfs_path path;
9963 struct extent_buffer *leaf;
9965 struct btrfs_extent_item *ei;
9971 struct btrfs_extent_inline_ref *iref;
9974 btrfs_init_path(&path);
9975 key.objectid = btrfs_header_bytenr(eb);
9976 key.type = BTRFS_METADATA_ITEM_KEY;
9977 key.offset = (u64)-1;
9980 * Any failure in backref resolving means we can't determine
9981 * whom the tree block belongs to.
9982 * So in that case, we need to check that tree block
9984 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9988 ret = btrfs_previous_extent_item(extent_root, &path,
9989 btrfs_header_bytenr(eb));
9993 leaf = path.nodes[0];
9994 slot = path.slots[0];
9995 btrfs_item_key_to_cpu(leaf, &key, slot);
9996 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/* Skinny metadata items have the inline refs right after the item;
 * full extent items carry a tree_block_info first. */
9998 if (key.type == BTRFS_METADATA_ITEM_KEY) {
9999 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10001 struct btrfs_tree_block_info *info;
10003 info = (struct btrfs_tree_block_info *)(ei + 1);
10004 iref = (struct btrfs_extent_inline_ref *)(info + 1);
10007 item_size = btrfs_item_size_nr(leaf, slot);
10008 ptr = (unsigned long)iref;
10009 end = (unsigned long)ei + item_size;
10010 while (ptr < end) {
10011 iref = (struct btrfs_extent_inline_ref *)ptr;
10012 type = btrfs_extent_inline_ref_type(leaf, iref);
10013 offset = btrfs_extent_inline_ref_offset(leaf, iref);
10016 * We only check the tree block if current root is
10017 * the lowest referencer of it.
10019 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
10020 offset < root->objectid) {
10021 btrfs_release_path(&path);
10025 ptr += btrfs_extent_inline_ref_size(type);
10028 * Normally we should also check keyed tree block ref, but that may be
10029 * very time consuming. Inlined ref should already make us skip a lot
10030 * of refs now. So skip search keyed tree block ref.
10034 btrfs_release_path(&path);
/*
 * traverse_tree_block - recursively check a tree block and its children.
 *
 * Skips shared fs/subvolume blocks (delegated to the lowest referencing
 * root via should_check()), updates the global byte accounting counters,
 * checks the block itself (backref + leaf items), then recurses into
 * each child pointer. Recursion depth is bounded by the btrfs tree
 * height (at most 8 levels).
 *
 * NOTE(review): original line numbering is non-contiguous; several
 * statements (returns, error accumulation) are elided here.
 */
10039 * Traversal function for tree block. We will do:
10040 * 1) Skip shared fs/subvolume tree blocks
10041 * 2) Update related bytes accounting
10042 * 3) Pre-order traversal
10044 static int traverse_tree_block(struct btrfs_root *root,
10045 struct extent_buffer *node)
10047 struct extent_buffer *eb;
10055 * Skip shared fs/subvolume tree block, in that case they will
10056 * be checked by referencer with lowest rootid
10058 if (is_fstree(root->objectid) && !should_check(root, node))
10061 /* Update bytes accounting */
10062 total_btree_bytes += node->len;
10063 if (fs_root_objectid(btrfs_header_owner(node)))
10064 total_fs_tree_bytes += node->len;
10065 if (btrfs_header_owner(node) == BTRFS_EXTENT_TREE_OBJECTID)
10066 total_extent_tree_bytes += node->len;
/* Detect old-style (pre mixed-backref) reloc tree blocks once. */
10067 if (!found_old_backref &&
10068 btrfs_header_owner(node) == BTRFS_TREE_RELOC_OBJECTID &&
10069 btrfs_header_backref_rev(node) == BTRFS_MIXED_BACKREF_REV &&
10070 !btrfs_header_flag(node, BTRFS_HEADER_FLAG_RELOC))
10071 found_old_backref = 1;
10073 /* pre-order traversal, check itself first */
10074 level = btrfs_header_level(node);
10075 ret = check_tree_block_ref(root, node, btrfs_header_bytenr(node),
10076 btrfs_header_level(node),
10077 btrfs_header_owner(node));
10081 "check %s failed root %llu bytenr %llu level %d, force continue check",
10082 level ? "node":"leaf", root->objectid,
10083 btrfs_header_bytenr(node), btrfs_header_level(node));
10086 btree_space_waste += btrfs_leaf_free_space(root, node);
10087 ret = check_leaf_items(root, node);
/* For internal nodes, account the unused key-pointer slots as waste. */
10092 nr = btrfs_header_nritems(node);
10093 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) - nr) *
10094 sizeof(struct btrfs_key_ptr);
10096 /* Then check all its children */
10097 for (i = 0; i < nr; i++) {
10098 u64 blocknr = btrfs_node_blockptr(node, i);
10101 * As a btrfs tree has most 8 levels (0..7), so it's quite safe
10102 * to call the function itself.
10104 eb = read_tree_block(root, blocknr, root->nodesize, 0);
10105 if (extent_buffer_uptodate(eb)) {
10106 ret = traverse_tree_block(root, eb);
10109 free_extent_buffer(eb);
10116 * Low memory usage version check_chunks_and_extents.
10118 static int check_chunks_and_extents_v2(struct btrfs_root *root)
10120 struct btrfs_path path;
10121 struct btrfs_key key;
10122 struct btrfs_root *root1;
10123 struct btrfs_root *cur_root;
10127 root1 = root->fs_info->chunk_root;
10128 ret = traverse_tree_block(root1, root1->node);
10131 root1 = root->fs_info->tree_root;
10132 ret = traverse_tree_block(root1, root1->node);
10135 btrfs_init_path(&path);
10136 key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
10138 key.type = BTRFS_ROOT_ITEM_KEY;
10140 ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
10142 error("cannot find extent treet in tree_root");
10147 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
10148 if (key.type != BTRFS_ROOT_ITEM_KEY)
10150 key.offset = (u64)-1;
10152 cur_root = btrfs_read_fs_root(root->fs_info, &key);
10153 if (IS_ERR(cur_root) || !cur_root) {
10154 error("failed to read tree: %lld", key.objectid);
10158 ret = traverse_tree_block(cur_root, cur_root->node);
10162 ret = btrfs_next_item(root1, &path);
10168 btrfs_release_path(&path);
/*
 * btrfs_fsck_reinit_root - replace a root's node with a fresh empty one.
 *
 * Allocates (or, per @overwrite, reuses) a tree block, zeroes and fills
 * its header (level, bytenr, generation, backref rev, owner, fsid and
 * chunk tree uuid), marks it dirty, updates the root item when the new
 * block landed on the same bytenr as the old one, and puts the root on
 * the dirty list.
 *
 * NOTE(review): original line numbering is non-contiguous; the
 * overwrite branch and error paths are partially elided here.
 */
10172 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
10173 struct btrfs_root *root, int overwrite)
10175 struct extent_buffer *c;
10176 struct extent_buffer *old = root->node;
10179 struct btrfs_disk_key disk_key = {0,0,0};
10185 extent_buffer_get(c);
10188 c = btrfs_alloc_free_block(trans, root,
10190 root->root_key.objectid,
10191 &disk_key, level, 0, 0);
10194 extent_buffer_get(c);
/* Build a pristine header for the new (empty) root node. */
10198 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
10199 btrfs_set_header_level(c, level);
10200 btrfs_set_header_bytenr(c, c->start);
10201 btrfs_set_header_generation(c, trans->transid);
10202 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
10203 btrfs_set_header_owner(c, root->root_key.objectid);
10205 write_extent_buffer(c, root->fs_info->fsid,
10206 btrfs_header_fsid(), BTRFS_FSID_SIZE);
10208 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
10209 btrfs_header_chunk_tree_uuid(c),
10212 btrfs_mark_buffer_dirty(c);
10214 * this case can happen in the following case:
10216 * 1.overwrite previous root.
10218 * 2.reinit reloc data root, this is because we skip pin
10219 * down reloc data tree before which means we can allocate
10220 * same block bytenr here.
10222 if (old->start == c->start) {
10223 btrfs_set_root_generation(&root->root_item,
10225 root->root_item.level = btrfs_header_level(root->node);
10226 ret = btrfs_update_root(trans, root->fs_info->tree_root,
10227 &root->root_key, &root->root_item);
10229 free_extent_buffer(c);
10233 free_extent_buffer(old);
10235 add_root_to_dirty_list(root);
/*
 * pin_down_tree_blocks - recursively pin the extents of a tree.
 *
 * Pins @eb itself (skipping blocks already marked EXTENT_DIRTY in
 * pinned_extents to avoid loops on broken filesystems), then walks its
 * items/pointers: for the tree root (@tree_root != 0), ROOT_ITEMs are
 * followed into their subtrees (extent and reloc roots excluded); for
 * other trees, level-1 children are pinned without being read.
 *
 * NOTE(review): original line numbering is non-contiguous; loop/branch
 * scaffolding is partially elided here.
 */
10239 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
10240 struct extent_buffer *eb, int tree_root)
10242 struct extent_buffer *tmp;
10243 struct btrfs_root_item *ri;
10244 struct btrfs_key key;
10247 int level = btrfs_header_level(eb);
10253 * If we have pinned this block before, don't pin it again.
10254 * This can not only avoid forever loop with broken filesystem
10255 * but also give us some speedups.
10257 if (test_range_bit(&fs_info->pinned_extents, eb->start,
10258 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
10261 btrfs_pin_extent(fs_info, eb->start, eb->len);
10263 nodesize = btrfs_super_nodesize(fs_info->super_copy);
10264 nritems = btrfs_header_nritems(eb);
10265 for (i = 0; i < nritems; i++) {
10267 btrfs_item_key_to_cpu(eb, &key, i);
10268 if (key.type != BTRFS_ROOT_ITEM_KEY)
10270 /* Skip the extent root and reloc roots */
10271 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
10272 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10273 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
10275 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
10276 bytenr = btrfs_disk_root_bytenr(eb, ri);
10279 * If at any point we start needing the real root we
10280 * will have to build a stump root for the root we are
10281 * in, but for now this doesn't actually use the root so
10282 * just pass in extent_root.
10284 tmp = read_tree_block(fs_info->extent_root, bytenr,
10286 if (!extent_buffer_uptodate(tmp)) {
10287 fprintf(stderr, "Error reading root block\n");
10290 ret = pin_down_tree_blocks(fs_info, tmp, 0);
10291 free_extent_buffer(tmp);
10295 bytenr = btrfs_node_blockptr(eb, i);
10297 /* If we aren't the tree root don't read the block */
10298 if (level == 1 && !tree_root) {
10299 btrfs_pin_extent(fs_info, bytenr, nodesize);
10303 tmp = read_tree_block(fs_info->extent_root, bytenr,
10305 if (!extent_buffer_uptodate(tmp)) {
10306 fprintf(stderr, "Error reading tree block\n");
10309 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
10310 free_extent_buffer(tmp);
/*
 * pin_metadata_blocks - pin all metadata referenced by the chunk tree
 * and the tree root (tree_root flag set so its ROOT_ITEMs are followed).
 * Returns the result of the second pin_down_tree_blocks() call.
 */
10319 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
10323 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
10327 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * reset_block_groups - rebuild in-memory block groups from the chunk tree.
 *
 * Clears the cached avail_*_alloc_bits (they may carry stale bits from
 * a broken image), iterates all CHUNK_ITEMs to recreate the in-memory
 * block groups, marks their ranges dirty in the free space cache, then
 * zeroes the used bytes of every block group cache entry.
 *
 * NOTE(review): original line numbering is non-contiguous; loop exits
 * and some statements are elided here.
 */
10330 static int reset_block_groups(struct btrfs_fs_info *fs_info)
10332 struct btrfs_block_group_cache *cache;
10333 struct btrfs_path *path;
10334 struct extent_buffer *leaf;
10335 struct btrfs_chunk *chunk;
10336 struct btrfs_key key;
10340 path = btrfs_alloc_path();
10345 key.type = BTRFS_CHUNK_ITEM_KEY;
10348 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
10350 btrfs_free_path(path);
10355 * We do this in case the block groups were screwed up and had alloc
10356 * bits that aren't actually set on the chunks. This happens with
10357 * restored images every time and could happen in real life I guess.
10359 fs_info->avail_data_alloc_bits = 0;
10360 fs_info->avail_metadata_alloc_bits = 0;
10361 fs_info->avail_system_alloc_bits = 0;
10363 /* First we need to create the in-memory block groups */
10365 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10366 ret = btrfs_next_leaf(fs_info->chunk_root, path);
10368 btrfs_free_path(path);
10376 leaf = path->nodes[0];
10377 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10378 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
10383 chunk = btrfs_item_ptr(leaf, path->slots[0],
10384 struct btrfs_chunk);
/* Recreate the block group for this chunk's logical range. */
10385 btrfs_add_block_group(fs_info, 0,
10386 btrfs_chunk_type(leaf, chunk),
10387 key.objectid, key.offset,
10388 btrfs_chunk_length(leaf, chunk));
10389 set_extent_dirty(&fs_info->free_space_cache, key.offset,
10390 key.offset + btrfs_chunk_length(leaf, chunk),
10396 cache = btrfs_lookup_first_block_group(fs_info, start);
10400 start = cache->key.objectid + cache->key.offset;
10403 btrfs_free_path(path);
/*
 * reset_balance - remove pending balance state and reinit data reloc root.
 *
 * Deletes the BALANCE_ITEM if present, removes all TREE_RELOC root
 * items from the tree root (batched via del_slot/del_nr), then reads
 * the data reloc tree, re-initializes its root node and recreates its
 * root directory.
 *
 * NOTE(review): original line numbering is non-contiguous; the
 * batching logic and several error paths are elided here.
 */
10407 static int reset_balance(struct btrfs_trans_handle *trans,
10408 struct btrfs_fs_info *fs_info)
10410 struct btrfs_root *root = fs_info->tree_root;
10411 struct btrfs_path *path;
10412 struct extent_buffer *leaf;
10413 struct btrfs_key key;
10414 int del_slot, del_nr = 0;
10418 path = btrfs_alloc_path();
10422 key.objectid = BTRFS_BALANCE_OBJECTID;
10423 key.type = BTRFS_BALANCE_ITEM_KEY;
10426 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10431 goto reinit_data_reloc;
10436 ret = btrfs_del_item(trans, root, path);
10439 btrfs_release_path(path);
/* Now wipe every TREE_RELOC root item from the tree root. */
10441 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
10442 key.type = BTRFS_ROOT_ITEM_KEY;
10445 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
10449 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10454 ret = btrfs_del_items(trans, root, path,
10461 btrfs_release_path(path);
10464 ret = btrfs_search_slot(trans, root, &key, path,
10471 leaf = path->nodes[0];
10472 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10473 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
10475 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10480 del_slot = path->slots[0];
10489 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
10493 btrfs_release_path(path);
/* Finally re-initialize the data relocation tree. */
10496 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
10497 key.type = BTRFS_ROOT_ITEM_KEY;
10498 key.offset = (u64)-1;
10499 root = btrfs_read_fs_root(fs_info, &key);
10500 if (IS_ERR(root)) {
10501 fprintf(stderr, "Error reading data reloc tree\n");
10502 ret = PTR_ERR(root);
10505 record_root_in_trans(trans, root);
10506 ret = btrfs_fsck_reinit_root(trans, root, 0);
10509 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
10511 btrfs_free_path(path);
/*
 * reinit_extent_tree - rebuild the extent tree from scratch.
 *
 * Refuses mixed block groups (metadata pinning doesn't walk file
 * extents). Pins all metadata, drops and recreates the in-memory block
 * groups, re-initializes the extent root, re-inserts every block group
 * item, and finally clears any pending balance state.
 *
 * NOTE(review): original line numbering is non-contiguous; several
 * error paths and the return are elided here.
 */
10515 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
10516 struct btrfs_fs_info *fs_info)
10522 * The only reason we don't do this is because right now we're just
10523 * walking the trees we find and pinning down their bytes, we don't look
10524 * at any of the leaves. In order to do mixed groups we'd have to check
10525 * the leaves of any fs roots and pin down the bytes for any file
10526 * extents we find. Not hard but why do it if we don't have to?
10528 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
10529 fprintf(stderr, "We don't support re-initing the extent tree "
10530 "for mixed block groups yet, please notify a btrfs "
10531 "developer you want to do this so they can add this "
10532 "functionality.\n");
10537 * first we need to walk all of the trees except the extent tree and pin
10538 * down the bytes that are in use so we don't overwrite any existing
10541 ret = pin_metadata_blocks(fs_info);
10543 fprintf(stderr, "error pinning down used bytes\n");
10548 * Need to drop all the block groups since we're going to recreate all
10551 btrfs_free_block_groups(fs_info);
10552 ret = reset_block_groups(fs_info);
10554 fprintf(stderr, "error resetting the block groups\n");
10558 /* Ok we can allocate now, reinit the extent root */
10559 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
10561 fprintf(stderr, "extent root initialization failed\n");
10563 * When the transaction code is updated we should end the
10564 * transaction, but for now progs only knows about commit so
10565 * just return an error.
10571 * Now we have all the in-memory block groups setup so we can make
10572 * allocations properly, and the metadata we care about is safe since we
10573 * pinned all of it above.
10576 struct btrfs_block_group_cache *cache;
10578 cache = btrfs_lookup_first_block_group(fs_info, start);
10581 start = cache->key.objectid + cache->key.offset;
/* Re-insert the block group item for this cache entry. */
10582 ret = btrfs_insert_item(trans, fs_info->extent_root,
10583 &cache->key, &cache->item,
10584 sizeof(cache->item));
10586 fprintf(stderr, "Error adding block group\n");
10589 btrfs_extent_post_op(trans, fs_info->extent_root);
10592 ret = reset_balance(trans, fs_info);
10594 fprintf(stderr, "error resetting the pending balance\n");
/*
 * recow_extent_buffer - force a COW of one metadata block.
 *
 * Resolves the owner root of @eb, then performs a write search (cow=1)
 * down to @eb's level using its first key, which rewrites the block.
 * The transaction is committed before returning.
 *
 * NOTE(review): original line numbering is non-contiguous; some error
 * handling lines are elided here.
 */
10599 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10601 struct btrfs_path *path;
10602 struct btrfs_trans_handle *trans;
10603 struct btrfs_key key;
10606 printf("Recowing metadata block %llu\n", eb->start);
10607 key.objectid = btrfs_header_owner(eb);
10608 key.type = BTRFS_ROOT_ITEM_KEY;
10609 key.offset = (u64)-1;
10611 root = btrfs_read_fs_root(root->fs_info, &key);
10612 if (IS_ERR(root)) {
10613 fprintf(stderr, "Couldn't find owner root %llu\n",
10615 return PTR_ERR(root);
10618 path = btrfs_alloc_path();
10622 trans = btrfs_start_transaction(root, 1);
10623 if (IS_ERR(trans)) {
10624 btrfs_free_path(path);
10625 return PTR_ERR(trans);
/* Search with lowest_level set so the walk (and COW) stops at eb. */
10628 path->lowest_level = btrfs_header_level(eb);
10629 if (path->lowest_level)
10630 btrfs_node_key_to_cpu(eb, &key, 0);
10632 btrfs_item_key_to_cpu(eb, &key, 0);
10634 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10635 btrfs_commit_transaction(trans, root);
10636 btrfs_free_path(path);
/*
 * delete_bad_item - remove a previously recorded bad item from its root.
 *
 * Reads the owning root from @bad->root_id, searches for @bad->key with
 * write access (ins_len -1, cow 1) and deletes the item, committing the
 * transaction before returning.
 *
 * NOTE(review): original line numbering is non-contiguous; some error
 * handling lines are elided here.
 */
10640 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10642 struct btrfs_path *path;
10643 struct btrfs_trans_handle *trans;
10644 struct btrfs_key key;
10647 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10648 bad->key.type, bad->key.offset);
10649 key.objectid = bad->root_id;
10650 key.type = BTRFS_ROOT_ITEM_KEY;
10651 key.offset = (u64)-1;
10653 root = btrfs_read_fs_root(root->fs_info, &key);
10654 if (IS_ERR(root)) {
10655 fprintf(stderr, "Couldn't find owner root %llu\n",
10657 return PTR_ERR(root);
10660 path = btrfs_alloc_path();
10664 trans = btrfs_start_transaction(root, 1);
10665 if (IS_ERR(trans)) {
10666 btrfs_free_path(path);
10667 return PTR_ERR(trans);
10670 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10676 ret = btrfs_del_item(trans, root, path);
10678 btrfs_commit_transaction(trans, root);
10679 btrfs_free_path(path);
/*
 * zero_log_tree - discard the log tree by clearing its superblock
 * pointers (log_root bytenr and level set to 0) inside a committed
 * transaction.
 */
10683 static int zero_log_tree(struct btrfs_root *root)
10685 struct btrfs_trans_handle *trans;
10688 trans = btrfs_start_transaction(root, 1);
10689 if (IS_ERR(trans)) {
10690 ret = PTR_ERR(trans);
10693 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10694 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10695 ret = btrfs_commit_transaction(trans, root);
/*
 * populate_csum - compute and insert csums for a disk byte range.
 *
 * Reads @len bytes starting at @start one sector at a time into @buf
 * (caller-provided, at least one sectorsize large) and inserts a csum
 * item for each sector via btrfs_csum_file_block().
 *
 * NOTE(review): original line numbering is non-contiguous; error
 * handling between the calls is elided here.
 */
10699 static int populate_csum(struct btrfs_trans_handle *trans,
10700 struct btrfs_root *csum_root, char *buf, u64 start,
10707 while (offset < len) {
10708 sectorsize = csum_root->sectorsize;
10709 ret = read_extent_data(csum_root, buf, start + offset,
/* start + len is passed as the range end for the csum insertion. */
10713 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10714 start + offset, buf, sectorsize);
10717 offset += sectorsize;
/*
 * fill_csum_tree_from_one_fs_root - rebuild csums from one fs/subvol tree.
 *
 * Iterates all EXTENT_DATA items of @cur_root, and for each regular
 * (REG) extent computes csums over its on-disk range via
 * populate_csum(). Inline and prealloc extents are skipped by the
 * REG-type check; EEXIST from populate_csum is tolerated.
 *
 * NOTE(review): original line numbering is non-contiguous; allocation
 * failure handling and the loop exit are elided here.
 */
10722 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10723 struct btrfs_root *csum_root,
10724 struct btrfs_root *cur_root)
10726 struct btrfs_path *path;
10727 struct btrfs_key key;
10728 struct extent_buffer *node;
10729 struct btrfs_file_extent_item *fi;
10736 path = btrfs_alloc_path();
/* One-sector scratch buffer shared by all populate_csum() calls. */
10739 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10749 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10752 /* Iterate all regular file extents and fill its csum */
10754 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10756 if (key.type != BTRFS_EXTENT_DATA_KEY)
10758 node = path->nodes[0];
10759 slot = path->slots[0];
10760 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10761 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10763 start = btrfs_file_extent_disk_bytenr(node, fi);
10764 len = btrfs_file_extent_disk_num_bytes(node, fi);
10766 ret = populate_csum(trans, csum_root, buf, start, len);
10767 if (ret == -EEXIST)
10773 * TODO: if next leaf is corrupted, jump to nearest next valid
10776 ret = btrfs_next_item(cur_root, path);
10786 btrfs_free_path(path);
/*
 * fill_csum_tree_from_fs - rebuild the csum tree from all fs/subvol trees.
 *
 * Iterates ROOT_ITEMs in the tree root starting from FS_TREE_OBJECTID,
 * reads each fs/subvolume root (is_fstree() filtered) and delegates to
 * fill_csum_tree_from_one_fs_root().
 *
 * NOTE(review): original line numbering is non-contiguous; loop exits
 * and error propagation are partially elided here.
 */
10791 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10792 struct btrfs_root *csum_root)
10794 struct btrfs_fs_info *fs_info = csum_root->fs_info;
10795 struct btrfs_path *path;
10796 struct btrfs_root *tree_root = fs_info->tree_root;
10797 struct btrfs_root *cur_root;
10798 struct extent_buffer *node;
10799 struct btrfs_key key;
10803 path = btrfs_alloc_path();
10807 key.objectid = BTRFS_FS_TREE_OBJECTID;
10809 key.type = BTRFS_ROOT_ITEM_KEY;
10811 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10820 node = path->nodes[0];
10821 slot = path->slots[0];
10822 btrfs_item_key_to_cpu(node, &key, slot);
10823 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10825 if (key.type != BTRFS_ROOT_ITEM_KEY)
10827 if (!is_fstree(key.objectid))
10829 key.offset = (u64)-1;
10831 cur_root = btrfs_read_fs_root(fs_info, &key);
10832 if (IS_ERR(cur_root) || !cur_root) {
10833 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
10837 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10842 ret = btrfs_next_item(tree_root, path);
10852 btrfs_free_path(path);
/*
 * fill_csum_tree_from_extent - rebuild the csum tree from the extent tree.
 *
 * Walks EXTENT_ITEMs in the extent tree, skips anything without the
 * DATA flag, and csums each data extent's range via populate_csum()
 * using a one-sector scratch buffer.
 *
 * NOTE(review): original line numbering is non-contiguous; loop exits
 * and cleanup are partially elided here.
 */
10856 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10857 struct btrfs_root *csum_root)
10859 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10860 struct btrfs_path *path;
10861 struct btrfs_extent_item *ei;
10862 struct extent_buffer *leaf;
10864 struct btrfs_key key;
10867 path = btrfs_alloc_path();
10872 key.type = BTRFS_EXTENT_ITEM_KEY;
10875 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10877 btrfs_free_path(path);
10881 buf = malloc(csum_root->sectorsize);
10883 btrfs_free_path(path);
10888 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10889 ret = btrfs_next_leaf(extent_root, path);
10897 leaf = path->nodes[0];
10899 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10900 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10905 ei = btrfs_item_ptr(leaf, path->slots[0],
10906 struct btrfs_extent_item);
/* Only data extents carry csums; skip tree blocks. */
10907 if (!(btrfs_extent_flags(leaf, ei) &
10908 BTRFS_EXTENT_FLAG_DATA)) {
10913 ret = populate_csum(trans, csum_root, buf, key.objectid,
10920 btrfs_free_path(path);
/*
 * fill_csum_tree - dispatcher: rebuild csums either from fs/subvol
 * trees (@search_fs_tree non-zero, needed after extent tree re-init)
 * or from the extent tree.
 */
10926 * Recalculate the csum and put it into the csum tree.
10928 * Extent tree init will wipe out all the extent info, so in that case, we
10929 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
10930 * will use fs/subvol trees to init the csum tree.
10932 static int fill_csum_tree(struct btrfs_trans_handle *trans,
10933 struct btrfs_root *csum_root,
10934 int search_fs_tree)
10936 if (search_fs_tree)
10937 return fill_csum_tree_from_fs(trans, csum_root);
10939 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * free_roots_info_cache - tear down the global roots_info_cache.
 *
 * Removes and frees every cached root_item_info, then frees the cache
 * tree itself and resets the global pointer to NULL. No-op when the
 * cache was never allocated.
 */
10942 static void free_roots_info_cache(void)
10944 if (!roots_info_cache)
10947 while (!cache_tree_empty(roots_info_cache)) {
10948 struct cache_extent *entry;
10949 struct root_item_info *rii;
10951 entry = first_cache_extent(roots_info_cache);
10954 remove_cache_extent(roots_info_cache, entry);
10955 rii = container_of(entry, struct root_item_info, cache_extent);
10959 free(roots_info_cache);
10960 roots_info_cache = NULL;
/*
 * build_roots_info_cache - index root tree blocks found in the extent tree.
 *
 * Scans EXTENT_ITEM/METADATA_ITEM entries whose first inline ref is a
 * TREE_BLOCK_REF, and records per root id the highest-level tree block
 * seen (bytenr, generation, level) in the global roots_info_cache.
 * node_count tracks how many blocks share that highest level — a root
 * item is only trustworthy when exactly one exists (see
 * maybe_repair_root_item()).
 *
 * NOTE(review): original line numbering is non-contiguous; loop exits
 * and some error handling are elided here.
 */
10963 static int build_roots_info_cache(struct btrfs_fs_info *info)
10966 struct btrfs_key key;
10967 struct extent_buffer *leaf;
10968 struct btrfs_path *path;
10970 if (!roots_info_cache) {
10971 roots_info_cache = malloc(sizeof(*roots_info_cache));
10972 if (!roots_info_cache)
10974 cache_tree_init(roots_info_cache);
10977 path = btrfs_alloc_path();
10982 key.type = BTRFS_EXTENT_ITEM_KEY;
10985 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10988 leaf = path->nodes[0];
10991 struct btrfs_key found_key;
10992 struct btrfs_extent_item *ei;
10993 struct btrfs_extent_inline_ref *iref;
10994 int slot = path->slots[0];
10999 struct cache_extent *entry;
11000 struct root_item_info *rii;
11002 if (slot >= btrfs_header_nritems(leaf)) {
11003 ret = btrfs_next_leaf(info->extent_root, path);
11010 leaf = path->nodes[0];
11011 slot = path->slots[0];
11014 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
11016 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
11017 found_key.type != BTRFS_METADATA_ITEM_KEY)
11020 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11021 flags = btrfs_extent_flags(leaf, ei);
/* Only tree blocks are interesting; data extents are skipped. */
11023 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
11024 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* Skinny metadata items encode the level in the key offset. */
11027 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
11028 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11029 level = found_key.offset;
11031 struct btrfs_tree_block_info *binfo;
11033 binfo = (struct btrfs_tree_block_info *)(ei + 1);
11034 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
11035 level = btrfs_tree_block_level(leaf, binfo);
11039 * For a root extent, it must be of the following type and the
11040 * first (and only one) iref in the item.
11042 type = btrfs_extent_inline_ref_type(leaf, iref);
11043 if (type != BTRFS_TREE_BLOCK_REF_KEY)
11046 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
11047 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
/* First sighting of this root id: create a fresh cache entry. */
11049 rii = malloc(sizeof(struct root_item_info));
11054 rii->cache_extent.start = root_id;
11055 rii->cache_extent.size = 1;
11056 rii->level = (u8)-1;
11057 entry = &rii->cache_extent;
11058 ret = insert_cache_extent(roots_info_cache, entry);
11061 rii = container_of(entry, struct root_item_info,
11065 ASSERT(rii->cache_extent.start == root_id);
11066 ASSERT(rii->cache_extent.size == 1);
/* Track the single highest-level block; ties bump node_count. */
11068 if (level > rii->level || rii->level == (u8)-1) {
11069 rii->level = level;
11070 rii->bytenr = found_key.objectid;
11071 rii->gen = btrfs_extent_generation(leaf, ei);
11072 rii->node_count = 1;
11073 } else if (level == rii->level) {
11081 btrfs_free_path(path);
/*
 * Compare the root item that @path points at (in the tree of tree roots,
 * keyed by @root_key) against the bytenr/level/generation cached for that
 * root in roots_info_cache, and rewrite the item in place when they
 * disagree — unless @read_only_mode is set, in which case only report.
 *
 * NOTE(review): several lines (fprintf heads, returns, braces) are elided
 * from this excerpt; the return-value convention cannot be confirmed here.
 */
11086 static int maybe_repair_root_item(struct btrfs_fs_info *info,
11087 struct btrfs_path *path,
11088 const struct btrfs_key *root_key,
11089 const int read_only_mode)
11091 const u64 root_id = root_key->objectid;
11092 struct cache_extent *entry;
11093 struct root_item_info *rii;
11094 struct btrfs_root_item ri;
11095 unsigned long offset;
/* No cached info means build_roots_info_cache() saw no tree-block
 * backref for this root id in the extent tree. */
11097 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
11100 "Error: could not find extent items for root %llu\n",
11101 root_key->objectid);
11105 rii = container_of(entry, struct root_item_info, cache_extent);
11106 ASSERT(rii->cache_extent.start == root_id);
11107 ASSERT(rii->cache_extent.size == 1);
/* A valid root must have exactly one block at its top level. */
11109 if (rii->node_count != 1) {
11111 "Error: could not find btree root extent for root %llu\n",
/* Copy the on-disk root item out of the leaf so we can inspect it. */
11116 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
11117 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
/* Mismatch between the root item and what the extent tree says is the
 * actual root node: report, and fix unless in read-only mode. */
11119 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
11120 btrfs_root_level(&ri) != rii->level ||
11121 btrfs_root_generation(&ri) != rii->gen) {
11124 * If we're in repair mode but our caller told us to not update
11125 * the root item, i.e. just check if it needs to be updated, don't
11126 * print this message, since the caller will call us again shortly
11127 * for the same root item without read only mode (the caller will
11128 * open a transaction first).
11130 if (!(read_only_mode && repair))
11132 "%sroot item for root %llu,"
11133 " current bytenr %llu, current gen %llu, current level %u,"
11134 " new bytenr %llu, new gen %llu, new level %u\n",
11135 (read_only_mode ? "" : "fixing "),
11137 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
11138 btrfs_root_level(&ri),
11139 rii->bytenr, rii->gen, rii->level);
/* A root item newer than the found root node is suspicious: warn
 * rather than silently rolling the root item backwards. */
11141 if (btrfs_root_generation(&ri) > rii->gen) {
11143 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
11144 root_id, btrfs_root_generation(&ri), rii->gen);
/* Actually patch the root item in the leaf with the cached values. */
11148 if (!read_only_mode) {
11149 btrfs_set_root_bytenr(&ri, rii->bytenr);
11150 btrfs_set_root_level(&ri, rii->level);
11151 btrfs_set_root_generation(&ri, rii->gen);
11152 write_extent_buffer(path->nodes[0], &ri,
11153 offset, sizeof(ri));
11163 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
11164 * caused read-only snapshots to be corrupted if they were created at a moment
11165 * when the source subvolume/snapshot had orphan items. The issue was that the
11166 * on-disk root items became incorrect, referring to the pre orphan cleanup root
11167 * node instead of the post orphan cleanup root node.
11168 * So this function, and its callees, just detects and fixes those cases. Even
11169 * though the regression was for read-only snapshots, this function applies to
11170 * any snapshot/subvolume root.
11171 * This must be run before any other repair code - not doing so makes other
11172 * repair code delete or modify backrefs in the extent tree for example, which
11173 * will result in an inconsistent fs after repairing the root items.
/*
 * Walk all ROOT_ITEMs in the tree of tree roots (starting at
 * BTRFS_FIRST_FREE_OBJECTID, skipping relocation trees) and let
 * maybe_repair_root_item() check/fix each one against the extent-tree
 * information gathered by build_roots_info_cache().  Transactions are
 * only opened when a leaf actually contains items needing repair.
 *
 * NOTE(review): this excerpt elides lines (returns, error checks,
 * braces), so the loop/commit structure shown here is incomplete.
 */
11175 static int repair_root_items(struct btrfs_fs_info *info)
11177 struct btrfs_path *path = NULL;
11178 struct btrfs_key key;
11179 struct extent_buffer *leaf;
11180 struct btrfs_trans_handle *trans = NULL;
11183 int need_trans = 0;
/* Populate roots_info_cache before touching any root items. */
11185 ret = build_roots_info_cache(info);
11189 path = btrfs_alloc_path();
/* Start at the first possible subvolume root id. */
11195 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
11196 key.type = BTRFS_ROOT_ITEM_KEY;
11201 * Avoid opening and committing transactions if a leaf doesn't have
11202 * any root items that need to be fixed, so that we avoid rotating
11203 * backup roots unnecessarily.
11206 trans = btrfs_start_transaction(info->tree_root, 1);
11207 if (IS_ERR(trans)) {
11208 ret = PTR_ERR(trans);
11213 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
11217 leaf = path->nodes[0];
11220 struct btrfs_key found_key;
/* End of this leaf: remember the next key, drop the path and
 * commit the per-leaf transaction (if one was opened). */
11222 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
11223 int no_more_keys = find_next_key(path, &key);
11225 btrfs_release_path(path);
11227 ret = btrfs_commit_transaction(trans,
11239 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* Only ROOT_ITEMs matter; relocation trees are transient, skip them. */
11241 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
11243 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
11246 ret = maybe_repair_root_item(info, path, &found_key,
/* A read-only pass found damage and repair is enabled: reopen a
 * transaction and redo this leaf with writes allowed. */
11251 if (!trans && repair) {
11254 btrfs_release_path(path);
/* Cleanup: the cache is global, free it together with the path. */
11264 free_roots_info_cache();
11265 btrfs_free_path(path);
11267 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Usage/help text for `btrfs check`, one line per array element, printed
 * by usage() when the command is invoked with bad or missing arguments.
 * (The terminating NULL sentinel is elided from this excerpt.)
 */
11274 const char * const cmd_check_usage[] = {
11275 "btrfs check [options] <device>",
11276 "Check structural integrity of a filesystem (unmounted).",
11277 "Check structural integrity of an unmounted filesystem. Verify internal",
11278 "trees' consistency and item connectivity. In the repair mode try to",
11279 "fix the problems found. ",
11280 "WARNING: the repair mode is considered dangerous",
11282 "-s|--super <superblock> use this superblock copy",
11283 "-b|--backup use the first valid backup root copy",
11284 "--repair try to repair the filesystem",
11285 "--readonly run in read-only mode (default)",
11286 "--init-csum-tree create a new CRC tree",
11287 "--init-extent-tree create a new extent tree",
11288 "--mode <MODE> select mode, allows to make some memory/IO",
11289 " trade-offs, where MODE is one of:",
11290 " original - read inodes and extents to memory (requires",
11291 " more memory, does less IO)",
11292 " lowmem - try to use less memory but read blocks again",
11294 "--check-data-csum verify checksums of data blocks",
11295 "-Q|--qgroup-report print a report on qgroup consistency",
11296 "-E|--subvol-extents <subvolid>",
11297 " print subvolume extents and sharing state",
11298 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
11299 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
11300 "-p|--progress indicate progress",
11304 int cmd_check(int argc, char **argv)
11306 struct cache_tree root_cache;
11307 struct btrfs_root *root;
11308 struct btrfs_fs_info *info;
11311 u64 tree_root_bytenr = 0;
11312 u64 chunk_root_bytenr = 0;
11313 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
11316 int init_csum_tree = 0;
11318 int qgroup_report = 0;
11319 int qgroups_repaired = 0;
11320 unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
11324 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
11325 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
11326 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
11328 static const struct option long_options[] = {
11329 { "super", required_argument, NULL, 's' },
11330 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
11331 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
11332 { "init-csum-tree", no_argument, NULL,
11333 GETOPT_VAL_INIT_CSUM },
11334 { "init-extent-tree", no_argument, NULL,
11335 GETOPT_VAL_INIT_EXTENT },
11336 { "check-data-csum", no_argument, NULL,
11337 GETOPT_VAL_CHECK_CSUM },
11338 { "backup", no_argument, NULL, 'b' },
11339 { "subvol-extents", required_argument, NULL, 'E' },
11340 { "qgroup-report", no_argument, NULL, 'Q' },
11341 { "tree-root", required_argument, NULL, 'r' },
11342 { "chunk-root", required_argument, NULL,
11343 GETOPT_VAL_CHUNK_TREE },
11344 { "progress", no_argument, NULL, 'p' },
11345 { "mode", required_argument, NULL,
11347 { NULL, 0, NULL, 0}
11350 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
11354 case 'a': /* ignored */ break;
11356 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
11359 num = arg_strtou64(optarg);
11360 if (num >= BTRFS_SUPER_MIRROR_MAX) {
11362 "ERROR: super mirror should be less than: %d\n",
11363 BTRFS_SUPER_MIRROR_MAX);
11366 bytenr = btrfs_sb_offset(((int)num));
11367 printf("using SB copy %llu, bytenr %llu\n", num,
11368 (unsigned long long)bytenr);
11374 subvolid = arg_strtou64(optarg);
11377 tree_root_bytenr = arg_strtou64(optarg);
11379 case GETOPT_VAL_CHUNK_TREE:
11380 chunk_root_bytenr = arg_strtou64(optarg);
11383 ctx.progress_enabled = true;
11387 usage(cmd_check_usage);
11388 case GETOPT_VAL_REPAIR:
11389 printf("enabling repair mode\n");
11391 ctree_flags |= OPEN_CTREE_WRITES;
11393 case GETOPT_VAL_READONLY:
11396 case GETOPT_VAL_INIT_CSUM:
11397 printf("Creating a new CRC tree\n");
11398 init_csum_tree = 1;
11400 ctree_flags |= OPEN_CTREE_WRITES;
11402 case GETOPT_VAL_INIT_EXTENT:
11403 init_extent_tree = 1;
11404 ctree_flags |= (OPEN_CTREE_WRITES |
11405 OPEN_CTREE_NO_BLOCK_GROUPS);
11408 case GETOPT_VAL_CHECK_CSUM:
11409 check_data_csum = 1;
11411 case GETOPT_VAL_MODE:
11412 check_mode = parse_check_mode(optarg);
11413 if (check_mode == CHECK_MODE_UNKNOWN) {
11414 error("unknown mode: %s", optarg);
11421 if (check_argc_exact(argc - optind, 1))
11422 usage(cmd_check_usage);
11424 if (ctx.progress_enabled) {
11425 ctx.tp = TASK_NOTHING;
11426 ctx.info = task_init(print_status_check, print_status_return, &ctx);
11429 /* This check is the only reason for --readonly to exist */
11430 if (readonly && repair) {
11431 fprintf(stderr, "Repair options are not compatible with --readonly\n");
11436 * Not supported yet
11438 if (repair && check_mode == CHECK_MODE_LOWMEM) {
11439 error("Low memory mode doesn't support repair yet");
11444 cache_tree_init(&root_cache);
11446 if((ret = check_mounted(argv[optind])) < 0) {
11447 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
11450 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
11455 /* only allow partial opening under repair mode */
11457 ctree_flags |= OPEN_CTREE_PARTIAL;
11459 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
11460 chunk_root_bytenr, ctree_flags);
11462 fprintf(stderr, "Couldn't open file system\n");
11467 global_info = info;
11468 root = info->fs_root;
11471 * repair mode will force us to commit transaction which
11472 * will make us fail to load log tree when mounting.
11474 if (repair && btrfs_super_log_root(info->super_copy)) {
11475 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
11480 ret = zero_log_tree(root);
11482 fprintf(stderr, "fail to zero log tree\n");
11487 uuid_unparse(info->super_copy->fsid, uuidbuf);
11488 if (qgroup_report) {
11489 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
11491 ret = qgroup_verify_all(info);
11497 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
11498 subvolid, argv[optind], uuidbuf);
11499 ret = print_extent_state(info, subvolid);
11502 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
11504 if (!extent_buffer_uptodate(info->tree_root->node) ||
11505 !extent_buffer_uptodate(info->dev_root->node) ||
11506 !extent_buffer_uptodate(info->chunk_root->node)) {
11507 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11512 if (init_extent_tree || init_csum_tree) {
11513 struct btrfs_trans_handle *trans;
11515 trans = btrfs_start_transaction(info->extent_root, 0);
11516 if (IS_ERR(trans)) {
11517 fprintf(stderr, "Error starting transaction\n");
11518 ret = PTR_ERR(trans);
11522 if (init_extent_tree) {
11523 printf("Creating a new extent tree\n");
11524 ret = reinit_extent_tree(trans, info);
11529 if (init_csum_tree) {
11530 fprintf(stderr, "Reinit crc root\n");
11531 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
11533 fprintf(stderr, "crc root initialization failed\n");
11538 ret = fill_csum_tree(trans, info->csum_root,
11541 fprintf(stderr, "crc refilling failed\n");
11546 * Ok now we commit and run the normal fsck, which will add
11547 * extent entries for all of the items it finds.
11549 ret = btrfs_commit_transaction(trans, info->extent_root);
11553 if (!extent_buffer_uptodate(info->extent_root->node)) {
11554 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
11558 if (!extent_buffer_uptodate(info->csum_root->node)) {
11559 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
11564 if (!ctx.progress_enabled)
11565 fprintf(stderr, "checking extents\n");
11566 if (check_mode == CHECK_MODE_LOWMEM)
11567 ret = check_chunks_and_extents_v2(root);
11569 ret = check_chunks_and_extents(root);
11571 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
11573 ret = repair_root_items(info);
11577 fprintf(stderr, "Fixed %d roots.\n", ret);
11579 } else if (ret > 0) {
11581 "Found %d roots with an outdated root item.\n",
11584 "Please run a filesystem check with the option --repair to fix them.\n");
11589 if (!ctx.progress_enabled) {
11590 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
11591 fprintf(stderr, "checking free space tree\n");
11593 fprintf(stderr, "checking free space cache\n");
11595 ret = check_space_cache(root);
11600 * We used to have to have these hole extents in between our real
11601 * extents so if we don't have this flag set we need to make sure there
11602 * are no gaps in the file extents for inodes, otherwise we can just
11603 * ignore it when this happens.
11605 no_holes = btrfs_fs_incompat(root->fs_info,
11606 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11607 if (!ctx.progress_enabled)
11608 fprintf(stderr, "checking fs roots\n");
11609 ret = check_fs_roots(root, &root_cache);
11613 fprintf(stderr, "checking csums\n");
11614 ret = check_csums(root);
11618 fprintf(stderr, "checking root refs\n");
11619 ret = check_root_refs(root, &root_cache);
11623 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11624 struct extent_buffer *eb;
11626 eb = list_first_entry(&root->fs_info->recow_ebs,
11627 struct extent_buffer, recow);
11628 list_del_init(&eb->recow);
11629 ret = recow_extent_buffer(root, eb);
11634 while (!list_empty(&delete_items)) {
11635 struct bad_item *bad;
11637 bad = list_first_entry(&delete_items, struct bad_item, list);
11638 list_del_init(&bad->list);
11640 ret = delete_bad_item(root, bad);
11644 if (info->quota_enabled) {
11646 fprintf(stderr, "checking quota groups\n");
11647 err = qgroup_verify_all(info);
11651 err = repair_qgroups(info, &qgroups_repaired);
11656 if (!list_empty(&root->fs_info->recow_ebs)) {
11657 fprintf(stderr, "Transid errors in file system\n");
11661 /* Don't override original ret */
11662 if (!ret && qgroups_repaired)
11663 ret = qgroups_repaired;
11665 if (found_old_backref) { /*
11666 * there was a disk format change when mixed
11667 * backref was in testing tree. The old format
11668 * existed about one week.
11670 printf("\n * Found old mixed backref format. "
11671 "The old format is not supported! *"
11672 "\n * Please mount the FS in readonly mode, "
11673 "backup data and re-format the FS. *\n\n");
11676 printf("found %llu bytes used err is %d\n",
11677 (unsigned long long)bytes_used, ret);
11678 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11679 printf("total tree bytes: %llu\n",
11680 (unsigned long long)total_btree_bytes);
11681 printf("total fs tree bytes: %llu\n",
11682 (unsigned long long)total_fs_tree_bytes);
11683 printf("total extent tree bytes: %llu\n",
11684 (unsigned long long)total_extent_tree_bytes);
11685 printf("btree space waste bytes: %llu\n",
11686 (unsigned long long)btree_space_waste);
11687 printf("file data blocks allocated: %llu\n referenced %llu\n",
11688 (unsigned long long)data_bytes_allocated,
11689 (unsigned long long)data_bytes_referenced);
11691 free_qgroup_counts();
11692 free_root_recs_tree(&root_cache);
11696 if (ctx.progress_enabled)
11697 task_deinit(ctx.info);