2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
77 struct extent_backref {
79 unsigned int is_data:1;
80 unsigned int found_extent_tree:1;
81 unsigned int full_backref:1;
82 unsigned int found_ref:1;
83 unsigned int broken:1;
86 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
88 return rb_entry(node, struct extent_backref, node);
92 struct extent_backref node;
106 static inline struct data_backref* to_data_backref(struct extent_backref *back)
108 return container_of(back, struct data_backref, node);
111 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
113 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
114 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
115 struct data_backref *back1 = to_data_backref(ext1);
116 struct data_backref *back2 = to_data_backref(ext2);
118 WARN_ON(!ext1->is_data);
119 WARN_ON(!ext2->is_data);
121 /* parent and root are a union, so this covers both */
122 if (back1->parent > back2->parent)
124 if (back1->parent < back2->parent)
127 /* This is a full backref and the parents match. */
128 if (back1->node.full_backref)
131 if (back1->owner > back2->owner)
133 if (back1->owner < back2->owner)
136 if (back1->offset > back2->offset)
138 if (back1->offset < back2->offset)
141 if (back1->bytes > back2->bytes)
143 if (back1->bytes < back2->bytes)
146 if (back1->found_ref && back2->found_ref) {
147 if (back1->disk_bytenr > back2->disk_bytenr)
149 if (back1->disk_bytenr < back2->disk_bytenr)
152 if (back1->found_ref > back2->found_ref)
154 if (back1->found_ref < back2->found_ref)
162 * Much like data_backref, just removed the undetermined members
163 * and change it to use list_head.
164 * During extent scan, it is stored in root->orphan_data_extent.
165 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
167 struct orphan_data_extent {
168 struct list_head list;
176 struct tree_backref {
177 struct extent_backref node;
184 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
186 return container_of(back, struct tree_backref, node);
189 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
191 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
192 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
193 struct tree_backref *back1 = to_tree_backref(ext1);
194 struct tree_backref *back2 = to_tree_backref(ext2);
196 WARN_ON(ext1->is_data);
197 WARN_ON(ext2->is_data);
199 /* parent and root are a union, so this covers both */
200 if (back1->parent > back2->parent)
202 if (back1->parent < back2->parent)
208 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
210 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
211 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
213 if (ext1->is_data > ext2->is_data)
216 if (ext1->is_data < ext2->is_data)
219 if (ext1->full_backref > ext2->full_backref)
221 if (ext1->full_backref < ext2->full_backref)
225 return compare_data_backref(node1, node2);
227 return compare_tree_backref(node1, node2);
230 /* Explicit initialization for extent_record::flag_block_full_backref */
231 enum { FLAG_UNSET = 2 };
233 struct extent_record {
234 struct list_head backrefs;
235 struct list_head dups;
236 struct rb_root backref_tree;
237 struct list_head list;
238 struct cache_extent cache;
239 struct btrfs_disk_key parent_key;
244 u64 extent_item_refs;
246 u64 parent_generation;
250 unsigned int flag_block_full_backref:2;
251 unsigned int found_rec:1;
252 unsigned int content_checked:1;
253 unsigned int owner_ref_checked:1;
254 unsigned int is_root:1;
255 unsigned int metadata:1;
256 unsigned int bad_full_backref:1;
257 unsigned int crossing_stripes:1;
258 unsigned int wrong_chunk_type:1;
261 static inline struct extent_record* to_extent_record(struct list_head *entry)
263 return container_of(entry, struct extent_record, list);
266 struct inode_backref {
267 struct list_head list;
268 unsigned int found_dir_item:1;
269 unsigned int found_dir_index:1;
270 unsigned int found_inode_ref:1;
271 unsigned int filetype:8;
273 unsigned int ref_type;
280 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
282 return list_entry(entry, struct inode_backref, list);
285 struct root_item_record {
286 struct list_head list;
293 struct btrfs_key drop_key;
296 #define REF_ERR_NO_DIR_ITEM (1 << 0)
297 #define REF_ERR_NO_DIR_INDEX (1 << 1)
298 #define REF_ERR_NO_INODE_REF (1 << 2)
299 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
300 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
301 #define REF_ERR_DUP_INODE_REF (1 << 5)
302 #define REF_ERR_INDEX_UNMATCH (1 << 6)
303 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
304 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
305 #define REF_ERR_NO_ROOT_REF (1 << 9)
306 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
307 #define REF_ERR_DUP_ROOT_REF (1 << 11)
308 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
310 struct file_extent_hole {
316 struct inode_record {
317 struct list_head backrefs;
318 unsigned int checked:1;
319 unsigned int merging:1;
320 unsigned int found_inode_item:1;
321 unsigned int found_dir_item:1;
322 unsigned int found_file_extent:1;
323 unsigned int found_csum_item:1;
324 unsigned int some_csum_missing:1;
325 unsigned int nodatasum:1;
338 struct rb_root holes;
339 struct list_head orphan_extents;
344 #define I_ERR_NO_INODE_ITEM (1 << 0)
345 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
346 #define I_ERR_DUP_INODE_ITEM (1 << 2)
347 #define I_ERR_DUP_DIR_INDEX (1 << 3)
348 #define I_ERR_ODD_DIR_ITEM (1 << 4)
349 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
350 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
351 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
352 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
353 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
354 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
355 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
356 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
357 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
358 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
360 struct root_backref {
361 struct list_head list;
362 unsigned int found_dir_item:1;
363 unsigned int found_dir_index:1;
364 unsigned int found_back_ref:1;
365 unsigned int found_forward_ref:1;
366 unsigned int reachable:1;
375 static inline struct root_backref* to_root_backref(struct list_head *entry)
377 return list_entry(entry, struct root_backref, list);
381 struct list_head backrefs;
382 struct cache_extent cache;
383 unsigned int found_root_item:1;
389 struct cache_extent cache;
394 struct cache_extent cache;
395 struct cache_tree root_cache;
396 struct cache_tree inode_cache;
397 struct inode_record *current;
406 struct walk_control {
407 struct cache_tree shared;
408 struct shared_node *nodes[BTRFS_MAX_LEVEL];
414 struct btrfs_key key;
416 struct list_head list;
419 struct extent_entry {
424 struct list_head list;
427 struct root_item_info {
428 /* level of the root */
430 /* number of nodes at this level, must be 1 for a root */
434 struct cache_extent cache_extent;
437 static void *print_status_check(void *p)
439 struct task_ctx *priv = p;
440 const char work_indicator[] = { '.', 'o', 'O', 'o' };
442 static char *task_position_string[] = {
444 "checking free space cache",
448 task_period_start(priv->info, 1000 /* 1s */);
450 if (priv->tp == TASK_NOTHING)
454 printf("%s [%c]\r", task_position_string[priv->tp],
455 work_indicator[count % 4]);
458 task_period_wait(priv->info);
463 static int print_status_return(void *p)
471 /* Compatible function to allow reuse of old codes */
472 static u64 first_extent_gap(struct rb_root *holes)
474 struct file_extent_hole *hole;
476 if (RB_EMPTY_ROOT(holes))
479 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
483 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
485 struct file_extent_hole *hole1;
486 struct file_extent_hole *hole2;
488 hole1 = rb_entry(node1, struct file_extent_hole, node);
489 hole2 = rb_entry(node2, struct file_extent_hole, node);
491 if (hole1->start > hole2->start)
493 if (hole1->start < hole2->start)
495 /* Now hole1->start == hole2->start */
496 if (hole1->len >= hole2->len)
498 * Hole 1 will be merge center
499 * Same hole will be merged later
502 /* Hole 2 will be merge center */
507 * Add a hole to the record
509 * This will do hole merge for copy_file_extent_holes(),
510 * which will ensure there won't be continuous holes.
512 static int add_file_extent_hole(struct rb_root *holes,
515 struct file_extent_hole *hole;
516 struct file_extent_hole *prev = NULL;
517 struct file_extent_hole *next = NULL;
519 hole = malloc(sizeof(*hole));
524 /* Since compare will not return 0, no -EEXIST will happen */
525 rb_insert(holes, &hole->node, compare_hole);
527 /* simple merge with previous hole */
528 if (rb_prev(&hole->node))
529 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
531 if (prev && prev->start + prev->len >= hole->start) {
532 hole->len = hole->start + hole->len - prev->start;
533 hole->start = prev->start;
534 rb_erase(&prev->node, holes);
539 /* iterate merge with next holes */
541 if (!rb_next(&hole->node))
543 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
545 if (hole->start + hole->len >= next->start) {
546 if (hole->start + hole->len <= next->start + next->len)
547 hole->len = next->start + next->len -
549 rb_erase(&next->node, holes);
558 static int compare_hole_range(struct rb_node *node, void *data)
560 struct file_extent_hole *hole;
563 hole = (struct file_extent_hole *)data;
566 hole = rb_entry(node, struct file_extent_hole, node);
567 if (start < hole->start)
569 if (start >= hole->start && start < hole->start + hole->len)
575 * Delete a hole in the record
577 * This will do the hole split and is much restrict than add.
579 static int del_file_extent_hole(struct rb_root *holes,
582 struct file_extent_hole *hole;
583 struct file_extent_hole tmp;
588 struct rb_node *node;
595 node = rb_search(holes, &tmp, compare_hole_range, NULL);
598 hole = rb_entry(node, struct file_extent_hole, node);
599 if (start + len > hole->start + hole->len)
603 * Now there will be no overlap, delete the hole and re-add the
604 * split(s) if they exists.
606 if (start > hole->start) {
607 prev_start = hole->start;
608 prev_len = start - hole->start;
611 if (hole->start + hole->len > start + len) {
612 next_start = start + len;
613 next_len = hole->start + hole->len - start - len;
616 rb_erase(node, holes);
619 ret = add_file_extent_hole(holes, prev_start, prev_len);
624 ret = add_file_extent_hole(holes, next_start, next_len);
631 static int copy_file_extent_holes(struct rb_root *dst,
634 struct file_extent_hole *hole;
635 struct rb_node *node;
638 node = rb_first(src);
640 hole = rb_entry(node, struct file_extent_hole, node);
641 ret = add_file_extent_hole(dst, hole->start, hole->len);
644 node = rb_next(node);
649 static void free_file_extent_holes(struct rb_root *holes)
651 struct rb_node *node;
652 struct file_extent_hole *hole;
654 node = rb_first(holes);
656 hole = rb_entry(node, struct file_extent_hole, node);
657 rb_erase(node, holes);
659 node = rb_first(holes);
663 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
665 static void record_root_in_trans(struct btrfs_trans_handle *trans,
666 struct btrfs_root *root)
668 if (root->last_trans != trans->transid) {
669 root->track_dirty = 1;
670 root->last_trans = trans->transid;
671 root->commit_root = root->node;
672 extent_buffer_get(root->node);
676 static u8 imode_to_type(u32 imode)
679 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
680 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
681 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
682 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
683 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
684 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
685 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
686 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
689 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
693 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
695 struct device_record *rec1;
696 struct device_record *rec2;
698 rec1 = rb_entry(node1, struct device_record, node);
699 rec2 = rb_entry(node2, struct device_record, node);
700 if (rec1->devid > rec2->devid)
702 else if (rec1->devid < rec2->devid)
708 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
710 struct inode_record *rec;
711 struct inode_backref *backref;
712 struct inode_backref *orig;
713 struct inode_backref *tmp;
714 struct orphan_data_extent *src_orphan;
715 struct orphan_data_extent *dst_orphan;
719 rec = malloc(sizeof(*rec));
721 return ERR_PTR(-ENOMEM);
722 memcpy(rec, orig_rec, sizeof(*rec));
724 INIT_LIST_HEAD(&rec->backrefs);
725 INIT_LIST_HEAD(&rec->orphan_extents);
726 rec->holes = RB_ROOT;
728 list_for_each_entry(orig, &orig_rec->backrefs, list) {
729 size = sizeof(*orig) + orig->namelen + 1;
730 backref = malloc(size);
735 memcpy(backref, orig, size);
736 list_add_tail(&backref->list, &rec->backrefs);
738 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
739 dst_orphan = malloc(sizeof(*dst_orphan));
744 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
745 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
747 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
753 if (!list_empty(&rec->backrefs))
754 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
755 list_del(&orig->list);
759 if (!list_empty(&rec->orphan_extents))
760 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
761 list_del(&orig->list);
770 static void print_orphan_data_extents(struct list_head *orphan_extents,
773 struct orphan_data_extent *orphan;
775 if (list_empty(orphan_extents))
777 printf("The following data extent is lost in tree %llu:\n",
779 list_for_each_entry(orphan, orphan_extents, list) {
780 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
781 orphan->objectid, orphan->offset, orphan->disk_bytenr,
786 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
788 u64 root_objectid = root->root_key.objectid;
789 int errors = rec->errors;
793 /* reloc root errors, we print its corresponding fs root objectid*/
794 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
795 root_objectid = root->root_key.offset;
796 fprintf(stderr, "reloc");
798 fprintf(stderr, "root %llu inode %llu errors %x",
799 (unsigned long long) root_objectid,
800 (unsigned long long) rec->ino, rec->errors);
802 if (errors & I_ERR_NO_INODE_ITEM)
803 fprintf(stderr, ", no inode item");
804 if (errors & I_ERR_NO_ORPHAN_ITEM)
805 fprintf(stderr, ", no orphan item");
806 if (errors & I_ERR_DUP_INODE_ITEM)
807 fprintf(stderr, ", dup inode item");
808 if (errors & I_ERR_DUP_DIR_INDEX)
809 fprintf(stderr, ", dup dir index");
810 if (errors & I_ERR_ODD_DIR_ITEM)
811 fprintf(stderr, ", odd dir item");
812 if (errors & I_ERR_ODD_FILE_EXTENT)
813 fprintf(stderr, ", odd file extent");
814 if (errors & I_ERR_BAD_FILE_EXTENT)
815 fprintf(stderr, ", bad file extent");
816 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
817 fprintf(stderr, ", file extent overlap");
818 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
819 fprintf(stderr, ", file extent discount");
820 if (errors & I_ERR_DIR_ISIZE_WRONG)
821 fprintf(stderr, ", dir isize wrong");
822 if (errors & I_ERR_FILE_NBYTES_WRONG)
823 fprintf(stderr, ", nbytes wrong");
824 if (errors & I_ERR_ODD_CSUM_ITEM)
825 fprintf(stderr, ", odd csum item");
826 if (errors & I_ERR_SOME_CSUM_MISSING)
827 fprintf(stderr, ", some csum missing");
828 if (errors & I_ERR_LINK_COUNT_WRONG)
829 fprintf(stderr, ", link count wrong");
830 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
831 fprintf(stderr, ", orphan file extent");
832 fprintf(stderr, "\n");
833 /* Print the orphan extents if needed */
834 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
835 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
837 /* Print the holes if needed */
838 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
839 struct file_extent_hole *hole;
840 struct rb_node *node;
843 node = rb_first(&rec->holes);
844 fprintf(stderr, "Found file extent holes:\n");
847 hole = rb_entry(node, struct file_extent_hole, node);
848 fprintf(stderr, "\tstart: %llu, len: %llu\n",
849 hole->start, hole->len);
850 node = rb_next(node);
853 fprintf(stderr, "\tstart: 0, len: %llu\n",
854 round_up(rec->isize, root->sectorsize));
858 static void print_ref_error(int errors)
860 if (errors & REF_ERR_NO_DIR_ITEM)
861 fprintf(stderr, ", no dir item");
862 if (errors & REF_ERR_NO_DIR_INDEX)
863 fprintf(stderr, ", no dir index");
864 if (errors & REF_ERR_NO_INODE_REF)
865 fprintf(stderr, ", no inode ref");
866 if (errors & REF_ERR_DUP_DIR_ITEM)
867 fprintf(stderr, ", dup dir item");
868 if (errors & REF_ERR_DUP_DIR_INDEX)
869 fprintf(stderr, ", dup dir index");
870 if (errors & REF_ERR_DUP_INODE_REF)
871 fprintf(stderr, ", dup inode ref");
872 if (errors & REF_ERR_INDEX_UNMATCH)
873 fprintf(stderr, ", index mismatch");
874 if (errors & REF_ERR_FILETYPE_UNMATCH)
875 fprintf(stderr, ", filetype mismatch");
876 if (errors & REF_ERR_NAME_TOO_LONG)
877 fprintf(stderr, ", name too long");
878 if (errors & REF_ERR_NO_ROOT_REF)
879 fprintf(stderr, ", no root ref");
880 if (errors & REF_ERR_NO_ROOT_BACKREF)
881 fprintf(stderr, ", no root backref");
882 if (errors & REF_ERR_DUP_ROOT_REF)
883 fprintf(stderr, ", dup root ref");
884 if (errors & REF_ERR_DUP_ROOT_BACKREF)
885 fprintf(stderr, ", dup root backref");
886 fprintf(stderr, "\n");
889 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
892 struct ptr_node *node;
893 struct cache_extent *cache;
894 struct inode_record *rec = NULL;
897 cache = lookup_cache_extent(inode_cache, ino, 1);
899 node = container_of(cache, struct ptr_node, cache);
901 if (mod && rec->refs > 1) {
902 node->data = clone_inode_rec(rec);
903 if (IS_ERR(node->data))
909 rec = calloc(1, sizeof(*rec));
911 return ERR_PTR(-ENOMEM);
913 rec->extent_start = (u64)-1;
915 INIT_LIST_HEAD(&rec->backrefs);
916 INIT_LIST_HEAD(&rec->orphan_extents);
917 rec->holes = RB_ROOT;
919 node = malloc(sizeof(*node));
922 return ERR_PTR(-ENOMEM);
924 node->cache.start = ino;
925 node->cache.size = 1;
928 if (ino == BTRFS_FREE_INO_OBJECTID)
931 ret = insert_cache_extent(inode_cache, &node->cache);
933 return ERR_PTR(-EEXIST);
938 static void free_orphan_data_extents(struct list_head *orphan_extents)
940 struct orphan_data_extent *orphan;
942 while (!list_empty(orphan_extents)) {
943 orphan = list_entry(orphan_extents->next,
944 struct orphan_data_extent, list);
945 list_del(&orphan->list);
950 static void free_inode_rec(struct inode_record *rec)
952 struct inode_backref *backref;
957 while (!list_empty(&rec->backrefs)) {
958 backref = to_inode_backref(rec->backrefs.next);
959 list_del(&backref->list);
962 free_orphan_data_extents(&rec->orphan_extents);
963 free_file_extent_holes(&rec->holes);
967 static int can_free_inode_rec(struct inode_record *rec)
969 if (!rec->errors && rec->checked && rec->found_inode_item &&
970 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
975 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
976 struct inode_record *rec)
978 struct cache_extent *cache;
979 struct inode_backref *tmp, *backref;
980 struct ptr_node *node;
981 unsigned char filetype;
983 if (!rec->found_inode_item)
986 filetype = imode_to_type(rec->imode);
987 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
988 if (backref->found_dir_item && backref->found_dir_index) {
989 if (backref->filetype != filetype)
990 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
991 if (!backref->errors && backref->found_inode_ref &&
992 rec->nlink == rec->found_link) {
993 list_del(&backref->list);
999 if (!rec->checked || rec->merging)
1002 if (S_ISDIR(rec->imode)) {
1003 if (rec->found_size != rec->isize)
1004 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1005 if (rec->found_file_extent)
1006 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1007 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1008 if (rec->found_dir_item)
1009 rec->errors |= I_ERR_ODD_DIR_ITEM;
1010 if (rec->found_size != rec->nbytes)
1011 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1012 if (rec->nlink > 0 && !no_holes &&
1013 (rec->extent_end < rec->isize ||
1014 first_extent_gap(&rec->holes) < rec->isize))
1015 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1018 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1019 if (rec->found_csum_item && rec->nodatasum)
1020 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1021 if (rec->some_csum_missing && !rec->nodatasum)
1022 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1025 BUG_ON(rec->refs != 1);
1026 if (can_free_inode_rec(rec)) {
1027 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1028 node = container_of(cache, struct ptr_node, cache);
1029 BUG_ON(node->data != rec);
1030 remove_cache_extent(inode_cache, &node->cache);
1032 free_inode_rec(rec);
1036 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1038 struct btrfs_path path;
1039 struct btrfs_key key;
1042 key.objectid = BTRFS_ORPHAN_OBJECTID;
1043 key.type = BTRFS_ORPHAN_ITEM_KEY;
1046 btrfs_init_path(&path);
1047 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1048 btrfs_release_path(&path);
1054 static int process_inode_item(struct extent_buffer *eb,
1055 int slot, struct btrfs_key *key,
1056 struct shared_node *active_node)
1058 struct inode_record *rec;
1059 struct btrfs_inode_item *item;
1061 rec = active_node->current;
1062 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1063 if (rec->found_inode_item) {
1064 rec->errors |= I_ERR_DUP_INODE_ITEM;
1067 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1068 rec->nlink = btrfs_inode_nlink(eb, item);
1069 rec->isize = btrfs_inode_size(eb, item);
1070 rec->nbytes = btrfs_inode_nbytes(eb, item);
1071 rec->imode = btrfs_inode_mode(eb, item);
1072 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1074 rec->found_inode_item = 1;
1075 if (rec->nlink == 0)
1076 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1077 maybe_free_inode_rec(&active_node->inode_cache, rec);
1081 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1083 int namelen, u64 dir)
1085 struct inode_backref *backref;
1087 list_for_each_entry(backref, &rec->backrefs, list) {
1088 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1090 if (backref->dir != dir || backref->namelen != namelen)
1092 if (memcmp(name, backref->name, namelen))
1097 backref = malloc(sizeof(*backref) + namelen + 1);
1100 memset(backref, 0, sizeof(*backref));
1102 backref->namelen = namelen;
1103 memcpy(backref->name, name, namelen);
1104 backref->name[namelen] = '\0';
1105 list_add_tail(&backref->list, &rec->backrefs);
1109 static int add_inode_backref(struct cache_tree *inode_cache,
1110 u64 ino, u64 dir, u64 index,
1111 const char *name, int namelen,
1112 int filetype, int itemtype, int errors)
1114 struct inode_record *rec;
1115 struct inode_backref *backref;
1117 rec = get_inode_rec(inode_cache, ino, 1);
1118 BUG_ON(IS_ERR(rec));
1119 backref = get_inode_backref(rec, name, namelen, dir);
1122 backref->errors |= errors;
1123 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1124 if (backref->found_dir_index)
1125 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1126 if (backref->found_inode_ref && backref->index != index)
1127 backref->errors |= REF_ERR_INDEX_UNMATCH;
1128 if (backref->found_dir_item && backref->filetype != filetype)
1129 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1131 backref->index = index;
1132 backref->filetype = filetype;
1133 backref->found_dir_index = 1;
1134 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1136 if (backref->found_dir_item)
1137 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1138 if (backref->found_dir_index && backref->filetype != filetype)
1139 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1141 backref->filetype = filetype;
1142 backref->found_dir_item = 1;
1143 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1144 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1145 if (backref->found_inode_ref)
1146 backref->errors |= REF_ERR_DUP_INODE_REF;
1147 if (backref->found_dir_index && backref->index != index)
1148 backref->errors |= REF_ERR_INDEX_UNMATCH;
1150 backref->index = index;
1152 backref->ref_type = itemtype;
1153 backref->found_inode_ref = 1;
1158 maybe_free_inode_rec(inode_cache, rec);
1162 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1163 struct cache_tree *dst_cache)
1165 struct inode_backref *backref;
1170 list_for_each_entry(backref, &src->backrefs, list) {
1171 if (backref->found_dir_index) {
1172 add_inode_backref(dst_cache, dst->ino, backref->dir,
1173 backref->index, backref->name,
1174 backref->namelen, backref->filetype,
1175 BTRFS_DIR_INDEX_KEY, backref->errors);
1177 if (backref->found_dir_item) {
1179 add_inode_backref(dst_cache, dst->ino,
1180 backref->dir, 0, backref->name,
1181 backref->namelen, backref->filetype,
1182 BTRFS_DIR_ITEM_KEY, backref->errors);
1184 if (backref->found_inode_ref) {
1185 add_inode_backref(dst_cache, dst->ino,
1186 backref->dir, backref->index,
1187 backref->name, backref->namelen, 0,
1188 backref->ref_type, backref->errors);
1192 if (src->found_dir_item)
1193 dst->found_dir_item = 1;
1194 if (src->found_file_extent)
1195 dst->found_file_extent = 1;
1196 if (src->found_csum_item)
1197 dst->found_csum_item = 1;
1198 if (src->some_csum_missing)
1199 dst->some_csum_missing = 1;
1200 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1201 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1206 BUG_ON(src->found_link < dir_count);
1207 dst->found_link += src->found_link - dir_count;
1208 dst->found_size += src->found_size;
1209 if (src->extent_start != (u64)-1) {
1210 if (dst->extent_start == (u64)-1) {
1211 dst->extent_start = src->extent_start;
1212 dst->extent_end = src->extent_end;
1214 if (dst->extent_end > src->extent_start)
1215 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1216 else if (dst->extent_end < src->extent_start) {
1217 ret = add_file_extent_hole(&dst->holes,
1219 src->extent_start - dst->extent_end);
1221 if (dst->extent_end < src->extent_end)
1222 dst->extent_end = src->extent_end;
1226 dst->errors |= src->errors;
1227 if (src->found_inode_item) {
1228 if (!dst->found_inode_item) {
1229 dst->nlink = src->nlink;
1230 dst->isize = src->isize;
1231 dst->nbytes = src->nbytes;
1232 dst->imode = src->imode;
1233 dst->nodatasum = src->nodatasum;
1234 dst->found_inode_item = 1;
1236 dst->errors |= I_ERR_DUP_INODE_ITEM;
1244 static int splice_shared_node(struct shared_node *src_node,
1245 struct shared_node *dst_node)
1247 struct cache_extent *cache;
1248 struct ptr_node *node, *ins;
1249 struct cache_tree *src, *dst;
1250 struct inode_record *rec, *conflict;
1251 u64 current_ino = 0;
1255 if (--src_node->refs == 0)
1257 if (src_node->current)
1258 current_ino = src_node->current->ino;
1260 src = &src_node->root_cache;
1261 dst = &dst_node->root_cache;
1263 cache = search_cache_extent(src, 0);
1265 node = container_of(cache, struct ptr_node, cache);
1267 cache = next_cache_extent(cache);
1270 remove_cache_extent(src, &node->cache);
1273 ins = malloc(sizeof(*ins));
1275 ins->cache.start = node->cache.start;
1276 ins->cache.size = node->cache.size;
1280 ret = insert_cache_extent(dst, &ins->cache);
1281 if (ret == -EEXIST) {
1282 conflict = get_inode_rec(dst, rec->ino, 1);
1283 BUG_ON(IS_ERR(conflict));
1284 merge_inode_recs(rec, conflict, dst);
1286 conflict->checked = 1;
1287 if (dst_node->current == conflict)
1288 dst_node->current = NULL;
1290 maybe_free_inode_rec(dst, conflict);
1291 free_inode_rec(rec);
1298 if (src == &src_node->root_cache) {
1299 src = &src_node->inode_cache;
1300 dst = &dst_node->inode_cache;
1304 if (current_ino > 0 && (!dst_node->current ||
1305 current_ino > dst_node->current->ino)) {
1306 if (dst_node->current) {
1307 dst_node->current->checked = 1;
1308 maybe_free_inode_rec(dst, dst_node->current);
1310 dst_node->current = get_inode_rec(dst, current_ino, 1);
1311 BUG_ON(IS_ERR(dst_node->current));
1316 static void free_inode_ptr(struct cache_extent *cache)
1318 struct ptr_node *node;
1319 struct inode_record *rec;
1321 node = container_of(cache, struct ptr_node, cache);
1323 free_inode_rec(rec);
1327 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1329 static struct shared_node *find_shared_node(struct cache_tree *shared,
1332 struct cache_extent *cache;
1333 struct shared_node *node;
1335 cache = lookup_cache_extent(shared, bytenr, 1);
1337 node = container_of(cache, struct shared_node, cache);
1343 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1346 struct shared_node *node;
1348 node = calloc(1, sizeof(*node));
1351 node->cache.start = bytenr;
1352 node->cache.size = 1;
1353 cache_tree_init(&node->root_cache);
1354 cache_tree_init(&node->inode_cache);
1357 ret = insert_cache_extent(shared, &node->cache);
1362 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1363 struct walk_control *wc, int level)
1365 struct shared_node *node;
1366 struct shared_node *dest;
1369 if (level == wc->active_node)
1372 BUG_ON(wc->active_node <= level);
1373 node = find_shared_node(&wc->shared, bytenr);
1375 ret = add_shared_node(&wc->shared, bytenr, refs);
1377 node = find_shared_node(&wc->shared, bytenr);
1378 wc->nodes[level] = node;
1379 wc->active_node = level;
1383 if (wc->root_level == wc->active_node &&
1384 btrfs_root_refs(&root->root_item) == 0) {
1385 if (--node->refs == 0) {
1386 free_inode_recs_tree(&node->root_cache);
1387 free_inode_recs_tree(&node->inode_cache);
1388 remove_cache_extent(&wc->shared, &node->cache);
1394 dest = wc->nodes[wc->active_node];
1395 splice_shared_node(node, dest);
1396 if (node->refs == 0) {
1397 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Counterpart of enter_shared_node(): invoked when the walk leaves the
 * shared block tracked at @level.  Its accumulated records are spliced
 * into the next tracked node above it, which then becomes active.
 */
1403 static int leave_shared_node(struct btrfs_root *root,
1404 struct walk_control *wc, int level)
1406 struct shared_node *node;
1407 struct shared_node *dest;
1410 if (level == wc->root_level)
/* Locate the next tracked node above @level to take over as active. */
1413 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1417 BUG_ON(i >= BTRFS_MAX_LEVEL);
1419 node = wc->nodes[wc->active_node];
1420 wc->nodes[wc->active_node] = NULL;
1421 wc->active_node = i;
1423 dest = wc->nodes[wc->active_node];
1424 if (wc->active_node < wc->root_level ||
1425 btrfs_root_refs(&root->root_item) > 0) {
1426 BUG_ON(node->refs <= 1);
1427 splice_shared_node(node, dest);
/* Deleted root: the block must still be shared by another tree. */
1429 BUG_ON(node->refs < 2);
1438 * 1 - if the root with id child_root_id is a child of root parent_root_id
1439 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1440 * has other root(s) as parent(s)
1441 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Decide the parent/child relationship between two subvolume roots by
 * consulting ROOT_REF / ROOT_BACKREF items in the tree of tree roots.
 */
1443 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1446 struct btrfs_path path;
1447 struct btrfs_key key;
1448 struct extent_buffer *leaf;
1452 btrfs_init_path(&path);
/* Fast path: an exact ROOT_REF (parent -> child) item settles it. */
1454 key.objectid = parent_root_id;
1455 key.type = BTRFS_ROOT_REF_KEY;
1456 key.offset = child_root_id;
1457 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1461 btrfs_release_path(&path);
/* Slow path: scan every ROOT_BACKREF item of the child root. */
1465 key.objectid = child_root_id;
1466 key.type = BTRFS_ROOT_BACKREF_KEY;
1468 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1474 leaf = path.nodes[0];
1475 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1476 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1479 leaf = path.nodes[0];
1482 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1483 if (key.objectid != child_root_id ||
1484 key.type != BTRFS_ROOT_BACKREF_KEY)
1489 if (key.offset == parent_root_id) {
1490 btrfs_release_path(&path);
1497 btrfs_release_path(&path);
/* has_parent distinguishes "other parent exists" (0) from "orphan root" (2). */
1500 return has_parent ? 0 : 2;
/*
 * Record every name stored in a DIR_ITEM/DIR_INDEX leaf item as an inode
 * backref: regular entries go to the inode cache, subvolume entries to
 * the root cache.  Also accumulates the directory's expected i_size.
 */
1503 static int process_dir_item(struct btrfs_root *root,
1504 struct extent_buffer *eb,
1505 int slot, struct btrfs_key *key,
1506 struct shared_node *active_node)
1516 struct btrfs_dir_item *di;
1517 struct inode_record *rec;
1518 struct cache_tree *root_cache;
1519 struct cache_tree *inode_cache;
1520 struct btrfs_key location;
1521 char namebuf[BTRFS_NAME_LEN];
1523 root_cache = &active_node->root_cache;
1524 inode_cache = &active_node->inode_cache;
1525 rec = active_node->current;
1526 rec->found_dir_item = 1;
/* One leaf item may pack several dir entries back to back. */
1528 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1529 total = btrfs_item_size_nr(eb, slot);
1530 while (cur < total) {
1532 btrfs_dir_item_key_to_cpu(eb, di, &location);
1533 name_len = btrfs_dir_name_len(eb, di);
1534 data_len = btrfs_dir_data_len(eb, di);
1535 filetype = btrfs_dir_type(eb, di);
/* A directory's i_size is the sum of its entries' name lengths. */
1537 rec->found_size += name_len;
1538 if (name_len <= BTRFS_NAME_LEN) {
/* Over-long name: clamp the copy and flag the backref. */
1542 len = BTRFS_NAME_LEN;
1543 error = REF_ERR_NAME_TOO_LONG;
1545 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1547 if (location.type == BTRFS_INODE_ITEM_KEY) {
1548 add_inode_backref(inode_cache, location.objectid,
1549 key->objectid, key->offset, namebuf,
1550 len, filetype, key->type, error);
1551 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1552 add_inode_backref(root_cache, location.objectid,
1553 key->objectid, key->offset,
1554 namebuf, len, filetype,
1557 fprintf(stderr, "invalid location in dir item %u\n",
/* Unknown location type: record under a sentinel objectid. */
1559 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1560 key->objectid, key->offset, namebuf,
1561 len, filetype, key->type, error);
/* Step over this packed entry (header + name + data). */
1564 len = sizeof(*di) + name_len + data_len;
1565 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX key must contain exactly one entry. */
1568 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1569 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every name packed in an INODE_REF item as a backref of the
 * referenced inode in the active node's inode cache.
 */
1574 static int process_inode_ref(struct extent_buffer *eb,
1575 int slot, struct btrfs_key *key,
1576 struct shared_node *active_node)
1584 struct cache_tree *inode_cache;
1585 struct btrfs_inode_ref *ref;
1586 char namebuf[BTRFS_NAME_LEN];
1588 inode_cache = &active_node->inode_cache;
1590 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1591 total = btrfs_item_size_nr(eb, slot);
1592 while (cur < total) {
1593 name_len = btrfs_inode_ref_name_len(eb, ref);
1594 index = btrfs_inode_ref_index(eb, ref);
1595 if (name_len <= BTRFS_NAME_LEN) {
/* Clamp over-long names and remember the error on the backref. */
1599 len = BTRFS_NAME_LEN;
1600 error = REF_ERR_NAME_TOO_LONG;
1602 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
/* key->objectid is the inode, key->offset its parent directory. */
1603 add_inode_backref(inode_cache, key->objectid, key->offset,
1604 index, namebuf, len, 0, key->type, error);
/* Advance to the next packed ref within this item. */
1606 len = sizeof(*ref) + name_len;
1607 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Same as process_inode_ref() but for INODE_EXTREF items, where the
 * parent directory id is stored inside the extref instead of the key.
 */
1613 static int process_inode_extref(struct extent_buffer *eb,
1614 int slot, struct btrfs_key *key,
1615 struct shared_node *active_node)
1624 struct cache_tree *inode_cache;
1625 struct btrfs_inode_extref *extref;
1626 char namebuf[BTRFS_NAME_LEN];
1628 inode_cache = &active_node->inode_cache;
1630 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1631 total = btrfs_item_size_nr(eb, slot);
1632 while (cur < total) {
1633 name_len = btrfs_inode_extref_name_len(eb, extref);
1634 index = btrfs_inode_extref_index(eb, extref);
/* Parent dir comes from the extref payload, not from key->offset. */
1635 parent = btrfs_inode_extref_parent(eb, extref);
1636 if (name_len <= BTRFS_NAME_LEN) {
/* Clamp over-long names and remember the error on the backref. */
1640 len = BTRFS_NAME_LEN;
1641 error = REF_ERR_NAME_TOO_LONG;
1643 read_extent_buffer(eb, namebuf,
1644 (unsigned long)(extref + 1), len);
1645 add_inode_backref(inode_cache, key->objectid, parent,
1646 index, namebuf, len, 0, key->type, error);
1648 len = sizeof(*extref) + name_len;
1649 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the range [start, start + len) are covered by
 * checksum items in the csum tree; the covered byte count is returned
 * through @found.
 */
1656 static int count_csum_range(struct btrfs_root *root, u64 start,
1657 u64 len, u64 *found)
1659 struct btrfs_key key;
1660 struct btrfs_path path;
1661 struct extent_buffer *leaf;
1666 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1668 btrfs_init_path(&path);
1670 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1672 key.type = BTRFS_EXTENT_CSUM_KEY;
1674 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* The previous item may still cover @start; step back to include it. */
1678 if (ret > 0 && path.slots[0] > 0) {
1679 leaf = path.nodes[0];
1680 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1681 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1682 key.type == BTRFS_EXTENT_CSUM_KEY)
1687 leaf = path.nodes[0];
1688 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1689 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1694 leaf = path.nodes[0];
/* Stop once we leave the csum key space or pass the end of the range. */
1697 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1698 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1699 key.type != BTRFS_EXTENT_CSUM_KEY)
1702 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1703 if (key.offset >= start + len)
1706 if (key.offset > start)
/* Each csum_size-byte entry covers one sector of data. */
1709 size = btrfs_item_size_nr(leaf, path.slots[0]);
1710 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1711 if (csum_end > start) {
1712 size = min(csum_end - start, len);
1721 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item of the current inode record: track the
 * covered file range (overlaps/holes), sanity-check the extent fields,
 * and verify that REG extents have csums while PREALLOC extents do not.
 */
1727 static int process_file_extent(struct btrfs_root *root,
1728 struct extent_buffer *eb,
1729 int slot, struct btrfs_key *key,
1730 struct shared_node *active_node)
1732 struct inode_record *rec;
1733 struct btrfs_file_extent_item *fi;
1735 u64 disk_bytenr = 0;
1736 u64 extent_offset = 0;
/* Sector alignment mask; sectorsize is a power of two. */
1737 u64 mask = root->sectorsize - 1;
1741 rec = active_node->current;
1742 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1743 rec->found_file_extent = 1;
/* First extent of this inode: start tracking the covered range. */
1745 if (rec->extent_start == (u64)-1) {
1746 rec->extent_start = key->offset;
1747 rec->extent_end = key->offset;
/* Overlap with the previous extent is an error; a gap is a hole. */
1750 if (rec->extent_end > key->offset)
1751 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1752 else if (rec->extent_end < key->offset) {
1753 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1754 key->offset - rec->extent_end);
1759 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1760 extent_type = btrfs_file_extent_type(eb, fi);
1762 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1763 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1765 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1766 rec->found_size += num_bytes;
/* Round inline length up to a full sector for range tracking. */
1767 num_bytes = (num_bytes + mask) & ~mask;
1768 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1769 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1770 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1771 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1772 extent_offset = btrfs_file_extent_offset(eb, fi);
/* num_bytes must be non-zero and sector aligned. */
1773 if (num_bytes == 0 || (num_bytes & mask))
1774 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1775 if (num_bytes + extent_offset >
1776 btrfs_file_extent_ram_bytes(eb, fi))
1777 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Preallocated extents must not be compressed/encrypted/encoded. */
1778 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1779 (btrfs_file_extent_compression(eb, fi) ||
1780 btrfs_file_extent_encryption(eb, fi) ||
1781 btrfs_file_extent_other_encoding(eb, fi)))
1782 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 marks a hole extent; it contributes no size. */
1783 if (disk_bytenr > 0)
1784 rec->found_size += num_bytes;
1786 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1788 rec->extent_end = key->offset + num_bytes;
1791 * The data reloc tree will copy full extents into its inode and then
1792 * copy the corresponding csums. Because the extent it copied could be
1793 * a preallocated extent that hasn't been written to yet there may be no
1794 * csums to copy, ergo we won't have csums for our file extent. This is
1795 * ok so just don't bother checking csums if the inode belongs to the
1798 if (disk_bytenr > 0 &&
1799 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents are checksummed over their on-disk bytes. */
1801 if (btrfs_file_extent_compression(eb, fi))
1802 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1804 disk_bytenr += extent_offset;
1806 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
/* REG extents should be fully checksummed; PREALLOC must not be. */
1809 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1811 rec->found_csum_item = 1;
1812 if (found < num_bytes)
1813 rec->some_csum_missing = 1;
1814 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1816 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Walk all items of a single fs-tree leaf and dispatch each key type to
 * its item processor, advancing the active node's current inode record
 * as the objectid increases.
 */
1822 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1823 struct walk_control *wc)
1825 struct btrfs_key key;
1829 struct cache_tree *inode_cache;
1830 struct shared_node *active_node;
/* Nothing to gather for a deleted root's own leaves. */
1832 if (wc->root_level == wc->active_node &&
1833 btrfs_root_refs(&root->root_item) == 0)
1836 active_node = wc->nodes[wc->active_node];
1837 inode_cache = &active_node->inode_cache;
1838 nritems = btrfs_header_nritems(eb);
1839 for (i = 0; i < nritems; i++) {
1840 btrfs_item_key_to_cpu(eb, &key, i);
1842 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1844 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Keys are sorted, so a larger objectid finishes the previous inode. */
1847 if (active_node->current == NULL ||
1848 active_node->current->ino < key.objectid) {
1849 if (active_node->current) {
1850 active_node->current->checked = 1;
1851 maybe_free_inode_rec(inode_cache,
1852 active_node->current);
1854 active_node->current = get_inode_rec(inode_cache,
1856 BUG_ON(IS_ERR(active_node->current));
1859 case BTRFS_DIR_ITEM_KEY:
1860 case BTRFS_DIR_INDEX_KEY:
1861 ret = process_dir_item(root, eb, i, &key, active_node);
1863 case BTRFS_INODE_REF_KEY:
1864 ret = process_inode_ref(eb, i, &key, active_node);
1866 case BTRFS_INODE_EXTREF_KEY:
1867 ret = process_inode_extref(eb, i, &key, active_node);
1869 case BTRFS_INODE_ITEM_KEY:
1870 ret = process_inode_item(eb, i, &key, active_node);
1872 case BTRFS_EXTENT_DATA_KEY:
1873 ret = process_file_extent(root, eb, i, &key,
/*
 * Kick off readahead for the child blocks of @node starting at @slot,
 * to hide I/O latency during the depth-first tree walk.
 */
1883 static void reada_walk_down(struct btrfs_root *root,
1884 struct extent_buffer *node, int slot)
1893 level = btrfs_header_level(node);
1897 nritems = btrfs_header_nritems(node);
1898 blocksize = root->nodesize;
1899 for (i = slot; i < nritems; i++) {
1900 bytenr = btrfs_node_blockptr(node, i);
1901 ptr_gen = btrfs_node_ptr_generation(node, i);
/* Fire-and-forget: failures surface later on the real read. */
1902 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1907 * Check the child node/leaf by the following condition:
1908 * 1. the first item key of the node/leaf should be the same with the one
1910 * 2. block in parent node should match the child node/leaf.
1911 * 3. generation of parent node and child's header should be consistent.
1913 * Or the child node/leaf pointed by the key in parent is not valid.
1915 * We hope to check leaf owner too, but since subvol may share leaves,
1916 * which makes leaf owner check not so strong, key check should be
1917 * sufficient enough for that case.
1919 static int check_child_node(struct btrfs_root *root,
1920 struct extent_buffer *parent, int slot,
1921 struct extent_buffer *child)
1923 struct btrfs_key parent_key;
1924 struct btrfs_key child_key;
1927 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* A leaf's first key is an item key, a node's is a node key. */
1928 if (btrfs_header_level(child) == 0)
1929 btrfs_item_key_to_cpu(child, &child_key, 0);
1931 btrfs_node_key_to_cpu(child, &child_key, 0);
/* Condition 1: parent's pointer key must equal child's first key. */
1933 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1936 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1937 parent_key.objectid, parent_key.type, parent_key.offset,
1938 child_key.objectid, child_key.type, child_key.offset);
/* Condition 2: child header bytenr must match the parent's blockptr. */
1940 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1942 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1943 btrfs_node_blockptr(parent, slot),
1944 btrfs_header_bytenr(child));
/* Condition 3: generations of pointer and child header must agree. */
1946 if (btrfs_node_ptr_generation(parent, slot) !=
1947 btrfs_header_generation(child)) {
1949 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1950 btrfs_header_generation(child),
1951 btrfs_node_ptr_generation(parent, slot));
/*
 * Descend one step of the depth-first fs-tree walk: process leaves via
 * process_one_leaf(), validate and enter child blocks, and skip subtrees
 * whose shared root block was already visited (via enter_shared_node()).
 */
1956 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1957 struct walk_control *wc, int *level)
1959 enum btrfs_tree_block_status status;
1962 struct extent_buffer *next;
1963 struct extent_buffer *cur;
1968 WARN_ON(*level < 0);
1969 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reference count of the current block decides shared-node tracking. */
1970 ret = btrfs_lookup_extent_info(NULL, root,
1971 path->nodes[*level]->start,
1972 *level, 1, &refs, NULL);
1979 ret = enter_shared_node(root, path->nodes[*level]->start,
1987 while (*level >= 0) {
1988 WARN_ON(*level < 0);
1989 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1990 cur = path->nodes[*level];
1992 if (btrfs_header_level(cur) != *level)
/* Exhausted this node's slots; let walk_up_tree() take over. */
1995 if (path->slots[*level] >= btrfs_header_nritems(cur))
1998 ret = process_one_leaf(root, cur, wc);
2003 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2004 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2005 blocksize = root->nodesize;
2006 ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
/* Shared child already visited: skip the whole subtree. */
2012 ret = enter_shared_node(root, bytenr, refs,
2015 path->slots[*level]++;
2020 next = btrfs_find_tree_block(root, bytenr, blocksize);
2021 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
/* free_extent_buffer(NULL) is a no-op, so this is safe on a miss. */
2022 free_extent_buffer(next);
2023 reada_walk_down(root, cur, path->slots[*level]);
2024 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: record it as a corrupt extent and bail out. */
2026 if (!extent_buffer_uptodate(next)) {
2027 struct btrfs_key node_key;
2029 btrfs_node_key_to_cpu(path->nodes[*level],
2031 path->slots[*level]);
2032 btrfs_add_corrupt_extent_record(root->fs_info,
2034 path->nodes[*level]->start,
2035 root->nodesize, *level);
2041 ret = check_child_node(root, cur, path->slots[*level], next);
/* Run the generic node/leaf sanity checks before descending. */
2047 if (btrfs_is_leaf(next))
2048 status = btrfs_check_leaf(root, NULL, next);
2050 status = btrfs_check_node(root, NULL, next);
2051 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2052 free_extent_buffer(next);
/* Descend into the child block. */
2057 *level = *level - 1;
2058 free_extent_buffer(path->nodes[*level]);
2059 path->nodes[*level] = next;
2060 path->slots[*level] = 0;
2063 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Ascend the path after a subtree is finished: pop fully-consumed
 * levels, leaving their shared-node tracking via leave_shared_node(),
 * until a level with remaining slots is found.
 */
2067 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2068 struct walk_control *wc, int *level)
2071 struct extent_buffer *leaf;
2073 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2074 leaf = path->nodes[i];
/* This level still has unvisited slots; resume the walk here. */
2075 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2080 free_extent_buffer(path->nodes[*level]);
2081 path->nodes[*level] = NULL;
2082 BUG_ON(*level > wc->active_node);
2083 if (*level == wc->active_node)
2084 leave_shared_node(root, wc, *level);
/*
 * Verify that a root directory inode record is sane: inode item present
 * with no errors, exactly one link, and a single ".." inode-ref backref
 * at index 0 with no associated dir item/index.
 */
2091 static int check_root_dir(struct inode_record *rec)
2093 struct inode_backref *backref;
2096 if (!rec->found_inode_item || rec->errors)
2098 if (rec->nlink != 1 || rec->found_link != 0)
2100 if (list_empty(&rec->backrefs))
2102 backref = to_inode_backref(rec->backrefs.next);
2103 if (!backref->found_inode_ref)
/* The root dir's sole backref is its self-referencing "..". */
2105 if (backref->index != 0 || backref->namelen != 2 ||
2106 memcmp(backref->name, "..", 2))
2108 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite a directory inode's i_size to the size computed from its
 * entries (rec->found_size) and clear I_ERR_DIR_ISIZE_WRONG.
 */
2115 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2116 struct btrfs_root *root, struct btrfs_path *path,
2117 struct inode_record *rec)
2119 struct btrfs_inode_item *ei;
2120 struct btrfs_key key;
/* offset (u64)-1 + back-step below locates the inode item itself. */
2123 key.objectid = rec->ino;
2124 key.type = BTRFS_INODE_ITEM_KEY;
2125 key.offset = (u64)-1;
2127 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2131 if (!path->slots[0]) {
2138 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2139 if (key.objectid != rec->ino) {
2144 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2145 struct btrfs_inode_item);
2146 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2147 btrfs_mark_buffer_dirty(path->nodes[0]);
2148 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2149 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2150 root->root_key.objectid);
2152 btrfs_release_path(path);
/*
 * Insert the missing orphan item for rec->ino and clear
 * I_ERR_NO_ORPHAN_ITEM on success.
 */
2156 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2157 struct btrfs_root *root,
2158 struct btrfs_path *path,
2159 struct inode_record *rec)
2163 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2164 btrfs_release_path(path);
2166 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Rewrite an inode's nbytes field to the byte count computed from its
 * file extents (rec->found_size) and clear I_ERR_FILE_NBYTES_WRONG.
 */
2170 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2171 struct btrfs_root *root,
2172 struct btrfs_path *path,
2173 struct inode_record *rec)
2175 struct btrfs_inode_item *ei;
2176 struct btrfs_key key;
2179 key.objectid = rec->ino;
2180 key.type = BTRFS_INODE_ITEM_KEY;
2183 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2190 /* Since ret == 0, no need to check anything */
2191 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2192 struct btrfs_inode_item);
2193 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2194 btrfs_mark_buffer_dirty(path->nodes[0]);
2195 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2196 printf("reset nbytes for ino %llu root %llu\n",
2197 rec->ino, root->root_key.objectid);
2199 btrfs_release_path(path);
/*
 * Recreate a missing DIR_INDEX item for @backref in its parent dir and
 * update the parent's inode record (found_size / isize error bit) to
 * reflect the newly inserted entry.  Runs its own transaction.
 */
2203 static int add_missing_dir_index(struct btrfs_root *root,
2204 struct cache_tree *inode_cache,
2205 struct inode_record *rec,
2206 struct inode_backref *backref)
2208 struct btrfs_path *path;
2209 struct btrfs_trans_handle *trans;
2210 struct btrfs_dir_item *dir_item;
2211 struct extent_buffer *leaf;
2212 struct btrfs_key key;
2213 struct btrfs_disk_key disk_key;
2214 struct inode_record *dir_rec;
2215 unsigned long name_ptr;
/* Dir item header plus the name that follows it in the leaf. */
2216 u32 data_size = sizeof(*dir_item) + backref->namelen;
2219 path = btrfs_alloc_path();
2223 trans = btrfs_start_transaction(root, 1);
2224 if (IS_ERR(trans)) {
2225 btrfs_free_path(path);
2226 return PTR_ERR(trans);
2229 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2230 (unsigned long long)rec->ino);
2231 key.objectid = backref->dir;
2232 key.type = BTRFS_DIR_INDEX_KEY;
2233 key.offset = backref->index;
2235 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2238 leaf = path->nodes[0];
2239 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* The embedded location key points at the child inode's item. */
2241 disk_key.objectid = cpu_to_le64(rec->ino);
2242 disk_key.type = BTRFS_INODE_ITEM_KEY;
2243 disk_key.offset = 0;
2245 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2246 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2247 btrfs_set_dir_data_len(leaf, dir_item, 0);
2248 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2249 name_ptr = (unsigned long)(dir_item + 1);
2250 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2251 btrfs_mark_buffer_dirty(leaf);
2252 btrfs_free_path(path);
2253 btrfs_commit_transaction(trans, root);
2255 backref->found_dir_index = 1;
2256 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2257 BUG_ON(IS_ERR(dir_rec));
/* Re-evaluate the parent dir's size error after adding the name. */
2260 dir_rec->found_size += backref->namelen;
2261 if (dir_rec->found_size == dir_rec->isize &&
2262 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2263 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2264 if (dir_rec->found_size != dir_rec->isize)
2265 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Remove a bad DIR_INDEX entry described by @backref.  If the index
 * item holds only this name the whole item is deleted, otherwise just
 * the one name is removed.  Runs its own transaction.
 */
2270 static int delete_dir_index(struct btrfs_root *root,
2271 struct cache_tree *inode_cache,
2272 struct inode_record *rec,
2273 struct inode_backref *backref)
2275 struct btrfs_trans_handle *trans;
2276 struct btrfs_dir_item *di;
2277 struct btrfs_path *path;
2280 path = btrfs_alloc_path();
2284 trans = btrfs_start_transaction(root, 1);
2285 if (IS_ERR(trans)) {
2286 btrfs_free_path(path);
2287 return PTR_ERR(trans);
2291 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2292 (unsigned long long)backref->dir,
2293 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2294 (unsigned long long)root->objectid);
/* mod -1 asks the lookup to prepare the path for deletion. */
2296 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2297 backref->name, backref->namelen,
2298 backref->index, -1);
2301 btrfs_free_path(path);
2302 btrfs_commit_transaction(trans, root);
/* Whole-item delete vs. removing one packed name from the item. */
2309 ret = btrfs_del_item(trans, root, path);
2311 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2313 btrfs_free_path(path);
2314 btrfs_commit_transaction(trans, root);
/*
 * Recreate a lost inode item from what the backrefs and extents tell
 * us: mode/size are guessed (dir if dir items were seen, else a regular
 * file), timestamps are set to now.  Runs its own transaction.
 */
2318 static int create_inode_item(struct btrfs_root *root,
2319 struct inode_record *rec,
2320 struct inode_backref *backref, int root_dir)
2322 struct btrfs_trans_handle *trans;
2323 struct btrfs_inode_item inode_item;
2324 time_t now = time(NULL);
2327 trans = btrfs_start_transaction(root, 1);
2328 if (IS_ERR(trans)) {
2329 ret = PTR_ERR(trans);
2333 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2334 "be incomplete, please check permissions and content after "
2335 "the fsck completes.\n", (unsigned long long)root->objectid,
2336 (unsigned long long)rec->ino);
2338 memset(&inode_item, 0, sizeof(inode_item));
2339 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* Root dirs always get nlink 1; others take the observed link count. */
2341 btrfs_set_stack_inode_nlink(&inode_item, 1);
2343 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2344 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2345 if (rec->found_dir_item) {
2346 if (rec->found_file_extent)
2347 fprintf(stderr, "root %llu inode %llu has both a dir "
2348 "item and extents, unsure if it is a dir or a "
2349 "regular file so setting it as a directory\n",
2350 (unsigned long long)root->objectid,
2351 (unsigned long long)rec->ino);
2352 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2353 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2354 } else if (!rec->found_dir_item) {
/* Regular file: i_size is the end of the last seen extent. */
2355 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2356 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* Original timestamps are unrecoverable; stamp with the current time. */
2358 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2359 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2360 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2361 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2362 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2363 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2364 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2365 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2367 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2369 btrfs_commit_transaction(trans, root);
/*
 * Walk all backrefs of @rec and repair each inconsistency: delete bad
 * dir indexes, add missing dir index/item pairs, and recreate the inode
 * item when only references to it survive.  Returns the number of
 * repairs made, or a negative errno.
 */
2373 static int repair_inode_backrefs(struct btrfs_root *root,
2374 struct inode_record *rec,
2375 struct cache_tree *inode_cache,
2378 struct inode_backref *tmp, *backref;
2379 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2383 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* The root dir gets its inode item recreated directly. */
2384 if (!delete && rec->ino == root_dirid) {
2385 if (!rec->found_inode_item) {
2386 ret = create_inode_item(root, rec, backref, 1);
2393 /* Index 0 for root dir's are special, don't mess with it */
2394 if (rec->ino == root_dirid && backref->index == 0)
/* Dir index without matching inode ref (or mismatched): drop it. */
2398 ((backref->found_dir_index && !backref->found_inode_ref) ||
2399 (backref->found_dir_index && backref->found_inode_ref &&
2400 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2401 ret = delete_dir_index(root, inode_cache, rec, backref);
2405 list_del(&backref->list);
/* Dir item + inode ref but no dir index: recreate the index. */
2409 if (!delete && !backref->found_dir_index &&
2410 backref->found_dir_item && backref->found_inode_ref) {
2411 ret = add_missing_dir_index(root, inode_cache, rec,
/* Fully consistent backref: nothing left to fix, drop it. */
2416 if (backref->found_dir_item &&
2417 backref->found_dir_index &&
2418 backref->found_dir_index) {
2419 if (!backref->errors &&
2420 backref->found_inode_ref) {
2421 list_del(&backref->list);
/* Inode ref only: insert the missing dir index/item pair. */
2427 if (!delete && (!backref->found_dir_index &&
2428 !backref->found_dir_item &&
2429 backref->found_inode_ref)) {
2430 struct btrfs_trans_handle *trans;
2431 struct btrfs_key location;
2433 ret = check_dir_conflict(root, backref->name,
2439 * let nlink fixing routine to handle it,
2440 * which can do it better.
2445 location.objectid = rec->ino;
2446 location.type = BTRFS_INODE_ITEM_KEY;
2447 location.offset = 0;
2449 trans = btrfs_start_transaction(root, 1);
2450 if (IS_ERR(trans)) {
2451 ret = PTR_ERR(trans);
2454 fprintf(stderr, "adding missing dir index/item pair "
2456 (unsigned long long)rec->ino);
2457 ret = btrfs_insert_dir_item(trans, root, backref->name,
2459 backref->dir, &location,
2460 imode_to_type(rec->imode),
2463 btrfs_commit_transaction(trans, root);
/* All references intact but no inode item: recreate it. */
2467 if (!delete && (backref->found_inode_ref &&
2468 backref->found_dir_index &&
2469 backref->found_dir_item &&
2470 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2471 !rec->found_inode_item)) {
2472 ret = create_inode_item(root, rec, backref, 0);
2479 return ret ? ret : repaired;
2483 * To determine the file type for nlink/inode_item repair
2485 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2486 * Return -ENOENT if file type is not found.
2488 static int find_file_type(struct inode_record *rec, u8 *type)
2490 struct inode_backref *backref;
2492 /* For inode item recovered case */
2493 if (rec->found_inode_item) {
2494 *type = imode_to_type(rec->imode);
/* Otherwise fall back to the type recorded in any dir entry backref. */
2498 list_for_each_entry(backref, &rec->backrefs, list) {
2499 if (backref->found_dir_index || backref->found_dir_item) {
2500 *type = backref->filetype;
2508 * To determine the file name for nlink repair
2510 * Return 0 if file name is found, set name and namelen.
2511 * Return -ENOENT if file name is not found.
2513 static int find_file_name(struct inode_record *rec,
2514 char *name, int *namelen)
2516 struct inode_backref *backref;
/* Any backref that carries a name (dir entry or inode ref) will do. */
2518 list_for_each_entry(backref, &rec->backrefs, list) {
2519 if (backref->found_dir_index || backref->found_dir_item ||
2520 backref->found_inode_ref) {
2521 memcpy(name, backref->name, backref->namelen);
2522 *namelen = backref->namelen;
2529 /* Reset the nlink of the inode to the correct one */
/*
 * Strategy: unlink every backref (dropping invalid ones permanently),
 * force the on-disk nlink to 0, then re-add only the fully valid links
 * so btrfs_add_link() rebuilds the correct link count.
 */
2530 static int reset_nlink(struct btrfs_trans_handle *trans,
2531 struct btrfs_root *root,
2532 struct btrfs_path *path,
2533 struct inode_record *rec)
2535 struct inode_backref *backref;
2536 struct inode_backref *tmp;
2537 struct btrfs_key key;
2538 struct btrfs_inode_item *inode_item;
2541 /* We don't believe this either, reset it and iterate backref */
2542 rec->found_link = 0;
2544 /* Remove all backref including the valid ones */
2545 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2546 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2547 backref->index, backref->name,
2548 backref->namelen, 0);
2552 /* remove invalid backref, so it won't be added back */
2553 if (!(backref->found_dir_index &&
2554 backref->found_dir_item &&
2555 backref->found_inode_ref)) {
2556 list_del(&backref->list);
2563 /* Set nlink to 0 */
2564 key.objectid = rec->ino;
2565 key.type = BTRFS_INODE_ITEM_KEY;
2567 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2574 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2575 struct btrfs_inode_item);
2576 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2577 btrfs_mark_buffer_dirty(path->nodes[0]);
2578 btrfs_release_path(path);
2581 * Add back valid inode_ref/dir_item/dir_index,
2582 * add_link() will handle the nlink inc, so new nlink must be correct
2584 list_for_each_entry(backref, &rec->backrefs, list) {
2585 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2586 backref->name, backref->namelen,
2587 backref->filetype, &backref->index, 1);
2592 btrfs_release_path(path);
/*
 * Fix a wrong link count: recover (or synthesize) a name and type,
 * rebuild nlink via reset_nlink(), and if no valid link remains, attach
 * the inode under lost+found, retrying with ".INO" suffixes on name
 * collisions.  Clears I_ERR_LINK_COUNT_WRONG unconditionally at the end.
 */
2596 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2597 struct btrfs_root *root,
2598 struct btrfs_path *path,
2599 struct inode_record *rec)
2601 char *dir_name = "lost+found";
2602 char namebuf[BTRFS_NAME_LEN] = {0};
2607 int name_recovered = 0;
2608 int type_recovered = 0;
2612 * Get file name and type first before these invalid inode ref
2613 * are deleted by remove_all_invalid_backref()
2615 name_recovered = !find_file_name(rec, namebuf, &namelen);
2616 type_recovered = !find_file_type(rec, &type);
2618 if (!name_recovered) {
2619 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2620 rec->ino, rec->ino);
/* Fall back to the decimal inode number as the file name. */
2621 namelen = count_digits(rec->ino);
2622 sprintf(namebuf, "%llu", rec->ino);
2625 if (!type_recovered) {
2626 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2628 type = BTRFS_FT_REG_FILE;
2632 ret = reset_nlink(trans, root, path, rec);
2635 "Failed to reset nlink for inode %llu: %s\n",
2636 rec->ino, strerror(-ret));
/* No valid backref survived: relink the inode under lost+found. */
2640 if (rec->found_link == 0) {
2641 lost_found_ino = root->highest_inode;
2642 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2647 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2648 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2651 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2652 dir_name, strerror(-ret));
2655 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2656 namebuf, namelen, type, NULL, 1);
2658 * Add ".INO" suffix several times to handle case where
2659 * "FILENAME.INO" is already taken by another file.
2661 while (ret == -EEXIST) {
2663 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2665 if (namelen + count_digits(rec->ino) + 1 >
2670 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2672 namelen += count_digits(rec->ino) + 1;
2673 ret = btrfs_add_link(trans, root, rec->ino,
2674 lost_found_ino, namebuf,
2675 namelen, type, NULL, 1);
2679 "Failed to link the inode %llu to %s dir: %s\n",
2680 rec->ino, dir_name, strerror(-ret));
2684 * Just increase the found_link, don't actually add the
2685 * backref. This will make things easier and this inode
2686 * record will be freed after the repair is done.
2687 * So fsck will not report problem about this inode.
2690 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2691 namelen, namebuf, dir_name);
2693 printf("Fixed the nlink of inode %llu\n", rec->ino);
2696 * Clear the flag anyway, or we will loop forever for the same inode
2697 * as it will not be removed from the bad inode list and the dead loop
2700 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2701 btrfs_release_path(path);
2706 * Check if there is any normal(reg or prealloc) file extent for given
2708 * This is used to determine the file type when neither its dir_index/item or
2709 * inode_item exists.
2711 * This will *NOT* report error, if any error happens, just consider it does
2712 * not have any normal file extent.
2714 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2716 struct btrfs_path *path;
2717 struct btrfs_key key;
2718 struct btrfs_key found_key;
2719 struct btrfs_file_extent_item *fi;
2723 path = btrfs_alloc_path();
2727 key.type = BTRFS_EXTENT_DATA_KEY;
2730 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* Search may land past the leaf's last slot; step to the next leaf. */
2735 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2736 ret = btrfs_next_leaf(root, path);
2743 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2745 if (found_key.objectid != ino ||
2746 found_key.type != BTRFS_EXTENT_DATA_KEY)
2748 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2749 struct btrfs_file_extent_item);
/* Any non-inline extent (REG or PREALLOC) counts as "normal". */
2750 type = btrfs_file_extent_type(path->nodes[0], fi);
2751 if (type != BTRFS_FILE_EXTENT_INLINE) {
2757 btrfs_free_path(path);
2761 static u32 btrfs_type_to_imode(u8 type)
2763 static u32 imode_by_btrfs_type[] = {
2764 [BTRFS_FT_REG_FILE] = S_IFREG,
2765 [BTRFS_FT_DIR] = S_IFDIR,
2766 [BTRFS_FT_CHRDEV] = S_IFCHR,
2767 [BTRFS_FT_BLKDEV] = S_IFBLK,
2768 [BTRFS_FT_FIFO] = S_IFIFO,
2769 [BTRFS_FT_SOCK] = S_IFSOCK,
2770 [BTRFS_FT_SYMLINK] = S_IFLNK,
2773 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for @rec.
 *
 * The file type is recovered from whatever evidence survived (file extents,
 * dir items, orphan extents), falling back to a regular file. Only the
 * inode item itself is recreated here; nlink repair is left to the
 * I_ERR_LINK_COUNT_WRONG pass, which is forced on below.
 */
2776 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2777 struct btrfs_root *root,
2778 struct btrfs_path *path,
2779 struct inode_record *rec)
2783 int type_recovered = 0;
2786 printf("Trying to rebuild inode:%llu\n", rec->ino);
2788 type_recovered = !find_file_type(rec, &filetype);
2791 * Try to determine inode type if type not found.
2793 * For found regular file extent, it must be FILE.
2794 * For found dir_item/index, it must be DIR.
2796 * For undetermined one, use FILE as fallback.
2799 * 1. If found backref(inode_index/item is already handled) to it,
2801 * Need new inode-inode ref structure to allow search for that.
2803 if (!type_recovered) {
2804 if (rec->found_file_extent &&
2805 find_normal_file_extent(root, rec->ino)) {
2807 filetype = BTRFS_FT_REG_FILE;
2808 } else if (rec->found_dir_item) {
2810 filetype = BTRFS_FT_DIR;
2811 } else if (!list_empty(&rec->orphan_extents)) {
/* Orphan data extents imply file data existed -> regular file. */
2813 filetype = BTRFS_FT_REG_FILE;
2815 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2818 filetype = BTRFS_FT_REG_FILE;
2822 ret = btrfs_new_inode(trans, root, rec->ino,
2823 mode | btrfs_type_to_imode(filetype));
2828 * Here inode rebuild is done, we only rebuild the inode item,
2829 * don't repair the nlink(like move to lost+found).
2830 * That is the job of nlink repair.
2832 * We just fill the record and return
2834 rec->found_dir_item = 1;
2835 rec->imode = mode | btrfs_type_to_imode(filetype);
2837 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2838 /* Ensure the inode_nlinks repair function will be called */
2839 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach each orphan data extent recorded on @rec as a real file extent
 * of the inode, or free the extent when it conflicts with an existing one.
 * Updates the record's size/hole bookkeeping and clears the related error
 * bits as the repairs succeed.
 */
2844 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2845 struct btrfs_root *root,
2846 struct btrfs_path *path,
2847 struct inode_record *rec)
2849 struct orphan_data_extent *orphan;
2850 struct orphan_data_extent *tmp;
/* _safe iteration: entries are deleted from the list as they are handled. */
2853 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2855 * Check for conflicting file extents
2857 * Here we don't know whether the extents is compressed or not,
2858 * so we can only assume it not compressed nor data offset,
2859 * and use its disk_len as extent length.
2861 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2862 orphan->offset, orphan->disk_len, 0);
2863 btrfs_release_path(path);
/* Conflicting extent found: drop the orphan's backing extent instead. */
2868 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2869 orphan->disk_bytenr, orphan->disk_len);
2870 ret = btrfs_free_extent(trans,
2871 root->fs_info->extent_root,
2872 orphan->disk_bytenr, orphan->disk_len,
2873 0, root->objectid, orphan->objectid,
/* No conflict: insert the orphan as a real file extent item. */
2878 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2879 orphan->offset, orphan->disk_bytenr,
2880 orphan->disk_len, orphan->disk_len);
2884 /* Update file size info */
2885 rec->found_size += orphan->disk_len;
2886 if (rec->found_size == rec->nbytes)
2887 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2889 /* Update the file extent hole info too */
2890 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2894 if (RB_EMPTY_ROOT(&rec->holes))
2895 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2897 list_del(&orphan->list);
/* All orphan extents processed; the orphan error no longer applies. */
2900 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill every recorded file-extent hole of @rec by punching explicit hole
 * extents, clearing I_ERR_FILE_EXTENT_DISCOUNT once the hole tree empties.
 * Also handles the special case of a file that lost all of its extents.
 */
2905 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2906 struct btrfs_root *root,
2907 struct btrfs_path *path,
2908 struct inode_record *rec)
2910 struct rb_node *node;
2911 struct file_extent_hole *hole;
/* Walk the hole rbtree front-to-back, punching and deleting each hole. */
2915 node = rb_first(&rec->holes);
2919 hole = rb_entry(node, struct file_extent_hole, node);
2920 ret = btrfs_punch_hole(trans, root, rec->ino,
2921 hole->start, hole->len);
2924 ret = del_file_extent_hole(&rec->holes, hole->start,
2928 if (RB_EMPTY_ROOT(&rec->holes))
2929 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2930 node = rb_first(&rec->holes);
2932 /* special case for a file losing all its file extent */
2934 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2935 round_up(rec->isize, root->sectorsize));
2939 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2940 rec->ino, root->objectid);
/*
 * Dispatcher for per-inode repairs: bail out unless @rec carries one of the
 * repairable error bits, then run each applicable repair helper in order
 * inside a single transaction. Each helper only runs if the previous ones
 * succeeded (ret == 0).
 */
2945 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2947 struct btrfs_trans_handle *trans;
2948 struct btrfs_path *path;
/* Nothing we know how to repair -> leave the record untouched. */
2951 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2952 I_ERR_NO_ORPHAN_ITEM |
2953 I_ERR_LINK_COUNT_WRONG |
2954 I_ERR_NO_INODE_ITEM |
2955 I_ERR_FILE_EXTENT_ORPHAN |
2956 I_ERR_FILE_EXTENT_DISCOUNT|
2957 I_ERR_FILE_NBYTES_WRONG)))
2960 path = btrfs_alloc_path();
2965 * For nlink repair, it may create a dir and add link, so
2966 * 2 for parent(256)'s dir_index and dir_item
2967 * 2 for lost+found dir's inode_item and inode_ref
2968 * 1 for the new inode_ref of the file
2969 * 2 for lost+found dir's dir_index and dir_item for the file
2971 trans = btrfs_start_transaction(root, 7);
2972 if (IS_ERR(trans)) {
2973 btrfs_free_path(path);
2974 return PTR_ERR(trans);
/* Repair order matters: the inode item must exist before other fixes. */
2977 if (rec->errors & I_ERR_NO_INODE_ITEM)
2978 ret = repair_inode_no_item(trans, root, path, rec);
2979 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2980 ret = repair_inode_orphan_extent(trans, root, path, rec);
2981 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2982 ret = repair_inode_discount_extent(trans, root, path, rec);
2983 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2984 ret = repair_inode_isize(trans, root, path, rec);
2985 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2986 ret = repair_inode_orphan_item(trans, root, path, rec);
2987 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2988 ret = repair_inode_nlinks(trans, root, path, rec);
2989 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2990 ret = repair_inode_nbytes(trans, root, path, rec);
/* Commit whatever repairs succeeded, even on partial failure. */
2991 btrfs_commit_transaction(trans, root);
2992 btrfs_free_path(path);
/*
 * Validate (and optionally repair) every inode record collected for @root's
 * fs tree. Checks the root dir, recreating it in repair mode if missing,
 * then walks the whole inode cache: repairing backrefs first, then each
 * inode, and finally printing any unresolved errors.
 * Returns -1 if unrepaired errors remain, 0 otherwise.
 */
2996 static int check_inode_recs(struct btrfs_root *root,
2997 struct cache_tree *inode_cache)
2999 struct cache_extent *cache;
3000 struct ptr_node *node;
3001 struct inode_record *rec;
3002 struct inode_backref *backref;
3007 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A dead root (refs == 0) should have an empty inode cache. */
3009 if (btrfs_root_refs(&root->root_item) == 0) {
3010 if (!cache_tree_empty(inode_cache))
3011 fprintf(stderr, "warning line %d\n", __LINE__);
3016 * We need to record the highest inode number for later 'lost+found'
3018 * We must select an ino not used/referred by any existing inode, or
3019 * 'lost+found' ino may be a missing ino in a corrupted leaf,
3020 * this may cause 'lost+found' dir has wrong nlinks.
3022 cache = last_cache_extent(inode_cache);
3024 node = container_of(cache, struct ptr_node, cache);
3026 if (rec->ino > root->highest_inode)
3027 root->highest_inode = rec->ino;
3031 * We need to repair backrefs first because we could change some of the
3032 * errors in the inode recs.
3034 * We also need to go through and delete invalid backrefs first and then
3035 * add the correct ones second. We do this because we may get EEXIST
3036 * when adding back the correct index because we hadn't yet deleted the
3039 * For example, if we were missing a dir index then the directories
3040 * isize would be wrong, so if we fixed the isize to what we thought it
3041 * would be and then fixed the backref we'd still have a invalid fs, so
3042 * we need to add back the dir index and then check to see if the isize
3047 if (stage == 3 && !err)
/* Backref repair pass: only runs in repair mode. */
3050 cache = search_cache_extent(inode_cache, 0);
3051 while (repair && cache) {
3052 node = container_of(cache, struct ptr_node, cache);
3054 cache = next_cache_extent(cache);
3056 /* Need to free everything up and rescan */
3058 remove_cache_extent(inode_cache, &node->cache);
3060 free_inode_rec(rec);
3064 if (list_empty(&rec->backrefs))
3067 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Validate the root directory of this subvolume. */
3081 rec = get_inode_rec(inode_cache, root_dirid, 0);
3082 BUG_ON(IS_ERR(rec));
3084 ret = check_root_dir(rec);
3086 fprintf(stderr, "root %llu root dir %llu error\n",
3087 (unsigned long long)root->root_key.objectid,
3088 (unsigned long long)root_dirid);
3089 print_inode_error(root, rec);
/* Repair mode: recreate a missing root dir in its own transaction. */
3094 struct btrfs_trans_handle *trans;
3096 trans = btrfs_start_transaction(root, 1);
3097 if (IS_ERR(trans)) {
3098 err = PTR_ERR(trans);
3103 "root %llu missing its root dir, recreating\n",
3104 (unsigned long long)root->objectid);
3106 ret = btrfs_make_root_dir(trans, root, root_dirid);
3109 btrfs_commit_transaction(trans, root);
3113 fprintf(stderr, "root %llu root dir %llu not found\n",
3114 (unsigned long long)root->root_key.objectid,
3115 (unsigned long long)root_dirid);
/* Main pass: drain the cache, checking/repairing each inode record. */
3119 cache = search_cache_extent(inode_cache, 0);
3122 node = container_of(cache, struct ptr_node, cache);
3124 remove_cache_extent(inode_cache, &node->cache);
/* The root dir and the orphan objectid were handled/exempt above. */
3126 if (rec->ino == root_dirid ||
3127 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3128 free_inode_rec(rec);
3132 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3133 ret = check_orphan_item(root, rec->ino);
3135 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3136 if (can_free_inode_rec(rec)) {
3137 free_inode_rec(rec);
3142 if (!rec->found_inode_item)
3143 rec->errors |= I_ERR_NO_INODE_ITEM;
3144 if (rec->found_link != rec->nlink)
3145 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3147 ret = try_repair_inode(root, rec);
3148 if (ret == 0 && can_free_inode_rec(rec)) {
3149 free_inode_rec(rec);
3155 if (!(repair && ret == 0))
/* Report: mark and print every unresolved backref of the bad inode. */
3157 print_inode_error(root, rec);
3158 list_for_each_entry(backref, &rec->backrefs, list) {
3159 if (!backref->found_dir_item)
3160 backref->errors |= REF_ERR_NO_DIR_ITEM;
3161 if (!backref->found_dir_index)
3162 backref->errors |= REF_ERR_NO_DIR_INDEX;
3163 if (!backref->found_inode_ref)
3164 backref->errors |= REF_ERR_NO_INODE_REF;
3165 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3166 " namelen %u name %s filetype %d errors %x",
3167 (unsigned long long)backref->dir,
3168 (unsigned long long)backref->index,
3169 backref->namelen, backref->name,
3170 backref->filetype, backref->errors);
3171 print_ref_error(backref->errors);
3173 free_inode_rec(rec);
3175 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a fresh one on a cache miss.
 * Returns ERR_PTR(-ENOMEM) on allocation failure, ERR_PTR(-EEXIST) if the
 * insert races with an existing entry.
 */
3178 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3181 struct cache_extent *cache;
3182 struct root_record *rec = NULL;
/* size 1: root records are keyed by objectid alone. */
3185 cache = lookup_cache_extent(root_cache, objectid, 1);
3187 rec = container_of(cache, struct root_record, cache);
3189 rec = calloc(1, sizeof(*rec));
3191 return ERR_PTR(-ENOMEM);
3192 rec->objectid = objectid;
3193 INIT_LIST_HEAD(&rec->backrefs);
3194 rec->cache.start = objectid;
3195 rec->cache.size = 1;
3197 ret = insert_cache_extent(root_cache, &rec->cache);
3199 return ERR_PTR(-EEXIST);
/*
 * Find the backref on @rec matching (ref_root, dir, name); allocate a new
 * one (with the name stored inline after the struct) if none matches.
 */
3204 static struct root_backref *get_root_backref(struct root_record *rec,
3205 u64 ref_root, u64 dir, u64 index,
3206 const char *name, int namelen)
3208 struct root_backref *backref;
3210 list_for_each_entry(backref, &rec->backrefs, list) {
3211 if (backref->ref_root != ref_root || backref->dir != dir ||
3212 backref->namelen != namelen)
3214 if (memcmp(name, backref->name, namelen))
/* Not found: allocate struct + inline name buffer (+1 for NUL). */
3219 backref = calloc(1, sizeof(*backref) + namelen + 1);
3222 backref->ref_root = ref_root;
3224 backref->index = index;
3225 backref->namelen = namelen;
3226 memcpy(backref->name, name, namelen);
3227 backref->name[namelen] = '\0';
3228 list_add_tail(&backref->list, &rec->backrefs);
/*
 * cache_tree destructor callback: free a root_record together with all of
 * its queued backrefs.
 */
3232 static void free_root_record(struct cache_extent *cache)
3234 struct root_record *rec;
3235 struct root_backref *backref;
3237 rec = container_of(cache, struct root_record, cache);
3238 while (!list_empty(&rec->backrefs)) {
3239 backref = to_root_backref(rec->backrefs.next);
3240 list_del(&backref->list);
/* Generates free_root_recs_tree() for cache trees of root_records. */
3247 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence (dir item/index, root ref/backref) about how
 * root @root_id is referenced from @ref_root. Flags inconsistencies between
 * the different item types (index mismatch, duplicates) on the backref, and
 * marks the backref reachable once both a forward ref and a dir item exist.
 */
3249 static int add_root_backref(struct cache_tree *root_cache,
3250 u64 root_id, u64 ref_root, u64 dir, u64 index,
3251 const char *name, int namelen,
3252 int item_type, int errors)
3254 struct root_record *rec;
3255 struct root_backref *backref;
3257 rec = get_root_rec(root_cache, root_id);
3258 BUG_ON(IS_ERR(rec));
3259 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3262 backref->errors |= errors;
/* Every item type except DIR_ITEM carries an index; cross-check it. */
3264 if (item_type != BTRFS_DIR_ITEM_KEY) {
3265 if (backref->found_dir_index || backref->found_back_ref ||
3266 backref->found_forward_ref) {
3267 if (backref->index != index)
3268 backref->errors |= REF_ERR_INDEX_UNMATCH;
3270 backref->index = index;
/* Mark which evidence type we just saw; flag duplicates. */
3274 if (item_type == BTRFS_DIR_ITEM_KEY) {
3275 if (backref->found_forward_ref)
3277 backref->found_dir_item = 1;
3278 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3279 backref->found_dir_index = 1;
3280 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3281 if (backref->found_forward_ref)
3282 backref->errors |= REF_ERR_DUP_ROOT_REF;
3283 else if (backref->found_dir_item)
3285 backref->found_forward_ref = 1;
3286 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3287 if (backref->found_back_ref)
3288 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3289 backref->found_back_ref = 1;
/* Reachable = forward root ref plus a dir item pointing at it. */
3294 if (backref->found_forward_ref && backref->found_dir_item)
3295 backref->reachable = 1;
/*
 * Move subvolume-reference information found while scanning @root's inode
 * cache (@src_cache) into the global root cache (@dst_cache): inode records
 * that are actually child-root references become root backrefs.
 * Reloc tree records are simply discarded.
 */
3299 static int merge_root_recs(struct btrfs_root *root,
3300 struct cache_tree *src_cache,
3301 struct cache_tree *dst_cache)
3303 struct cache_extent *cache;
3304 struct ptr_node *node;
3305 struct inode_record *rec;
3306 struct inode_backref *backref;
/* Reloc trees carry no subvol references worth merging. */
3309 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3310 free_inode_recs_tree(src_cache);
3315 cache = search_cache_extent(src_cache, 0);
3318 node = container_of(cache, struct ptr_node, cache);
3320 remove_cache_extent(src_cache, &node->cache);
/* Only records that name a child root become root backrefs. */
3323 ret = is_child_root(root, root->objectid, rec->ino);
3329 list_for_each_entry(backref, &rec->backrefs, list) {
3330 BUG_ON(backref->found_inode_ref);
3331 if (backref->found_dir_item)
3332 add_root_backref(dst_cache, rec->ino,
3333 root->root_key.objectid, backref->dir,
3334 backref->index, backref->name,
3335 backref->namelen, BTRFS_DIR_ITEM_KEY,
3337 if (backref->found_dir_index)
3338 add_root_backref(dst_cache, rec->ino,
3339 root->root_key.objectid, backref->dir,
3340 backref->index, backref->name,
3341 backref->namelen, BTRFS_DIR_INDEX_KEY,
3345 free_inode_rec(rec);
/*
 * Verify reachability of every fs tree in @root_cache. Starting from the
 * top-level FS tree, iteratively clears 'reachable' on backrefs whose
 * referencing root is itself unreferenced, then reports unreferenced trees
 * and trees with broken/unresolved refs. Returns 1 if errors remain.
 */
3352 static int check_root_refs(struct btrfs_root *root,
3353 struct cache_tree *root_cache)
3355 struct root_record *rec;
3356 struct root_record *ref_root;
3357 struct root_backref *backref;
3358 struct cache_extent *cache;
/* The top-level FS tree is the reachability anchor. */
3364 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3365 BUG_ON(IS_ERR(rec));
3368 /* fixme: this can not detect circular references */
/* Propagation loop: demote refs whose source root is unreferenced. */
3371 cache = search_cache_extent(root_cache, 0);
3375 rec = container_of(cache, struct root_record, cache);
3376 cache = next_cache_extent(cache);
3378 if (rec->found_ref == 0)
3381 list_for_each_entry(backref, &rec->backrefs, list) {
3382 if (!backref->reachable)
3385 ref_root = get_root_rec(root_cache,
3387 BUG_ON(IS_ERR(ref_root));
3388 if (ref_root->found_ref > 0)
3391 backref->reachable = 0;
3393 if (rec->found_ref == 0)
/* Reporting pass over all root records. */
3399 cache = search_cache_extent(root_cache, 0);
3403 rec = container_of(cache, struct root_record, cache);
3404 cache = next_cache_extent(cache);
3406 if (rec->found_ref == 0 &&
3407 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3408 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3409 ret = check_orphan_item(root->fs_info->tree_root,
3415 * If we don't have a root item then we likely just have
3416 * a dir item in a snapshot for this root but no actual
3417 * ref key or anything so it's meaningless.
3419 if (!rec->found_root_item)
3422 fprintf(stderr, "fs tree %llu not referenced\n",
3423 (unsigned long long)rec->objectid);
/* Referenced but missing its root item is also an error. */
3427 if (rec->found_ref > 0 && !rec->found_root_item)
3429 list_for_each_entry(backref, &rec->backrefs, list) {
3430 if (!backref->found_dir_item)
3431 backref->errors |= REF_ERR_NO_DIR_ITEM;
3432 if (!backref->found_dir_index)
3433 backref->errors |= REF_ERR_NO_DIR_INDEX;
3434 if (!backref->found_back_ref)
3435 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3436 if (!backref->found_forward_ref)
3437 backref->errors |= REF_ERR_NO_ROOT_REF;
3438 if (backref->reachable && backref->errors)
3445 fprintf(stderr, "fs tree %llu refs %u %s\n",
3446 (unsigned long long)rec->objectid, rec->found_ref,
3447 rec->found_root_item ? "" : "not found");
3449 list_for_each_entry(backref, &rec->backrefs, list) {
3450 if (!backref->reachable)
3452 if (!backref->errors && rec->found_root_item)
3454 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3455 " index %llu namelen %u name %s errors %x\n",
3456 (unsigned long long)backref->ref_root,
3457 (unsigned long long)backref->dir,
3458 (unsigned long long)backref->index,
3459 backref->namelen, backref->name,
3461 print_ref_error(backref->errors);
3464 return errors > 0 ? 1 : 0;
/*
 * Parse one ROOT_REF / ROOT_BACKREF item at (@eb, @slot) and feed it to
 * add_root_backref(). For ROOT_REF the key is (referrer, REF, referee);
 * for ROOT_BACKREF the direction is reversed, hence the swapped arguments.
 * Over-long names are truncated and flagged with REF_ERR_NAME_TOO_LONG.
 */
3467 static int process_root_ref(struct extent_buffer *eb, int slot,
3468 struct btrfs_key *key,
3469 struct cache_tree *root_cache)
3475 struct btrfs_root_ref *ref;
3476 char namebuf[BTRFS_NAME_LEN];
3479 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3481 dirid = btrfs_root_ref_dirid(eb, ref);
3482 index = btrfs_root_ref_sequence(eb, ref);
3483 name_len = btrfs_root_ref_name_len(eb, ref);
/* Clamp to the buffer; record the truncation as a ref error. */
3485 if (name_len <= BTRFS_NAME_LEN) {
3489 len = BTRFS_NAME_LEN;
3490 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored inline right after the btrfs_root_ref struct. */
3492 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3494 if (key->type == BTRFS_ROOT_REF_KEY) {
3495 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3496 index, namebuf, len, key->type, error);
3498 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3499 index, namebuf, len, key->type, error);
/* cache_tree destructor callback: free one btrfs_corrupt_block entry. */
3504 static void free_corrupt_block(struct cache_extent *cache)
3506 struct btrfs_corrupt_block *corrupt;
3508 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* Generates free_corrupt_blocks_tree() used after each fs-root check. */
3512 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3515 * Repair the btree of the given root.
3517 * The fix is to remove the node key in corrupt_blocks cache_tree.
3518 * and rebalance the tree.
3519 * After the fix, the btree should be writeable.
3521 static int repair_btree(struct btrfs_root *root,
3522 struct cache_tree *corrupt_blocks)
3524 struct btrfs_trans_handle *trans;
3525 struct btrfs_path *path;
3526 struct btrfs_corrupt_block *corrupt;
3527 struct cache_extent *cache;
3528 struct btrfs_key key;
/* Nothing recorded as corrupted: nothing to repair. */
3533 if (cache_tree_empty(corrupt_blocks))
3536 path = btrfs_alloc_path();
3540 trans = btrfs_start_transaction(root, 1);
3541 if (IS_ERR(trans)) {
3542 ret = PTR_ERR(trans);
3543 fprintf(stderr, "Error starting transaction: %s\n",
/* Pass 1: drop the pointer to each corrupted block and free its extent. */
3547 cache = first_cache_extent(corrupt_blocks);
3549 corrupt = container_of(cache, struct btrfs_corrupt_block,
3551 level = corrupt->level;
3552 path->lowest_level = level;
3553 key.objectid = corrupt->key.objectid;
3554 key.type = corrupt->key.type;
3555 key.offset = corrupt->key.offset;
3558 * Here we don't want to do any tree balance, since it may
3559 * cause a balance with corrupted brother leaf/node,
3560 * so ins_len set to 0 here.
3561 * Balance will be done after all corrupt node/leaf is deleted.
3563 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3566 offset = btrfs_node_blockptr(path->nodes[level],
3567 path->slots[level]);
3569 /* Remove the ptr */
3570 ret = btrfs_del_ptr(trans, root, path, level,
3571 path->slots[level]);
3575 * Remove the corresponding extent
3576 * return value is not concerned.
3578 btrfs_release_path(path);
3579 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3580 0, root->root_key.objectid,
3582 cache = next_cache_extent(cache);
3585 /* Balance the btree using btrfs_search_slot() */
/* Pass 2: ins_len = -1 searches trigger rebalancing around each key. */
3586 cache = first_cache_extent(corrupt_blocks);
3588 corrupt = container_of(cache, struct btrfs_corrupt_block,
3590 memcpy(&key, &corrupt->key, sizeof(key));
3591 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3594 /* return will always >0 since it won't find the item */
3596 btrfs_release_path(path);
3597 cache = next_cache_extent(cache);
3600 btrfs_commit_transaction(trans, root);
3602 btrfs_free_path(path);
/*
 * Check one fs/subvolume tree: validates the root node, walks the whole
 * tree (honoring drop_progress for partially-dropped roots), collects
 * per-inode records, and merges root-reference info into @root_cache.
 * Corrupted tree blocks are accumulated in a local cache tree and, in
 * repair mode, handed to repair_btree().
 */
3606 static int check_fs_root(struct btrfs_root *root,
3607 struct cache_tree *root_cache,
3608 struct walk_control *wc)
3614 struct btrfs_path path;
3615 struct shared_node root_node;
3616 struct root_record *rec;
3617 struct btrfs_root_item *root_item = &root->root_item;
3618 struct cache_tree corrupt_blocks;
3619 struct orphan_data_extent *orphan;
3620 struct orphan_data_extent *tmp;
3621 enum btrfs_tree_block_status status;
3624 * Reuse the corrupt_block cache tree to record corrupted tree block
3626 * Unlike the usage in extent tree check, here we do it in a per
3627 * fs/subvol tree base.
3629 cache_tree_init(&corrupt_blocks);
3630 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* Reloc trees get no root_record; everything else is tracked. */
3632 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3633 rec = get_root_rec(root_cache, root->root_key.objectid);
3634 BUG_ON(IS_ERR(rec));
3635 if (btrfs_root_refs(root_item) > 0)
3636 rec->found_root_item = 1;
3639 btrfs_init_path(&path);
3640 memset(&root_node, 0, sizeof(root_node));
3641 cache_tree_init(&root_node.root_cache);
3642 cache_tree_init(&root_node.inode_cache);
3644 /* Move the orphan extent record to corresponding inode_record */
3645 list_for_each_entry_safe(orphan, tmp,
3646 &root->orphan_data_extents, list) {
3647 struct inode_record *inode;
3649 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3651 BUG_ON(IS_ERR(inode));
3652 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3653 list_move(&orphan->list, &inode->orphan_extents);
/* Seed the walk_control with this tree's root level. */
3656 level = btrfs_header_level(root->node);
3657 memset(wc->nodes, 0, sizeof(wc->nodes));
3658 wc->nodes[level] = &root_node;
3659 wc->active_node = level;
3660 wc->root_level = level;
3662 /* We may not have checked the root block, lets do that now */
3663 if (btrfs_is_leaf(root->node))
3664 status = btrfs_check_leaf(root, NULL, root->node);
3666 status = btrfs_check_node(root, NULL, root->node);
3667 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Live root: start the walk at the tree root. */
3670 if (btrfs_root_refs(root_item) > 0 ||
3671 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3672 path.nodes[level] = root->node;
3673 extent_buffer_get(root->node);
3674 path.slots[level] = 0;
/* Partially-dropped root: resume from the recorded drop_progress key. */
3676 struct btrfs_key key;
3677 struct btrfs_disk_key found_key;
3679 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3680 level = root_item->drop_level;
3681 path.lowest_level = level;
3682 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3685 btrfs_node_key(path.nodes[level], &found_key,
3687 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3688 sizeof(found_key)));
/* Main walk loop: descend then climb until the tree is exhausted. */
3692 wret = walk_down_tree(root, &path, wc, &level);
3698 wret = walk_up_tree(root, &path, wc, &level);
3705 btrfs_release_path(&path);
/* Report (and in repair mode, fix) any corrupted blocks we recorded. */
3707 if (!cache_tree_empty(&corrupt_blocks)) {
3708 struct cache_extent *cache;
3709 struct btrfs_corrupt_block *corrupt;
3711 printf("The following tree block(s) is corrupted in tree %llu:\n",
3712 root->root_key.objectid);
3713 cache = first_cache_extent(&corrupt_blocks);
3715 corrupt = container_of(cache,
3716 struct btrfs_corrupt_block,
3718 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3719 cache->start, corrupt->level,
3720 corrupt->key.objectid, corrupt->key.type,
3721 corrupt->key.offset);
3722 cache = next_cache_extent(cache);
3725 printf("Try to repair the btree for root %llu\n",
3726 root->root_key.objectid);
3727 ret = repair_btree(root, &corrupt_blocks);
3729 fprintf(stderr, "Failed to repair btree: %s\n",
3732 printf("Btree for root %llu is fixed\n",
3733 root->root_key.objectid);
3737 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3741 if (root_node.current) {
3742 root_node.current->checked = 1;
3743 maybe_free_inode_rec(&root_node.inode_cache,
3747 err = check_inode_recs(root, &root_node.inode_cache);
/* Cleanup: detach the per-root corrupt-block tracking again. */
3751 free_corrupt_blocks_tree(&corrupt_blocks);
3752 root->fs_info->corrupt_blocks = NULL;
3753 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Return nonzero if @objectid names a tree whose contents are fs-tree-like
 * (a real fs tree, or the reloc/data-reloc trees, which mirror them).
 */
3757 static int fs_root_objectid(u64 objectid)
3759 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3760 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3762 return is_fstree(objectid);
/*
 * Iterate every ROOT_ITEM in the tree of tree roots and run check_fs_root()
 * on each fs tree, also feeding ROOT_REF/ROOT_BACKREF items into the root
 * cache. Restarts the whole scan (dropping collected records) if the tree
 * root node changes underneath us or a sub-check returns -EAGAIN.
 */
3765 static int check_fs_roots(struct btrfs_root *root,
3766 struct cache_tree *root_cache)
3768 struct btrfs_path path;
3769 struct btrfs_key key;
3770 struct walk_control wc;
3771 struct extent_buffer *leaf, *tree_node;
3772 struct btrfs_root *tmp_root;
3773 struct btrfs_root *tree_root = root->fs_info->tree_root;
3777 if (ctx.progress_enabled) {
3778 ctx.tp = TASK_FS_ROOTS;
3779 task_start(ctx.info);
3783 * Just in case we made any changes to the extent tree that weren't
3784 * reflected into the free space cache yet.
3787 reset_cached_block_groups(root->fs_info);
3788 memset(&wc, 0, sizeof(wc));
3789 cache_tree_init(&wc.shared);
3790 btrfs_init_path(&path);
3795 key.type = BTRFS_ROOT_ITEM_KEY;
3796 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Remember the root node so we can detect concurrent modification. */
3801 tree_node = tree_root->node;
3803 if (tree_node != tree_root->node) {
3804 free_root_recs_tree(root_cache);
3805 btrfs_release_path(&path);
3808 leaf = path.nodes[0];
3809 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3810 ret = btrfs_next_leaf(tree_root, &path);
3816 leaf = path.nodes[0];
3818 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3819 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3820 fs_root_objectid(key.objectid)) {
/* Reloc trees are read uncached and freed right after checking. */
3821 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3822 tmp_root = btrfs_read_fs_root_no_cache(
3823 root->fs_info, &key);
3825 key.offset = (u64)-1;
3826 tmp_root = btrfs_read_fs_root(
3827 root->fs_info, &key);
3829 if (IS_ERR(tmp_root)) {
3833 ret = check_fs_root(tmp_root, root_cache, &wc);
/* -EAGAIN: state is stale, throw everything away and rescan. */
3834 if (ret == -EAGAIN) {
3835 free_root_recs_tree(root_cache);
3836 btrfs_release_path(&path);
3841 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3842 btrfs_free_fs_root(tmp_root);
3843 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3844 key.type == BTRFS_ROOT_BACKREF_KEY) {
3845 process_root_ref(leaf, path.slots[0], &key,
3852 btrfs_release_path(&path);
3854 free_extent_cache_tree(&wc.shared);
3855 if (!cache_tree_empty(&wc.shared))
3856 fprintf(stderr, "warning line %d\n", __LINE__);
3858 task_stop(ctx.info);
/*
 * Cross-check every backref recorded on @rec against what was found in the
 * extent tree and in the referencing trees. Returns nonzero if anything is
 * inconsistent; with @print_errs each inconsistency is also reported.
 */
3863 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3866 struct extent_backref *back;
3867 struct tree_backref *tback;
3868 struct data_backref *dback;
3872 for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3873 back = rb_node_to_extent_backref(n);
/* Backref seen while walking trees but missing from the extent tree. */
3874 if (!back->found_extent_tree) {
3878 if (back->is_data) {
3879 dback = to_data_backref(back);
3880 fprintf(stderr, "Backref %llu %s %llu"
3881 " owner %llu offset %llu num_refs %lu"
3882 " not found in extent tree\n",
3883 (unsigned long long)rec->start,
3884 back->full_backref ?
3886 back->full_backref ?
3887 (unsigned long long)dback->parent:
3888 (unsigned long long)dback->root,
3889 (unsigned long long)dback->owner,
3890 (unsigned long long)dback->offset,
3891 (unsigned long)dback->num_refs);
3893 tback = to_tree_backref(back);
3894 fprintf(stderr, "Backref %llu parent %llu"
3895 " root %llu not found in extent tree\n",
3896 (unsigned long long)rec->start,
3897 (unsigned long long)tback->parent,
3898 (unsigned long long)tback->root);
/* Tree backref in the extent tree with no matching tree block ref. */
3901 if (!back->is_data && !back->found_ref) {
3905 tback = to_tree_backref(back);
3906 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3907 (unsigned long long)rec->start,
3908 back->full_backref ? "parent" : "root",
3909 back->full_backref ?
3910 (unsigned long long)tback->parent :
3911 (unsigned long long)tback->root, back);
3913 if (back->is_data) {
3914 dback = to_data_backref(back);
/* Per-backref refcount must match what the file trees referenced. */
3915 if (dback->found_ref != dback->num_refs) {
3919 fprintf(stderr, "Incorrect local backref count"
3920 " on %llu %s %llu owner %llu"
3921 " offset %llu found %u wanted %u back %p\n",
3922 (unsigned long long)rec->start,
3923 back->full_backref ?
3925 back->full_backref ?
3926 (unsigned long long)dback->parent:
3927 (unsigned long long)dback->root,
3928 (unsigned long long)dback->owner,
3929 (unsigned long long)dback->offset,
3930 dback->found_ref, dback->num_refs, back);
3932 if (dback->disk_bytenr != rec->start) {
3936 fprintf(stderr, "Backref disk bytenr does not"
3937 " match extent record, bytenr=%llu, "
3938 "ref bytenr=%llu\n",
3939 (unsigned long long)rec->start,
3940 (unsigned long long)dback->disk_bytenr);
3943 if (dback->bytes != rec->nr) {
3947 fprintf(stderr, "Backref bytes do not match "
3948 "extent backref, bytenr=%llu, ref "
3949 "bytes=%llu, backref bytes=%llu\n",
3950 (unsigned long long)rec->start,
3951 (unsigned long long)rec->nr,
3952 (unsigned long long)dback->bytes);
/* Tally references: tree backrefs count differently than data ones. */
3955 if (!back->is_data) {
3958 dback = to_data_backref(back);
3959 found += dback->found_ref;
/* Grand total must equal the refcount stored in the extent item. */
3962 if (found != rec->refs) {
3966 fprintf(stderr, "Incorrect global backref count "
3967 "on %llu found %llu wanted %llu\n",
3968 (unsigned long long)rec->start,
3969 (unsigned long long)found,
3970 (unsigned long long)rec->refs);
/* rb_free_nodes() callback: free a single extent_backref node. */
3976 static void __free_one_backref(struct rb_node *node)
3978 struct extent_backref *back = rb_node_to_extent_backref(node);
/* Free every backref hanging off an extent record's rbtree. */
3983 static void free_all_extent_backrefs(struct extent_record *rec)
3985 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Drain @extent_cache, freeing each extent_record together with all of its
 * backrefs.
 */
3988 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3989 struct cache_tree *extent_cache)
3991 struct cache_extent *cache;
3992 struct extent_record *rec;
3995 cache = first_cache_extent(extent_cache);
3998 rec = container_of(cache, struct extent_record, cache);
3999 remove_cache_extent(extent_cache, cache);
4000 free_all_extent_backrefs(rec);
/*
 * Release @rec from @extent_cache if it is fully verified: contents and
 * owner checked, refcounts consistent, no duplicates, all backpointers
 * resolved, and none of the structural error flags set. Keeping only
 * problematic records bounds memory use during the extent scan.
 */
4005 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4006 struct extent_record *rec)
4008 if (rec->content_checked && rec->owner_ref_checked &&
4009 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4010 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4011 !rec->bad_full_backref && !rec->crossing_stripes &&
4012 !rec->wrong_chunk_type) {
4013 remove_cache_extent(extent_cache, &rec->cache);
4014 free_all_extent_backrefs(rec);
4015 list_del_init(&rec->list);
/*
 * Verify that the header owner of tree block @buf is backed by a real
 * reference: either a recorded (non-full) backref whose root matches the
 * owner, or — failing that — the owner's fs tree actually points at this
 * block at the expected level.
 * Returns 0 when an owner reference is found, 1 otherwise.
 */
4021 static int check_owner_ref(struct btrfs_root *root,
4022 struct extent_record *rec,
4023 struct extent_buffer *buf)
4025 struct extent_backref *node, *tmp;
4026 struct tree_backref *back;
4027 struct btrfs_root *ref_root;
4028 struct btrfs_key key;
4029 struct btrfs_path path;
4030 struct extent_buffer *parent;
/* Fast path: an existing by-root backref matching the header owner. */
4035 rbtree_postorder_for_each_entry_safe(node, tmp,
4036 &rec->backref_tree, node) {
4039 if (!node->found_ref)
4041 if (node->full_backref)
4043 back = to_tree_backref(node);
4044 if (btrfs_header_owner(buf) == back->root)
4047 BUG_ON(rec->is_root);
4049 /* try to find the block by search corresponding fs tree */
4050 key.objectid = btrfs_header_owner(buf);
4051 key.type = BTRFS_ROOT_ITEM_KEY;
4052 key.offset = (u64)-1;
4054 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4055 if (IS_ERR(ref_root))
/* Use the block's own first key to locate it from its parent. */
4058 level = btrfs_header_level(buf);
4060 btrfs_item_key_to_cpu(buf, &key, 0);
4062 btrfs_node_key_to_cpu(buf, &key, 0);
4064 btrfs_init_path(&path);
4065 path.lowest_level = level + 1;
4066 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
/* Owner confirmed iff the parent node's slot points at buf->start. */
4070 parent = path.nodes[level + 1];
4071 if (parent && buf->start == btrfs_node_blockptr(parent,
4072 path.slots[level + 1]))
4075 btrfs_release_path(&path);
4076 return found ? 0 : 1;
/*
 * Return nonzero if @rec has a non-full tree backref owned by the extent
 * tree, i.e. the block belongs to the extent tree itself.
 */
4079 static int is_extent_tree_record(struct extent_record *rec)
4081 struct extent_backref *ref, *tmp;
4082 struct tree_backref *back;
4085 rbtree_postorder_for_each_entry_safe(ref, tmp,
4086 &rec->backref_tree, node) {
4089 back = to_tree_backref(ref);
/* Full backrefs identify by parent, not root — skip them. */
4090 if (ref->full_backref)
4092 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * A tree block at [start, start+len) failed to read. If it belongs to the
 * extent tree, record it as a corrupt extent so later passes can rebuild
 * around it; its parent key is taken from the extent record.
 */
4099 static int record_bad_block_io(struct btrfs_fs_info *info,
4100 struct cache_tree *extent_cache,
4103 struct extent_record *rec;
4104 struct cache_extent *cache;
4105 struct btrfs_key key;
4107 cache = lookup_cache_extent(extent_cache, start, len);
4111 rec = container_of(cache, struct extent_record, cache);
/* Only extent-tree blocks are worth recording here. */
4112 if (!is_extent_tree_record(rec))
4115 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4116 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot+1 in @buf to restore key order.
 * For internal nodes the two key/blockptr pairs are exchanged wholesale;
 * for leaves the item data and headers are swapped and the keys fixed up
 * via btrfs_set_item_key_unsafe(). If slot 0 changes, the key is also
 * propagated upward with btrfs_fixup_low_keys().
 */
4119 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4120 struct extent_buffer *buf, int slot)
4122 if (btrfs_header_level(buf)) {
4123 struct btrfs_key_ptr ptr1, ptr2;
/* Node case: read both key_ptrs, then write them back crossed over. */
4125 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4126 sizeof(struct btrfs_key_ptr));
4127 read_extent_buffer(buf, &ptr2,
4128 btrfs_node_key_ptr_offset(slot + 1),
4129 sizeof(struct btrfs_key_ptr));
4130 write_extent_buffer(buf, &ptr1,
4131 btrfs_node_key_ptr_offset(slot + 1),
4132 sizeof(struct btrfs_key_ptr));
4133 write_extent_buffer(buf, &ptr2,
4134 btrfs_node_key_ptr_offset(slot),
4135 sizeof(struct btrfs_key_ptr));
/* Slot 0 swapped: the parent's key for this node must be updated. */
4137 struct btrfs_disk_key key;
4138 btrfs_node_key(buf, &key, 0);
4139 btrfs_fixup_low_keys(root, path, &key,
4140 btrfs_header_level(buf) + 1);
/* Leaf case: swap the raw item payloads and their offset/size headers. */
4143 struct btrfs_item *item1, *item2;
4144 struct btrfs_key k1, k2;
4145 char *item1_data, *item2_data;
4146 u32 item1_offset, item2_offset, item1_size, item2_size;
4148 item1 = btrfs_item_nr(slot);
4149 item2 = btrfs_item_nr(slot + 1);
4150 btrfs_item_key_to_cpu(buf, &k1, slot);
4151 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4152 item1_offset = btrfs_item_offset(buf, item1);
4153 item2_offset = btrfs_item_offset(buf, item2);
4154 item1_size = btrfs_item_size(buf, item1);
4155 item2_size = btrfs_item_size(buf, item2);
4157 item1_data = malloc(item1_size);
4160 item2_data = malloc(item2_size);
4166 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4167 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/* Cross-write the payloads into each other's old locations. */
4169 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4170 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4174 btrfs_set_item_offset(buf, item1, item2_offset);
4175 btrfs_set_item_offset(buf, item2, item1_offset);
4176 btrfs_set_item_size(buf, item1, item2_size);
4177 btrfs_set_item_size(buf, item2, item1_size);
/* Finally exchange the two item keys in place. */
4179 path->slots[0] = slot;
4180 btrfs_set_item_key_unsafe(root, path, &k2);
4181 path->slots[0] = slot + 1;
4182 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Walk adjacent key pairs in the block at path->lowest_level and swap
 * any pair that is out of order (via swap_values), then mark the buffer
 * dirty.  Works for both nodes and leaves.
 */
4187 static int fix_key_order(struct btrfs_trans_handle *trans,
4188 			 struct btrfs_root *root,
4189 			 struct btrfs_path *path)
4191 	struct extent_buffer *buf;
4192 	struct btrfs_key k1, k2;
4194 	int level = path->lowest_level;
4197 	buf = path->nodes[level];
4198 	for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Node vs leaf use different key accessors for the same comparison. */
4200 			btrfs_node_key_to_cpu(buf, &k1, i);
4201 			btrfs_node_key_to_cpu(buf, &k2, i + 1);
4203 			btrfs_item_key_to_cpu(buf, &k1, i);
4204 			btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* Strictly ascending pair -- nothing to fix for this slot. */
4206 		if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4208 		ret = swap_values(root, path, buf, i);
4211 		btrfs_mark_buffer_dirty(buf);
/*
 * Remove a corrupt item at @slot from leaf @buf by shifting the item
 * headers down over it and decrementing nritems.  Only a whitelist of
 * recoverable key types may be deleted; anything else is refused
 * (the early-return line for that case is elided here).
 */
4217 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4218 			     struct btrfs_root *root,
4219 			     struct btrfs_path *path,
4220 			     struct extent_buffer *buf, int slot)
4222 	struct btrfs_key key;
4223 	int nritems = btrfs_header_nritems(buf);
4225 	btrfs_item_key_to_cpu(buf, &key, slot);
4227 	/* These are all the keys we can deal with missing. */
4228 	if (key.type != BTRFS_DIR_INDEX_KEY &&
4229 	    key.type != BTRFS_EXTENT_ITEM_KEY &&
4230 	    key.type != BTRFS_METADATA_ITEM_KEY &&
4231 	    key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4232 	    key.type != BTRFS_EXTENT_DATA_REF_KEY)
4235 	printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4236 	       (unsigned long long)key.objectid, key.type,
4237 	       (unsigned long long)key.offset, slot, buf->start);
/* Close the gap in the item header array; item data is left in place. */
4238 	memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4239 			      btrfs_item_nr_offset(slot + 1),
4240 			      sizeof(struct btrfs_item) *
4241 			      (nritems - slot - 1));
4242 	btrfs_set_header_nritems(buf, nritems - 1);
/* Presumably only when slot == 0: the leaf's first key changed. */
4244 		struct btrfs_disk_key disk_key;
4246 		btrfs_item_key(buf, &disk_key, 0);
4247 		btrfs_fixup_low_keys(root, path, &disk_key, 1);
4249 	btrfs_mark_buffer_dirty(buf);
/*
 * Repair invalid item offsets in a leaf.  For each item, the expected
 * data end is either BTRFS_LEAF_DATA_SIZE (first item) or the previous
 * item's data offset.  Items overflowing those bounds get deleted
 * (delete_bogus_item); items leaving a gap get shifted up by the gap
 * size.  Leaves only -- the caller guarantees lowest_level == 0.
 */
4253 static int fix_item_offset(struct btrfs_trans_handle *trans,
4254 			   struct btrfs_root *root,
4255 			   struct btrfs_path *path)
4257 	struct extent_buffer *buf;
4261 	/* We should only get this for leaves */
4262 	BUG_ON(path->lowest_level);
4263 	buf = path->nodes[0];
4265 	for (i = 0; i < btrfs_header_nritems(buf); i++) {
4266 		unsigned int shift = 0, offset;
/* First item: its data must end exactly at the leaf data boundary. */
4268 		if (i == 0 && btrfs_item_end_nr(buf, i) !=
4269 		    BTRFS_LEAF_DATA_SIZE(root)) {
4270 			if (btrfs_item_end_nr(buf, i) >
4271 			    BTRFS_LEAF_DATA_SIZE(root)) {
4272 				ret = delete_bogus_item(trans, root, path,
4276 				fprintf(stderr, "item is off the end of the "
4277 					"leaf, can't fix\n");
4281 			shift = BTRFS_LEAF_DATA_SIZE(root) -
4282 				btrfs_item_end_nr(buf, i);
/* Subsequent items: data must end where the previous item's data starts. */
4283 		} else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4284 			   btrfs_item_offset_nr(buf, i - 1)) {
4285 			if (btrfs_item_end_nr(buf, i) >
4286 			    btrfs_item_offset_nr(buf, i - 1)) {
4287 				ret = delete_bogus_item(trans, root, path,
4291 				fprintf(stderr, "items overlap, can't fix\n");
4295 			shift = btrfs_item_offset_nr(buf, i - 1) -
4296 				btrfs_item_end_nr(buf, i);
4301 		printf("Shifting item nr %d by %u bytes in block %llu\n",
4302 		       i, shift, (unsigned long long)buf->start);
4303 		offset = btrfs_item_offset_nr(buf, i);
/* Move the item data up by @shift and record the new offset. */
4304 		memmove_extent_buffer(buf,
4305 				      btrfs_leaf_data(buf) + offset + shift,
4306 				      btrfs_leaf_data(buf) + offset,
4307 				      btrfs_item_size_nr(buf, i));
4308 		btrfs_set_item_offset(buf, btrfs_item_nr(i),
4310 		btrfs_mark_buffer_dirty(buf);
4314 	 * We may have moved things, in which case we want to exit so we don't
4315 	 * write those changes out. Once we have proper abort functionality in
4316 	 * progs this can be changed to something nicer.
4323  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4324  * then just return -EIO.
/*
 * Only BAD_KEY_ORDER and INVALID_OFFSETS are fixable here.  For each
 * root referencing the block (btrfs_find_all_roots), a transaction is
 * started, the block is COWed into place via btrfs_search_slot with
 * skip_check_block set, and the appropriate repair routine is run.
 */
4326 static int try_to_fix_bad_block(struct btrfs_root *root,
4327 				struct extent_buffer *buf,
4328 				enum btrfs_tree_block_status status)
4330 	struct btrfs_trans_handle *trans;
4331 	struct ulist *roots;
4332 	struct ulist_node *node;
4333 	struct btrfs_root *search_root;
4334 	struct btrfs_path *path;
4335 	struct ulist_iterator iter;
4336 	struct btrfs_key root_key, key;
4339 	if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4340 	    status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4343 	path = btrfs_alloc_path();
/* Collect every root that references this block. */
4347 	ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4350 		btrfs_free_path(path);
4354 	ULIST_ITER_INIT(&iter);
4355 	while ((node = ulist_next(roots, &iter))) {
4356 		root_key.objectid = node->val;
4357 		root_key.type = BTRFS_ROOT_ITEM_KEY;
4358 		root_key.offset = (u64)-1;
4360 		search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4367 		trans = btrfs_start_transaction(search_root, 0);
4368 		if (IS_ERR(trans)) {
4369 			ret = PTR_ERR(trans);
/* Search down to the bad block's own level so it gets COWed writable;
 * skip_check_block avoids tripping over the corruption we are fixing. */
4373 		path->lowest_level = btrfs_header_level(buf);
4374 		path->skip_check_block = 1;
4375 		if (path->lowest_level)
4376 			btrfs_node_key_to_cpu(buf, &key, 0);
4378 			btrfs_item_key_to_cpu(buf, &key, 0);
4379 		ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4382 			btrfs_commit_transaction(trans, search_root);
4385 		if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4386 			ret = fix_key_order(trans, search_root, path);
4387 		else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4388 			ret = fix_item_offset(trans, search_root, path);
4390 			btrfs_commit_transaction(trans, search_root);
4393 		btrfs_release_path(path);
4394 		btrfs_commit_transaction(trans, search_root);
4397 	btrfs_free_path(path);
/*
 * Validate a tree block against its extent record: run
 * btrfs_check_leaf/btrfs_check_node, attempt repair via
 * try_to_fix_bad_block on failure, then mark the record's
 * content/owner-ref checks as done.  check_owner_ref is only needed
 * when the block is not using full backrefs.
 */
4401 static int check_block(struct btrfs_root *root,
4402 		       struct cache_tree *extent_cache,
4403 		       struct extent_buffer *buf, u64 flags)
4405 	struct extent_record *rec;
4406 	struct cache_extent *cache;
4407 	struct btrfs_key key;
4408 	enum btrfs_tree_block_status status;
4412 	cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4415 	rec = container_of(cache, struct extent_record, cache);
4416 	rec->generation = btrfs_header_generation(buf);
4418 	level = btrfs_header_level(buf);
4419 	if (btrfs_header_nritems(buf) > 0) {
/* Stash the block's first key and level into the record for later checks. */
4422 			btrfs_item_key_to_cpu(buf, &key, 0);
4424 			btrfs_node_key_to_cpu(buf, &key, 0);
4426 		rec->info_objectid = key.objectid;
4428 	rec->info_level = level;
4430 	if (btrfs_is_leaf(buf))
4431 		status = btrfs_check_leaf(root, &rec->parent_key, buf);
4433 		status = btrfs_check_node(root, &rec->parent_key, buf);
4435 	if (status != BTRFS_TREE_BLOCK_CLEAN) {
4437 			status = try_to_fix_bad_block(root, buf, status);
4438 		if (status != BTRFS_TREE_BLOCK_CLEAN) {
4440 			fprintf(stderr, "bad block %llu\n",
4441 				(unsigned long long)buf->start);
4444 			 * Signal to callers we need to start the scan over
4445 			 * again since we'll have cowed blocks.
4450 	rec->content_checked = 1;
/* With full backrefs the owner field is not a root id -- nothing to verify. */
4451 	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4452 		rec->owner_ref_checked = 1;
4454 		ret = check_owner_ref(root, rec, buf);
4456 			rec->owner_ref_checked = 1;
/* Record may now be fully satisfied and eligible for freeing. */
4460 	maybe_free_extent_rec(extent_cache, rec);
/*
 * Look up an existing tree backref on @rec matching either @parent
 * (full backref) or @root (normal backref).  Returns NULL if absent.
 * NOTE(review): the branch selecting parent vs root in the match key is
 * elided; only the parent/full_backref assignment is visible.
 */
4465 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4466 					      u64 parent, u64 root)
4468 	struct rb_node *node;
4469 	struct tree_backref *back = NULL;
4470 	struct tree_backref match = {
4477 		match.parent = parent;
4478 		match.node.full_backref = 1;
/* Search the record's backref rb-tree with the composed match key. */
4483 	node = rb_search(&rec->backref_tree, &match.node.node,
4484 			 (rb_compare_keys)compare_extent_backref, NULL);
4486 		back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Allocate and insert a new tree backref on @rec.  A non-zero @parent
 * makes it a full backref; otherwise it is keyed by @root.
 * NOTE(review): malloc-failure handling and the root assignment lines
 * are elided from this view.
 */
4491 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4492 						u64 parent, u64 root)
4494 	struct tree_backref *ref = malloc(sizeof(*ref));
4498 	memset(&ref->node, 0, sizeof(ref->node));
4500 		ref->parent = parent;
4501 		ref->node.full_backref = 1;
4504 		ref->node.full_backref = 0;
4506 	rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
/*
 * Look up an existing data backref on @rec.  The match key includes
 * found_ref and disk_bytenr/bytes in addition to the parent-or-root
 * discriminator, mirroring find_tree_backref.  Returns NULL if absent.
 */
4511 static struct data_backref *find_data_backref(struct extent_record *rec,
4512 						u64 parent, u64 root,
4513 						u64 owner, u64 offset,
4515 						u64 disk_bytenr, u64 bytes)
4517 	struct rb_node *node;
4518 	struct data_backref *back = NULL;
4519 	struct data_backref match = {
4526 		.found_ref = found_ref,
4527 		.disk_bytenr = disk_bytenr,
4531 		match.parent = parent;
4532 		match.node.full_backref = 1;
4537 	node = rb_search(&rec->backref_tree, &match.node.node,
4538 			 (rb_compare_keys)compare_extent_backref, NULL);
4540 		back = to_data_backref(rb_node_to_extent_backref(node));
/*
 * Allocate and insert a new data backref on @rec, keyed by parent
 * (full backref) or root/owner/offset (normal).  Also bumps the
 * record's max_size when @max_size exceeds it.
 * NOTE(review): malloc-failure handling and some field assignments
 * are elided from this view.
 */
4545 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4546 						u64 parent, u64 root,
4547 						u64 owner, u64 offset,
4550 	struct data_backref *ref = malloc(sizeof(*ref));
4554 	memset(&ref->node, 0, sizeof(ref->node));
4555 	ref->node.is_data = 1;
4558 		ref->parent = parent;
4561 		ref->node.full_backref = 1;
4565 		ref->offset = offset;
4566 		ref->node.full_backref = 0;
4568 	ref->bytes = max_size;
4571 	rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4572 	if (max_size > rec->max_size)
4573 		rec->max_size = max_size;
4577 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent's data/metadata/system
 * classification disagrees with the flags of the block group that
 * contains it.  SYSTEM vs METADATA can only be told apart by looking
 * at the extent's first backref (chunk-tree root => SYSTEM).
 */
4578 static void check_extent_type(struct extent_record *rec)
4580 	struct btrfs_block_group_cache *bg_cache;
4582 	bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4586 	/* data extent, check chunk directly*/
4587 	if (!rec->metadata) {
4588 		if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4589 			rec->wrong_chunk_type = 1;
4593 	/* metadata extent, check the obvious case first */
4594 	if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4595 				 BTRFS_BLOCK_GROUP_METADATA))) {
4596 		rec->wrong_chunk_type = 1;
4601 	 * Check SYSTEM extent, as it's also marked as metadata, we can only
4602 	 * make sure it's a SYSTEM extent by its backref
4604 	if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4605 		struct extent_backref *node;
4606 		struct tree_backref *tback;
4609 		node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4610 		if (node->is_data) {
4611 			/* tree block shouldn't have data backref */
4612 			rec->wrong_chunk_type = 1;
4615 		tback = container_of(node, struct tree_backref, node);
4617 		if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4618 			bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4620 			bg_type = BTRFS_BLOCK_GROUP_METADATA;
4621 		if (!(bg_cache->flags & bg_type))
4622 			rec->wrong_chunk_type = 1;
4627  * Allocate a new extent record, fill default values from @tmpl and insert int
4628  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4629  * the cache, otherwise it fails.
4631 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4632 		struct extent_record *tmpl)
4634 	struct extent_record *rec;
/* NOTE(review): malloc-failure handling is elided from this view. */
4637 	rec = malloc(sizeof(*rec));
4640 	rec->start = tmpl->start;
4641 	rec->max_size = tmpl->max_size;
/* nr must cover max_size so overlapping lookups behave consistently. */
4642 	rec->nr = max(tmpl->nr, tmpl->max_size);
4643 	rec->found_rec = tmpl->found_rec;
4644 	rec->content_checked = tmpl->content_checked;
4645 	rec->owner_ref_checked = tmpl->owner_ref_checked;
4646 	rec->num_duplicates = 0;
4647 	rec->metadata = tmpl->metadata;
4648 	rec->flag_block_full_backref = FLAG_UNSET;
4649 	rec->bad_full_backref = 0;
4650 	rec->crossing_stripes = 0;
4651 	rec->wrong_chunk_type = 0;
4652 	rec->is_root = tmpl->is_root;
4653 	rec->refs = tmpl->refs;
4654 	rec->extent_item_refs = tmpl->extent_item_refs;
4655 	rec->parent_generation = tmpl->parent_generation;
4656 	INIT_LIST_HEAD(&rec->backrefs);
4657 	INIT_LIST_HEAD(&rec->dups);
4658 	INIT_LIST_HEAD(&rec->list);
4659 	rec->backref_tree = RB_ROOT;
4660 	memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4661 	rec->cache.start = tmpl->start;
4662 	rec->cache.size = tmpl->nr;
4663 	ret = insert_cache_extent(extent_cache, &rec->cache);
4665 	bytes_used += rec->nr;
/* Presumably metadata-only: stripe-crossing check uses nodesize. */
4668 		rec->crossing_stripes = check_crossing_stripes(rec->start,
4669 				global_info->tree_root->nodesize);
4670 	check_extent_type(rec);
4675  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4677  * - refs              - if found, increase refs
4678  * - is_root           - if found, set
4679  * - content_checked   - if found, set
4680  * - owner_ref_checked - if found, set
4682  * If not found, create a new one, initialize and insert.
4684 static int add_extent_rec(struct cache_tree *extent_cache,
4685 		struct extent_record *tmpl)
4687 	struct extent_record *rec;
4688 	struct cache_extent *cache;
4692 	cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4694 		rec = container_of(cache, struct extent_record, cache);
4698 			rec->nr = max(tmpl->nr, tmpl->max_size);
4701 		 * We need to make sure to reset nr to whatever the extent
4702 		 * record says was the real size, this way we can compare it to
4705 		if (tmpl->found_rec) {
/* Same range found twice, or a nested record with a different start:
 * track it as a duplicate for later reconciliation. */
4706 			if (tmpl->start != rec->start || rec->found_rec) {
4707 				struct extent_record *tmp;
4710 				if (list_empty(&rec->list))
4711 					list_add_tail(&rec->list,
4712 						      &duplicate_extents);
4715 				 * We have to do this song and dance in case we
4716 				 * find an extent record that falls inside of
4717 				 * our current extent record but does not have
4718 				 * the same objectid.
4720 				tmp = malloc(sizeof(*tmp));
4723 				tmp->start = tmpl->start;
4724 				tmp->max_size = tmpl->max_size;
4727 				tmp->metadata = tmpl->metadata;
4728 				tmp->extent_item_refs = tmpl->extent_item_refs;
4729 				INIT_LIST_HEAD(&tmp->list);
4730 				list_add_tail(&tmp->list, &rec->dups);
4731 				rec->num_duplicates++;
/* Conflicting ref counts from two extent items indicate corruption. */
4738 		if (tmpl->extent_item_refs && !dup) {
4739 			if (rec->extent_item_refs) {
4740 				fprintf(stderr, "block %llu rec "
4741 					"extent_item_refs %llu, passed %llu\n",
4742 					(unsigned long long)tmpl->start,
4743 					(unsigned long long)
4744 							rec->extent_item_refs,
4745 					(unsigned long long)tmpl->extent_item_refs);
4747 			rec->extent_item_refs = tmpl->extent_item_refs;
4751 		if (tmpl->content_checked)
4752 			rec->content_checked = 1;
4753 		if (tmpl->owner_ref_checked)
4754 			rec->owner_ref_checked = 1;
4755 		memcpy(&rec->parent_key, &tmpl->parent_key,
4756 				sizeof(tmpl->parent_key));
4757 		if (tmpl->parent_generation)
4758 			rec->parent_generation = tmpl->parent_generation;
4759 		if (rec->max_size < tmpl->max_size)
4760 			rec->max_size = tmpl->max_size;
4763 		 * A metadata extent can't cross stripe_len boundary, otherwise
4764 		 * kernel scrub won't be able to handle it.
4765 		 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4769 			rec->crossing_stripes = check_crossing_stripes(
4770 				rec->start, global_info->tree_root->nodesize);
4771 		check_extent_type(rec);
4772 		maybe_free_extent_rec(extent_cache, rec);
/* Not found in cache: fall through to fresh insertion. */
4776 	ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Register a tree backref (parent-or-root keyed) for the extent at
 * @bytenr.  Creates a placeholder extent record first if none exists.
 * @found_ref distinguishes "seen the actual reference" from "seen the
 * entry in the extent tree"; seeing either twice is reported as a
 * duplicate backref.
 */
4781 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4782 			    u64 parent, u64 root, int found_ref)
4784 	struct extent_record *rec;
4785 	struct tree_backref *back;
4786 	struct cache_extent *cache;
4788 	cache = lookup_cache_extent(extent_cache, bytenr, 1);
/* No record yet: insert a minimal template, then re-lookup. */
4790 		struct extent_record tmpl;
4792 		memset(&tmpl, 0, sizeof(tmpl));
4793 		tmpl.start = bytenr;
4797 		add_extent_rec_nolookup(extent_cache, &tmpl);
4799 		cache = lookup_cache_extent(extent_cache, bytenr, 1);
4804 	rec = container_of(cache, struct extent_record, cache);
4805 	if (rec->start != bytenr) {
4809 	back = find_tree_backref(rec, parent, root);
4811 		back = alloc_tree_backref(rec, parent, root);
4816 		if (back->node.found_ref) {
4817 			fprintf(stderr, "Extent back ref already exists "
4818 				"for %llu parent %llu root %llu \n",
4819 				(unsigned long long)bytenr,
4820 				(unsigned long long)parent,
4821 				(unsigned long long)root);
4823 		back->node.found_ref = 1;
4825 		if (back->node.found_extent_tree) {
4826 			fprintf(stderr, "Extent back ref already exists "
4827 				"for %llu parent %llu root %llu \n",
4828 				(unsigned long long)bytenr,
4829 				(unsigned long long)parent,
4830 				(unsigned long long)root);
4832 		back->node.found_extent_tree = 1;
4834 	check_extent_type(rec);
4835 	maybe_free_extent_rec(extent_cache, rec);
/*
 * Register a data backref for the extent at @bytenr, creating a
 * placeholder record if needed.  When @found_ref is set, @max_size is
 * the real extent length and must match any previously found ref of
 * the same identity; mismatches get a separate backref so the
 * discrepancy is noticed later (see the in-body comment).
 */
4839 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4840 			    u64 parent, u64 root, u64 owner, u64 offset,
4841 			    u32 num_refs, int found_ref, u64 max_size)
4843 	struct extent_record *rec;
4844 	struct data_backref *back;
4845 	struct cache_extent *cache;
4847 	cache = lookup_cache_extent(extent_cache, bytenr, 1);
4849 		struct extent_record tmpl;
4851 		memset(&tmpl, 0, sizeof(tmpl));
4852 		tmpl.start = bytenr;
4854 		tmpl.max_size = max_size;
4856 		add_extent_rec_nolookup(extent_cache, &tmpl);
4858 		cache = lookup_cache_extent(extent_cache, bytenr, 1);
4863 	rec = container_of(cache, struct extent_record, cache);
4864 	if (rec->max_size < max_size)
4865 		rec->max_size = max_size;
4868 	 * If found_ref is set then max_size is the real size and must match the
4869 	 * existing refs. So if we have already found a ref then we need to
4870 	 * make sure that this ref matches the existing one, otherwise we need
4871 	 * to add a new backref so we can notice that the backrefs don't match
4872 	 * and we need to figure out who is telling the truth. This is to
4873 	 * account for that awful fsync bug I introduced where we'd end up with
4874 	 * a btrfs_file_extent_item that would have its length include multiple
4875 	 * prealloc extents or point inside of a prealloc extent.
4877 	back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4880 		back = alloc_data_backref(rec, parent, root, owner, offset,
/* Refs from file extent items always come one at a time. */
4886 		BUG_ON(num_refs != 1);
4887 		if (back->node.found_ref)
4888 			BUG_ON(back->bytes != max_size);
4889 		back->node.found_ref = 1;
4890 		back->found_ref += 1;
4891 		back->bytes = max_size;
4892 		back->disk_bytenr = bytenr;
/* Data extents need no content/owner verification pass. */
4894 		rec->content_checked = 1;
4895 		rec->owner_ref_checked = 1;
4897 		if (back->node.found_extent_tree) {
4898 			fprintf(stderr, "Extent back ref already exists "
4899 				"for %llu parent %llu root %llu "
4900 				"owner %llu offset %llu num_refs %lu\n",
4901 				(unsigned long long)bytenr,
4902 				(unsigned long long)parent,
4903 				(unsigned long long)root,
4904 				(unsigned long long)owner,
4905 				(unsigned long long)offset,
4906 				(unsigned long)num_refs);
4908 		back->num_refs = num_refs;
4909 		back->node.found_extent_tree = 1;
4911 	maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, bytenr+size) for processing unless already seen.
 * The @seen tree deduplicates; insertion into @pending is only reached
 * when the seen-insert succeeded (the early-return line is elided).
 */
4915 static int add_pending(struct cache_tree *pending,
4916 		       struct cache_tree *seen, u64 bytenr, u32 size)
4919 	ret = add_cache_extent(seen, bytenr, size);
4922 	add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (capacity @bits_nr) with the next batch of block ranges
 * to read.  Priority: readahead entries first, then node entries near
 * @last (backed up 32k to catch close-by blocks), then plain pending
 * entries.  When plenty of slots remain, nearby pending extents
 * (within 32k gaps) are appended to improve read locality.
 * Returns the number of entries filled (return lines are elided).
 */
4926 static int pick_next_pending(struct cache_tree *pending,
4927 			struct cache_tree *reada,
4928 			struct cache_tree *nodes,
4929 			u64 last, struct block_info *bits, int bits_nr,
4932 	unsigned long node_start = last;
4933 	struct cache_extent *cache;
/* Readahead queue takes absolute priority, one entry at a time. */
4936 	cache = search_cache_extent(reada, 0);
4938 		bits[0].start = cache->start;
4939 		bits[0].size = cache->size;
/* Back up a little so we pick up nodes just before @last too. */
4944 	if (node_start > 32768)
4945 		node_start -= 32768;
4947 	cache = search_cache_extent(nodes, node_start);
4949 		cache = search_cache_extent(nodes, 0);
4952 		cache = search_cache_extent(pending, 0);
4957 			bits[ret].start = cache->start;
4958 			bits[ret].size = cache->size;
4959 			cache = next_cache_extent(cache);
4961 		} while (cache && ret < bits_nr);
4967 		bits[ret].start = cache->start;
4968 		bits[ret].size = cache->size;
4969 		cache = next_cache_extent(cache);
4971 	} while (cache && ret < bits_nr);
/* Opportunistically batch nearby pending extents for locality. */
4973 	if (bits_nr - ret > 8) {
4974 		u64 lookup = bits[0].start + bits[0].size;
4975 		struct cache_extent *next;
4976 		next = search_cache_extent(pending, lookup);
4978 			if (next->start - lookup > 32768)
4980 			bits[ret].start = next->start;
4981 			bits[ret].size = next->size;
4982 			lookup = next->start + next->size;
4986 			next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback: unlink a chunk record from its
 * lists before it is released (the free itself is elided here).
 */
4994 static void free_chunk_record(struct cache_extent *cache)
4996 	struct chunk_record *rec;
4998 	rec = container_of(cache, struct chunk_record, cache);
4999 	list_del_init(&rec->list);
5000 	list_del_init(&rec->dextents);
/* Release every chunk record held in @chunk_cache. */
5004 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5006 	cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * rb-tree destruction callback for device records (the free call is
 * elided from this view).
 */
5009 static void free_device_record(struct rb_node *node)
5011 	struct device_record *rec;
5013 	rec = container_of(node, struct device_record, node);
/* Body of free_device_cache() -- its signature line is elided above. */
5017 	FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into @tree's extent cache and, on success, append it
 * to the tree's block_groups list.  Error-return lines are elided.
 */
5019 int insert_block_group_record(struct block_group_tree *tree,
5020 			      struct block_group_record *bg_rec)
5024 	ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5028 	list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback: unlink a block group record from
 * its list before release (the free itself is elided here).
 */
5032 static void free_block_group_record(struct cache_extent *cache)
5034 	struct block_group_record *rec;
5036 	rec = container_of(cache, struct block_group_record, cache);
5037 	list_del_init(&rec->list);
/* Release every block group record held in @tree. */
5041 void free_block_group_tree(struct block_group_tree *tree)
5043 	cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into @tree and put it on both orphan lists until a
 * matching chunk/device is found.  Error-return lines are elided.
 */
5046 int insert_device_extent_record(struct device_extent_tree *tree,
5047 				struct device_extent_record *de_rec)
5052 	 * Device extent is a bit different from the other extents, because
5053 	 * the extents which belong to the different devices may have the
5054 	 * same start and size, so we need use the special extent cache
5055 	 * search/insert functions.
5057 	ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5061 	list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5062 	list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback: detach a device extent record
 * from whichever lists it is still on (the free itself is elided).
 */
5066 static void free_device_extent_record(struct cache_extent *cache)
5068 	struct device_extent_record *rec;
5070 	rec = container_of(cache, struct device_extent_record, cache);
5071 	if (!list_empty(&rec->chunk_list))
5072 		list_del_init(&rec->chunk_list);
5073 	if (!list_empty(&rec->device_list))
5074 		list_del_init(&rec->device_list);
/* Release every device extent record held in @tree. */
5078 void free_device_extent_tree(struct device_extent_tree *tree)
5080 	cache_tree_free_extents(&tree->tree, free_device_extent_record);
5083 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Legacy (v0) extent-ref item: objectids below FIRST_FREE_OBJECTID are
 * tree roots, so record a tree backref; everything else is data.
 * key.offset carries the parent bytenr in both cases.
 */
5084 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5085 				 struct extent_buffer *leaf, int slot)
5087 	struct btrfs_extent_ref_v0 *ref0;
5088 	struct btrfs_key key;
5090 	btrfs_item_key_to_cpu(leaf, &key, slot);
5091 	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5092 	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5093 		add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5095 		add_data_backref(extent_cache, key.objectid, key.offset, 0,
5096 				0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the CHUNK_ITEM at @slot in
 * @leaf.  The record is sized for its stripe count
 * (btrfs_chunk_record_size) and keyed in the cache by the chunk's
 * logical start (key->offset) and length.  Returns NULL-adjacent
 * failure handling is elided; on alloc failure an error is printed.
 */
5102 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5103 					    struct btrfs_key *key,
5106 	struct btrfs_chunk *ptr;
5107 	struct chunk_record *rec;
5110 	ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5111 	num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* Variable-size allocation: trailing stripes[] array. */
5113 	rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5115 		fprintf(stderr, "memory allocation failed\n");
5119 	INIT_LIST_HEAD(&rec->list);
5120 	INIT_LIST_HEAD(&rec->dextents);
5123 	rec->cache.start = key->offset;
5124 	rec->cache.size = btrfs_chunk_length(leaf, ptr);
5126 	rec->generation = btrfs_header_generation(leaf);
5128 	rec->objectid = key->objectid;
5129 	rec->type = key->type;
5130 	rec->offset = key->offset;
5132 	rec->length = rec->cache.size;
5133 	rec->owner = btrfs_chunk_owner(leaf, ptr);
5134 	rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5135 	rec->type_flags = btrfs_chunk_type(leaf, ptr);
5136 	rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5137 	rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5138 	rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5139 	rec->num_stripes = num_stripes;
5140 	rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
/* Copy per-stripe devid/offset/uuid out of the on-disk item. */
5142 	for (i = 0; i < rec->num_stripes; ++i) {
5143 		rec->stripes[i].devid =
5144 			btrfs_stripe_devid_nr(leaf, ptr, i);
5145 		rec->stripes[i].offset =
5146 			btrfs_stripe_offset_nr(leaf, ptr, i);
5147 		read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5148 				(unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Parse the chunk item at @slot and insert the resulting record into
 * @chunk_cache; a collision means the chunk was already recorded.
 */
5155 static int process_chunk_item(struct cache_tree *chunk_cache,
5156 			      struct btrfs_key *key, struct extent_buffer *eb,
5159 	struct chunk_record *rec;
5162 	rec = btrfs_new_chunk_record(eb, key, slot);
5163 	ret = insert_cache_extent(chunk_cache, &rec->cache);
5165 		fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5166 			rec->offset, rec->length);
/*
 * Parse the DEV_ITEM at @slot into a device_record and insert it into
 * the devid-keyed rb-tree @dev_cache; a collision means the device
 * was already recorded.  NOTE(review): rec->devid is assigned twice
 * (from key->offset, then from the item); the item value wins.
 */
5173 static int process_device_item(struct rb_root *dev_cache,
5174 		struct btrfs_key *key, struct extent_buffer *eb, int slot)
5176 	struct btrfs_dev_item *ptr;
5177 	struct device_record *rec;
5180 	ptr = btrfs_item_ptr(eb,
5181 		slot, struct btrfs_dev_item);
5183 	rec = malloc(sizeof(*rec));
5185 		fprintf(stderr, "memory allocation failed\n");
5189 	rec->devid = key->offset;
5190 	rec->generation = btrfs_header_generation(eb);
5192 	rec->objectid = key->objectid;
5193 	rec->type = key->type;
5194 	rec->offset = key->offset;
5196 	rec->devid = btrfs_device_id(eb, ptr);
5197 	rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5198 	rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5200 	ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5202 		fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the BLOCK_GROUP_ITEM at
 * @slot.  The cache entry spans [key->objectid, key->objectid +
 * key->offset).  Prints an error on allocation failure (the return
 * path is elided from this view).
 */
5209 struct block_group_record *
5210 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5213 	struct btrfs_block_group_item *ptr;
5214 	struct block_group_record *rec;
5216 	rec = calloc(1, sizeof(*rec));
5218 		fprintf(stderr, "memory allocation failed\n");
5222 	rec->cache.start = key->objectid;
5223 	rec->cache.size = key->offset;
5225 	rec->generation = btrfs_header_generation(leaf);
5227 	rec->objectid = key->objectid;
5228 	rec->type = key->type;
5229 	rec->offset = key->offset;
5231 	ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5232 	rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5234 	INIT_LIST_HEAD(&rec->list);
/*
 * Parse the block group item at @slot and insert the resulting record
 * into @block_group_cache; a collision means it was already recorded.
 */
5239 static int process_block_group_item(struct block_group_tree *block_group_cache,
5240 				    struct btrfs_key *key,
5241 				    struct extent_buffer *eb, int slot)
5243 	struct block_group_record *rec;
5246 	rec = btrfs_new_block_group_record(eb, key, slot);
5247 	ret = insert_block_group_record(block_group_cache, rec);
5249 		fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5250 			rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the DEV_EXTENT item at
 * @slot.  Keyed in the cache by (devid = key->objectid, start =
 * key->offset, size = extent length).  Prints an error on allocation
 * failure (the return path is elided from this view).
 */
5257 struct device_extent_record *
5258 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5259 			       struct btrfs_key *key, int slot)
5261 	struct device_extent_record *rec;
5262 	struct btrfs_dev_extent *ptr;
5264 	rec = calloc(1, sizeof(*rec));
5266 		fprintf(stderr, "memory allocation failed\n");
5270 	rec->cache.objectid = key->objectid;
5271 	rec->cache.start = key->offset;
5273 	rec->generation = btrfs_header_generation(leaf);
5275 	rec->objectid = key->objectid;
5276 	rec->type = key->type;
5277 	rec->offset = key->offset;
5279 	ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5280 	rec->chunk_objecteid =
5281 		btrfs_dev_extent_chunk_objectid(leaf, ptr);
5283 		btrfs_dev_extent_chunk_offset(leaf, ptr);
5284 	rec->length = btrfs_dev_extent_length(leaf, ptr);
5285 	rec->cache.size = rec->length;
5287 	INIT_LIST_HEAD(&rec->chunk_list);
5288 	INIT_LIST_HEAD(&rec->device_list);
/*
 * Parse the device extent item at @slot and insert the record into
 * @dev_extent_cache; a collision means it was already recorded.
 */
5294 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5295 			   struct btrfs_key *key, struct extent_buffer *eb,
5298 	struct device_extent_record *rec;
5301 	rec = btrfs_new_device_extent_record(eb, key, slot);
5302 	ret = insert_device_extent_record(dev_extent_cache, rec);
5305 			"Device extent[%llu, %llu, %llu] existed.\n",
5306 			rec->objectid, rec->offset, rec->length);
/*
 * Parse an EXTENT_ITEM / METADATA_ITEM at @slot: create/update the
 * extent record, then walk the inline refs and register each as a tree
 * or data backref.  METADATA_ITEM offsets encode the level, so the
 * byte count comes from nodesize instead of key.offset.  Pre-v2 items
 * (smaller than btrfs_extent_item) are handled via the V0 compat path.
 */
5313 static int process_extent_item(struct btrfs_root *root,
5314 			       struct cache_tree *extent_cache,
5315 			       struct extent_buffer *eb, int slot)
5317 	struct btrfs_extent_item *ei;
5318 	struct btrfs_extent_inline_ref *iref;
5319 	struct btrfs_extent_data_ref *dref;
5320 	struct btrfs_shared_data_ref *sref;
5321 	struct btrfs_key key;
5322 	struct extent_record tmpl;
5326 	u32 item_size = btrfs_item_size_nr(eb, slot);
5332 	btrfs_item_key_to_cpu(eb, &key, slot);
5334 	if (key.type == BTRFS_METADATA_ITEM_KEY) {
5336 		num_bytes = root->nodesize;
5338 		num_bytes = key.offset;
/* Old-format extent item: only a ref count, no flags/inline refs. */
5341 	if (item_size < sizeof(*ei)) {
5342 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5343 		struct btrfs_extent_item_v0 *ei0;
5344 		BUG_ON(item_size != sizeof(*ei0));
5345 		ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5346 		refs = btrfs_extent_refs_v0(eb, ei0);
5350 		memset(&tmpl, 0, sizeof(tmpl));
5351 		tmpl.start = key.objectid;
5352 		tmpl.nr = num_bytes;
5353 		tmpl.extent_item_refs = refs;
5354 		tmpl.metadata = metadata;
5356 		tmpl.max_size = num_bytes;
5358 		return add_extent_rec(extent_cache, &tmpl);
5361 	ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5362 	refs = btrfs_extent_refs(eb, ei);
5363 	if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5368 	memset(&tmpl, 0, sizeof(tmpl));
5369 	tmpl.start = key.objectid;
5370 	tmpl.nr = num_bytes;
5371 	tmpl.extent_item_refs = refs;
5372 	tmpl.metadata = metadata;
5374 	tmpl.max_size = num_bytes;
5375 	add_extent_rec(extent_cache, &tmpl);
5377 	ptr = (unsigned long)(ei + 1);
/* EXTENT_ITEM tree blocks carry a tree_block_info before the refs;
 * METADATA_ITEM does not. */
5378 	if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5379 	    key.type == BTRFS_EXTENT_ITEM_KEY)
5380 		ptr += sizeof(struct btrfs_tree_block_info);
5382 	end = (unsigned long)ei + item_size;
/* Iterate the packed inline refs until the end of the item. */
5384 		iref = (struct btrfs_extent_inline_ref *)ptr;
5385 		type = btrfs_extent_inline_ref_type(eb, iref);
5386 		offset = btrfs_extent_inline_ref_offset(eb, iref);
5388 		case BTRFS_TREE_BLOCK_REF_KEY:
5389 			add_tree_backref(extent_cache, key.objectid,
5392 		case BTRFS_SHARED_BLOCK_REF_KEY:
5393 			add_tree_backref(extent_cache, key.objectid,
5396 		case BTRFS_EXTENT_DATA_REF_KEY:
5397 			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5398 			add_data_backref(extent_cache, key.objectid, 0,
5399 					btrfs_extent_data_ref_root(eb, dref),
5400 					btrfs_extent_data_ref_objectid(eb,
5402 					btrfs_extent_data_ref_offset(eb, dref),
5403 					btrfs_extent_data_ref_count(eb, dref),
5406 		case BTRFS_SHARED_DATA_REF_KEY:
5407 			sref = (struct btrfs_shared_data_ref *)(iref + 1);
5408 			add_data_backref(extent_cache, key.objectid, offset,
5410 					btrfs_shared_data_ref_count(eb, sref),
5414 			fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5415 				key.objectid, key.type, num_bytes);
5418 		ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that [offset, offset+bytes) exists as a single free-space
 * entry in @cache's free space ctl, then consume (unlink) it.
 * Superblock mirror locations inside the range are carved out first --
 * they are never free space -- including the awkward case where a
 * super copy lands in the middle of the range (handled by recursing on
 * the left side and continuing with the right).
 */
5425 static int check_cache_range(struct btrfs_root *root,
5426 			     struct btrfs_block_group_cache *cache,
5427 			     u64 offset, u64 bytes)
5429 	struct btrfs_free_space *entry;
5435 	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5436 		bytenr = btrfs_sb_offset(i);
/* Map the super copy's logical addresses within this block group. */
5437 		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5438 				       cache->key.objectid, bytenr, 0,
5439 				       &logical, &nr, &stripe_len);
5444 			if (logical[nr] + stripe_len <= offset)
5445 			if (offset + bytes <= logical[nr])
5448 			if (logical[nr] == offset) {
5449 				if (stripe_len >= bytes) {
5453 				bytes -= stripe_len;
5454 				offset += stripe_len;
5455 			} else if (logical[nr] < offset) {
5456 				if (logical[nr] + stripe_len >=
5461 				bytes = (offset + bytes) -
5462 					(logical[nr] + stripe_len);
5463 				offset = logical[nr] + stripe_len;
5466 				 * Could be tricky, the super may land in the
5467 				 * middle of the area we're checking. First
5468 				 * check the easiest case, it's at the end.
5470 				if (logical[nr] + stripe_len >=
5472 					bytes = logical[nr] - offset;
5476 				/* Check the left side */
5477 				ret = check_cache_range(root, cache,
5479 							logical[nr] - offset);
5485 				/* Now we continue with the right side */
5486 				bytes = (offset + bytes) -
5487 					(logical[nr] + stripe_len);
5488 				offset = logical[nr] + stripe_len;
/* After carve-outs, the remaining range must match one entry exactly. */
5495 	entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5497 		fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5498 			offset, offset+bytes);
5502 	if (entry->offset != offset) {
5503 		fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5508 	if (entry->bytes != bytes) {
5509 		fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5510 			bytes, entry->bytes, offset);
/* Consume the entry so leftovers can be detected by the caller. */
5514 	unlink_free_space(cache->free_space_ctl, entry);
/*
 * verify_space_cache - cross-check one block group's free space cache
 * against the extent tree.
 *
 * Walks all EXTENT_ITEM / METADATA_ITEM records inside the block group;
 * every gap between allocated extents must be covered by a free-space
 * entry (checked and consumed via check_cache_range()).  After the walk
 * the cache's rbtree must be empty, otherwise it contains bogus entries.
 *
 * NOTE(review): sampled listing with gaps; allocation-failure checks and
 * some loop control are not visible here.
 */
5519 static int verify_space_cache(struct btrfs_root *root,
5520 struct btrfs_block_group_cache *cache)
5522 struct btrfs_path *path;
5523 struct extent_buffer *leaf;
5524 struct btrfs_key key;
5528 path = btrfs_alloc_path();
5532 root = root->fs_info->extent_root;
/* Never check below the first superblock copy. */
5534 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5536 key.objectid = last;
5538 key.type = BTRFS_EXTENT_ITEM_KEY;
5540 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5545 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5546 ret = btrfs_next_leaf(root, path);
5554 leaf = path->nodes[0];
5555 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop once we walk past the end of this block group. */
5556 if (key.objectid >= cache->key.offset + cache->key.objectid)
5558 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5559 key.type != BTRFS_METADATA_ITEM_KEY) {
/* METADATA_ITEM offset is the tree level, not a byte length,
 * so metadata extents advance by nodesize instead of key.offset. */
5564 if (last == key.objectid) {
5565 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5566 last = key.objectid + key.offset;
5568 last = key.objectid + root->nodesize;
/* The hole between the previous extent and this one must be free. */
5573 ret = check_cache_range(root, cache, last,
5574 key.objectid - last);
5577 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5578 last = key.objectid + key.offset;
5580 last = key.objectid + root->nodesize;
/* Tail of the block group after the last extent must be free too. */
5584 if (last < cache->key.objectid + cache->key.offset)
5585 ret = check_cache_range(root, cache, last,
5586 cache->key.objectid +
5587 cache->key.offset - last);
5590 btrfs_free_path(path);
5593 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5594 fprintf(stderr, "There are still entries left in the space "
/*
 * check_space_cache - validate the free space cache (v1 file-based cache
 * or the v2 free space tree) for every block group in the filesystem.
 *
 * If the super's cache generation is stale the cache is declared invalid
 * up front.  Otherwise each block group's cache is loaded and verified
 * with verify_space_cache().  Returns 0 on success, -EINVAL when any
 * group's cache disagreed with the extent tree.
 *
 * NOTE(review): sampled listing with gaps; the per-group loop header and
 * some error paths are not visible here.
 */
5602 static int check_space_cache(struct btrfs_root *root)
5604 struct btrfs_block_group_cache *cache;
5605 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5609 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5610 btrfs_super_generation(root->fs_info->super_copy) !=
5611 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5612 printf("cache and super generation don't match, space cache "
5613 "will be invalidated\n");
5617 if (ctx.progress_enabled) {
5618 ctx.tp = TASK_FREE_SPACE;
5619 task_start(ctx.info);
5623 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5627 start = cache->key.objectid + cache->key.offset;
5628 if (!cache->free_space_ctl) {
5629 if (btrfs_init_free_space_ctl(cache,
5630 root->sectorsize)) {
5635 btrfs_remove_free_space_cache(cache);
/* v2: free space tree — super stripes must be excluded before load. */
5638 if (btrfs_fs_compat_ro(root->fs_info,
5639 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5640 ret = exclude_super_stripes(root, cache);
5642 fprintf(stderr, "could not exclude super stripes: %s\n",
5647 ret = load_free_space_tree(root->fs_info, cache);
5648 free_excluded_extents(root, cache);
5650 fprintf(stderr, "could not load free space tree: %s\n",
/* v1: file-based space cache. */
5657 ret = load_free_space_cache(root->fs_info, cache);
5662 ret = verify_space_cache(root, cache);
5664 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5665 cache->key.objectid);
5670 task_stop(ctx.info);
5672 return error ? -EINVAL : 0;
/*
 * check_extent_csums - read @num_bytes of data at @bytenr and verify each
 * sectorsize chunk against the checksums stored in csum leaf @eb starting
 * at @leaf_offset.  On a mismatch the next mirror is tried (if any).
 *
 * NOTE(review): sampled listing with gaps — the mirror-retry control flow,
 * the free(data) and the return are not visible in this excerpt.
 */
5675 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5676 u64 num_bytes, unsigned long leaf_offset,
5677 struct extent_buffer *eb) {
5680 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5682 unsigned long csum_offset;
5686 u64 data_checked = 0;
/* Data extents must be sector-aligned in length. */
5692 if (num_bytes % root->sectorsize)
5695 data = malloc(num_bytes);
5699 while (offset < num_bytes) {
5702 read_len = num_bytes - offset;
5703 /* read as much space once a time */
5704 ret = read_extent_data(root, data + offset,
5705 bytenr + offset, &read_len, mirror);
5709 /* verify every 4k data's checksum */
5710 while (data_checked < read_len) {
5712 tmp = offset + data_checked;
5714 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5715 csum, root->sectorsize);
5716 btrfs_csum_final(csum, (char *)&csum);
/* Stored csums are laid out one per sector, csum_size bytes each. */
5718 csum_offset = leaf_offset +
5719 tmp / root->sectorsize * csum_size;
5720 read_extent_buffer(eb, (char *)&csum_expected,
5721 csum_offset, csum_size);
5722 /* try another mirror */
5723 if (csum != csum_expected) {
5724 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5725 mirror, bytenr + tmp,
5726 csum, csum_expected);
5727 num_copies = btrfs_num_copies(
5728 &root->fs_info->mapping_tree,
5730 if (mirror < num_copies - 1) {
5735 data_checked += root->sectorsize;
/*
 * check_extent_exists - verify that every byte of [bytenr, bytenr +
 * num_bytes) is covered by EXTENT_ITEM records in the extent tree.
 *
 * Searches backwards to the covering extent, skips block-group items that
 * share the same objectid, then walks forward shrinking the queried range
 * as covered pieces are found; recurses when an extent splits the range
 * in two.  Any bytes left uncovered are reported as a csum-without-extent
 * inconsistency.
 *
 * NOTE(review): sampled listing with gaps; some loop/exit statements are
 * not visible in this excerpt.
 */
5744 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5747 struct btrfs_path *path;
5748 struct extent_buffer *leaf;
5749 struct btrfs_key key;
5752 path = btrfs_alloc_path();
5754 fprintf(stderr, "Error allocating path\n");
5758 key.objectid = bytenr;
5759 key.type = BTRFS_EXTENT_ITEM_KEY;
5760 key.offset = (u64)-1;
5763 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5766 fprintf(stderr, "Error looking up extent record %d\n", ret);
5767 btrfs_free_path(path);
5770 if (path->slots[0] > 0) {
5773 ret = btrfs_prev_leaf(root, path);
5776 } else if (ret > 0) {
5783 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5786 * Block group items come before extent items if they have the same
5787 * bytenr, so walk back one more just in case. Dear future traveller,
5788 * first congrats on mastering time travel. Now if it's not too much
5789 * trouble could you go back to 2006 and tell Chris to make the
5790 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5791 * EXTENT_ITEM_KEY please?
5793 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5794 if (path->slots[0] > 0) {
5797 ret = btrfs_prev_leaf(root, path);
5800 } else if (ret > 0) {
5805 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* Forward walk: consume the queried range extent by extent. */
5809 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5810 ret = btrfs_next_leaf(root, path);
5812 fprintf(stderr, "Error going to next leaf "
5814 btrfs_free_path(path);
5820 leaf = path->nodes[0];
5821 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5822 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5826 if (key.objectid + key.offset < bytenr) {
5830 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at the range start: shrink from the left. */
5833 if (key.objectid == bytenr) {
5834 if (key.offset >= num_bytes) {
5838 num_bytes -= key.offset;
5839 bytenr += key.offset;
/* Extent starts before the range: trim the covered prefix. */
5840 } else if (key.objectid < bytenr) {
5841 if (key.objectid + key.offset >= bytenr + num_bytes) {
5845 num_bytes = (bytenr + num_bytes) -
5846 (key.objectid + key.offset);
5847 bytenr = key.objectid + key.offset;
5849 if (key.objectid + key.offset < bytenr + num_bytes) {
5850 u64 new_start = key.objectid + key.offset;
5851 u64 new_bytes = bytenr + num_bytes - new_start;
5854 * Weird case, the extent is in the middle of
5855 * our range, we'll have to search one side
5856 * and then the other. Not sure if this happens
5857 * in real life, but no harm in coding it up
5858 * anyway just in case.
5860 btrfs_release_path(path);
5861 ret = check_extent_exists(root, new_start,
5864 fprintf(stderr, "Right section didn't "
5868 num_bytes = key.objectid - bytenr;
5871 num_bytes = key.objectid - bytenr;
/* Anything left uncovered means csums exist with no backing extent. */
5878 if (num_bytes && !ret) {
5879 fprintf(stderr, "There are no extents for csum range "
5880 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5884 btrfs_free_path(path);
/*
 * check_csums - walk the entire csum tree and (a) optionally verify the
 * actual data checksums when check_data_csum is set, and (b) verify that
 * every checksummed byte range is backed by an extent record.
 *
 * Contiguous csum items are coalesced into one (offset, num_bytes) run
 * before the extent-existence check to avoid per-item searches.
 *
 * NOTE(review): sampled listing with gaps; the loop-advance statements
 * and the skip_csum_check label line are not visible here.
 */
5888 static int check_csums(struct btrfs_root *root)
5890 struct btrfs_path *path;
5891 struct extent_buffer *leaf;
5892 struct btrfs_key key;
5893 u64 offset = 0, num_bytes = 0;
5894 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5898 unsigned long leaf_offset;
5900 root = root->fs_info->csum_root;
5901 if (!extent_buffer_uptodate(root->node)) {
5902 fprintf(stderr, "No valid csum tree found\n");
5906 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5907 key.type = BTRFS_EXTENT_CSUM_KEY;
5910 path = btrfs_alloc_path();
5914 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5916 fprintf(stderr, "Error searching csum tree %d\n", ret);
5917 btrfs_free_path(path);
5921 if (ret > 0 && path->slots[0])
5926 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5927 ret = btrfs_next_leaf(root, path);
5929 fprintf(stderr, "Error going to next leaf "
5936 leaf = path->nodes[0];
5938 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5939 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each item holds item_size/csum_size checksums, one per sector. */
5944 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5945 csum_size) * root->sectorsize;
5946 if (!check_data_csum)
5947 goto skip_csum_check;
5948 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5949 ret = check_extent_csums(root, key.offset, data_len,
/* Non-contiguous item: flush the accumulated run first. */
5955 offset = key.offset;
5956 } else if (key.offset != offset + num_bytes) {
5957 ret = check_extent_exists(root, offset, num_bytes);
5959 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5960 "there is no extent record\n",
5961 offset, offset+num_bytes);
5964 offset = key.offset;
5967 num_bytes += data_len;
5971 btrfs_free_path(path);
/*
 * is_dropped_key - lexicographic (objectid, type, offset) comparison:
 * returns non-zero when @key sorts strictly before @drop_key, i.e. the
 * key lies in the already-dropped part of a partially deleted snapshot.
 * NOTE(review): return statements fall in the sampling gaps of this
 * listing and are not visible.
 */
5975 static int is_dropped_key(struct btrfs_key *key,
5976 struct btrfs_key *drop_key) {
5977 if (key->objectid < drop_key->objectid)
5979 else if (key->objectid == drop_key->objectid) {
5980 if (key->type < drop_key->type)
5982 else if (key->type == drop_key->type) {
5983 if (key->offset < drop_key->offset)
5991 * Here are the rules for FULL_BACKREF.
5993 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
5994 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
5996 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
5997 * if it happened after the relocation occurred since we'll have dropped the
5998 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
5999 * have no real way to know for sure.
6001 * We process the blocks one root at a time, and we start from the lowest root
6002 * objectid and go to the highest. So we can just lookup the owner backref for
6003 * the record and if we don't find it then we know it doesn't exist and we have
6006 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6007 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6008 * be set or not and then we can check later once we've gathered all the refs.
/*
 * calc_extent_flag - decide whether tree block @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and record disagreement with the
 * previously observed flag in rec->bad_full_backref.
 *
 * FULL_BACKREF applies only to fs/reloc tree blocks (objectid >=
 * BTRFS_FIRST_FREE_OBJECTID) that are not the root node, and is implied
 * by the RELOC header flag or by a header owner with no owner backref.
 * See the block comment above this function for the full rules.
 */
6010 static int calc_extent_flag(struct btrfs_root *root,
6011 struct cache_tree *extent_cache,
6012 struct extent_buffer *buf,
6013 struct root_item_record *ri,
6016 struct extent_record *rec;
6017 struct cache_extent *cache;
6018 struct tree_backref *tback;
6021 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6022 /* we have added this extent before */
6024 rec = container_of(cache, struct extent_record, cache);
6027 * Except file/reloc tree, we can not have
6030 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* The root node itself never gets FULL_BACKREF. */
6035 if (buf->start == ri->bytenr)
6038 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6041 owner = btrfs_header_owner(buf);
6042 if (owner == ri->objectid)
/* Roots are processed lowest-objectid first, so a missing owner
 * backref here proves the owner ref does not exist (see comment above). */
6045 tback = find_tree_backref(rec, 0, owner);
/* Flag a conflict if earlier observations disagree with "no full backref". */
6050 if (rec->flag_block_full_backref != FLAG_UNSET &&
6051 rec->flag_block_full_backref != 0)
6052 rec->bad_full_backref = 1;
6055 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6056 if (rec->flag_block_full_backref != FLAG_UNSET &&
6057 rec->flag_block_full_backref != 1)
6058 rec->bad_full_backref = 1;
/*
 * run_next_block - main worker of the extent-tree scan: pick the next
 * pending tree block, read it, determine its FULL_BACKREF status, sanity
 * check it, then process every item it contains.
 *
 * Leaves feed the various caches (chunk, device, block group, device
 * extent) and generate data/tree backrefs; interior nodes enqueue their
 * children as new pending blocks (skipping keys already dropped by a
 * partially deleted snapshot).  Global accounting counters
 * (total_btree_bytes, btree_space_waste, ...) are updated as a side
 * effect.
 *
 * NOTE(review): this is a sampled listing with many missing lines
 * (declarations, else branches, closing braces); code left byte-identical.
 */
6062 static int run_next_block(struct btrfs_root *root,
6063 struct block_info *bits,
6066 struct cache_tree *pending,
6067 struct cache_tree *seen,
6068 struct cache_tree *reada,
6069 struct cache_tree *nodes,
6070 struct cache_tree *extent_cache,
6071 struct cache_tree *chunk_cache,
6072 struct rb_root *dev_cache,
6073 struct block_group_tree *block_group_cache,
6074 struct device_extent_tree *dev_extent_cache,
6075 struct root_item_record *ri)
6077 struct extent_buffer *buf;
6078 struct extent_record *rec = NULL;
6089 struct btrfs_key key;
6090 struct cache_extent *cache;
/* Batch-select the next blocks to visit and kick off readahead. */
6093 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6094 bits_nr, &reada_bits);
6099 for(i = 0; i < nritems; i++) {
6100 ret = add_cache_extent(reada, bits[i].start,
6105 /* fixme, get the parent transid */
6106 readahead_tree_block(root, bits[i].start,
6110 *last = bits[0].start;
6111 bytenr = bits[0].start;
6112 size = bits[0].size;
/* Drop this block from all tracking trees before processing it. */
6114 cache = lookup_cache_extent(pending, bytenr, size);
6116 remove_cache_extent(pending, cache);
6119 cache = lookup_cache_extent(reada, bytenr, size);
6121 remove_cache_extent(reada, cache);
6124 cache = lookup_cache_extent(nodes, bytenr, size);
6126 remove_cache_extent(nodes, cache);
6129 cache = lookup_cache_extent(extent_cache, bytenr, size);
6131 rec = container_of(cache, struct extent_record, cache);
6132 gen = rec->parent_generation;
6135 /* fixme, get the real parent transid */
6136 buf = read_tree_block(root, bytenr, size, gen);
6137 if (!extent_buffer_uptodate(buf)) {
6138 record_bad_block_io(root->fs_info,
6139 extent_cache, bytenr, size);
6143 nritems = btrfs_header_nritems(buf);
/* Get the extent flags, falling back to computing them ourselves
 * when the extent tree is unusable or being rebuilt. */
6146 if (!init_extent_tree) {
6147 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6148 btrfs_header_level(buf), 1, NULL,
6151 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6153 fprintf(stderr, "Couldn't calc extent flags\n");
6154 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6159 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6161 fprintf(stderr, "Couldn't calc extent flags\n");
6162 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Reconcile the stored flag against what this traversal implies. */
6166 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6168 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6169 ri->objectid == btrfs_header_owner(buf)) {
6171 * Ok we got to this block from it's original owner and
6172 * we have FULL_BACKREF set. Relocation can leave
6173 * converted blocks over so this is altogether possible,
6174 * however it's not possible if the generation > the
6175 * last snapshot, so check for this case.
6177 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6178 btrfs_header_generation(buf) > ri->last_snapshot) {
6179 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6180 rec->bad_full_backref = 1;
6185 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6186 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6187 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6188 rec->bad_full_backref = 1;
6192 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6193 rec->flag_block_full_backref = 1;
6197 rec->flag_block_full_backref = 0;
6199 owner = btrfs_header_owner(buf);
6202 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: classify and process every item it holds. */
6206 if (btrfs_is_leaf(buf)) {
6207 btree_space_waste += btrfs_leaf_free_space(root, buf);
6208 for (i = 0; i < nritems; i++) {
6209 struct btrfs_file_extent_item *fi;
6210 btrfs_item_key_to_cpu(buf, &key, i);
6211 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6212 process_extent_item(root, extent_cache, buf,
6216 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6217 process_extent_item(root, extent_cache, buf,
6221 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6223 btrfs_item_size_nr(buf, i);
6226 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6227 process_chunk_item(chunk_cache, &key, buf, i);
6230 if (key.type == BTRFS_DEV_ITEM_KEY) {
6231 process_device_item(dev_cache, &key, buf, i);
6234 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6235 process_block_group_item(block_group_cache,
6239 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6240 process_device_extent_item(dev_extent_cache,
6245 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6246 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6247 process_extent_ref_v0(extent_cache, buf, i);
6254 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6255 add_tree_backref(extent_cache, key.objectid, 0,
6259 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6260 add_tree_backref(extent_cache, key.objectid,
6264 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6265 struct btrfs_extent_data_ref *ref;
6266 ref = btrfs_item_ptr(buf, i,
6267 struct btrfs_extent_data_ref);
6268 add_data_backref(extent_cache,
6270 btrfs_extent_data_ref_root(buf, ref),
6271 btrfs_extent_data_ref_objectid(buf,
6273 btrfs_extent_data_ref_offset(buf, ref),
6274 btrfs_extent_data_ref_count(buf, ref),
6275 0, root->sectorsize);
6278 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6279 struct btrfs_shared_data_ref *ref;
6280 ref = btrfs_item_ptr(buf, i,
6281 struct btrfs_shared_data_ref);
6282 add_data_backref(extent_cache,
6283 key.objectid, key.offset, 0, 0, 0,
6284 btrfs_shared_data_ref_count(buf, ref),
6285 0, root->sectorsize);
/* Orphans are queued on delete_items for later removal. */
6288 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6289 struct bad_item *bad;
6291 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6295 bad = malloc(sizeof(struct bad_item));
6298 INIT_LIST_HEAD(&bad->list);
6299 memcpy(&bad->key, &key,
6300 sizeof(struct btrfs_key));
6301 bad->root_id = owner;
6302 list_add_tail(&bad->list, &delete_items);
/* Regular file extents: account bytes and add data backrefs. */
6305 if (key.type != BTRFS_EXTENT_DATA_KEY)
6307 fi = btrfs_item_ptr(buf, i,
6308 struct btrfs_file_extent_item);
6309 if (btrfs_file_extent_type(buf, fi) ==
6310 BTRFS_FILE_EXTENT_INLINE)
6312 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6315 data_bytes_allocated +=
6316 btrfs_file_extent_disk_num_bytes(buf, fi);
6317 if (data_bytes_allocated < root->sectorsize) {
6320 data_bytes_referenced +=
6321 btrfs_file_extent_num_bytes(buf, fi);
6322 add_data_backref(extent_cache,
6323 btrfs_file_extent_disk_bytenr(buf, fi),
6324 parent, owner, key.objectid, key.offset -
6325 btrfs_file_extent_offset(buf, fi), 1, 1,
6326 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Interior node: record and enqueue every child pointer. */
6330 struct btrfs_key first_key;
6332 first_key.objectid = 0;
6335 btrfs_item_key_to_cpu(buf, &first_key, 0);
6336 level = btrfs_header_level(buf);
6337 for (i = 0; i < nritems; i++) {
6338 struct extent_record tmpl;
6340 ptr = btrfs_node_blockptr(buf, i);
6341 size = root->nodesize;
6342 btrfs_node_key_to_cpu(buf, &key, i);
6344 if ((level == ri->drop_level)
6345 && is_dropped_key(&key, &ri->drop_key)) {
6350 memset(&tmpl, 0, sizeof(tmpl));
6351 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6352 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6357 tmpl.max_size = size;
6358 ret = add_extent_rec(extent_cache, &tmpl);
6361 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6364 add_pending(nodes, seen, ptr, size);
6366 add_pending(pending, seen, ptr, size);
6369 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6370 nritems) * sizeof(struct btrfs_key_ptr);
6372 total_btree_bytes += buf->len;
6373 if (fs_root_objectid(btrfs_header_owner(buf)))
6374 total_fs_tree_bytes += buf->len;
6375 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6376 total_extent_tree_bytes += buf->len;
6377 if (!found_old_backref &&
6378 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6379 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6380 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6381 found_old_backref = 1;
6383 free_extent_buffer(buf);
/*
 * add_root_to_pending - seed the traversal with a tree root block:
 * enqueue @buf on the nodes (interior) or pending (leaf) tree, create an
 * extent record for it, and add the appropriate tree backref (shared
 * parent backref for reloc/mixed-backref-rev roots, owner backref
 * otherwise).
 */
6387 static int add_root_to_pending(struct extent_buffer *buf,
6388 struct cache_tree *extent_cache,
6389 struct cache_tree *pending,
6390 struct cache_tree *seen,
6391 struct cache_tree *nodes,
6394 struct extent_record tmpl;
6396 if (btrfs_header_level(buf) > 0)
6397 add_pending(nodes, seen, buf->start, buf->len);
6399 add_pending(pending, seen, buf->start, buf->len);
6401 memset(&tmpl, 0, sizeof(tmpl));
6402 tmpl.start = buf->start;
6407 tmpl.max_size = buf->len;
6408 add_extent_rec(extent_cache, &tmpl);
6410 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6411 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6412 add_tree_backref(extent_cache, buf->start, buf->start,
6415 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6419 /* as we fix the tree, we might be deleting blocks that
6420 * we're tracking for repair. This hook makes sure we
6421 * remove any backrefs for blocks as we are fixing them.
/*
 * free_extent_hook - called when repair code frees an extent, so the
 * in-memory record/backref bookkeeping stays consistent with the disk.
 *
 * Decrements the matching data or tree backref counts, clears found_ref /
 * found_extent_tree when they hit zero, erases fully dead backrefs from
 * the record's rbtree, and lets maybe_free_extent_rec() reap the record
 * once nothing references it.
 *
 * NOTE(review): sampled listing with gaps; some decrements and the free
 * of the erased backref are not visible in this excerpt.
 */
6423 static int free_extent_hook(struct btrfs_trans_handle *trans,
6424 struct btrfs_root *root,
6425 u64 bytenr, u64 num_bytes, u64 parent,
6426 u64 root_objectid, u64 owner, u64 offset,
6429 struct extent_record *rec;
6430 struct cache_extent *cache;
6432 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Data extents are owned by inode objectids, metadata by tree ids. */
6434 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6435 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6439 rec = container_of(cache, struct extent_record, cache);
6441 struct data_backref *back;
6442 back = find_data_backref(rec, parent, root_objectid, owner,
6443 offset, 1, bytenr, num_bytes);
6446 if (back->node.found_ref) {
6447 back->found_ref -= refs_to_drop;
6449 rec->refs -= refs_to_drop;
6451 if (back->node.found_extent_tree) {
6452 back->num_refs -= refs_to_drop;
6453 if (rec->extent_item_refs)
6454 rec->extent_item_refs -= refs_to_drop;
6456 if (back->found_ref == 0)
6457 back->node.found_ref = 0;
6458 if (back->num_refs == 0)
6459 back->node.found_extent_tree = 0;
6461 if (!back->node.found_extent_tree && back->node.found_ref) {
6462 rb_erase(&back->node.node, &rec->backref_tree);
/* Tree block backref: refcount is implicitly 1 per ref. */
6466 struct tree_backref *back;
6467 back = find_tree_backref(rec, parent, root_objectid);
6470 if (back->node.found_ref) {
6473 back->node.found_ref = 0;
6475 if (back->node.found_extent_tree) {
6476 if (rec->extent_item_refs)
6477 rec->extent_item_refs--;
6478 back->node.found_extent_tree = 0;
6480 if (!back->node.found_extent_tree && back->node.found_ref) {
6481 rb_erase(&back->node.node, &rec->backref_tree);
6485 maybe_free_extent_rec(extent_cache, rec);
/*
 * delete_extent_records - repair helper: delete every extent-tree item
 * (EXTENT_ITEM, METADATA_ITEM and all *_REF variants) whose objectid is
 * @bytenr, walking keys downward from (bytenr, max, max).
 *
 * Block-group usage accounting is updated when an EXTENT_ITEM or
 * METADATA_ITEM is removed (metadata items span nodesize bytes, not
 * key.offset).
 *
 * NOTE(review): sampled listing with gaps; loop structure and some error
 * checks are not visible in this excerpt.
 */
6490 static int delete_extent_records(struct btrfs_trans_handle *trans,
6491 struct btrfs_root *root,
6492 struct btrfs_path *path,
6493 u64 bytenr, u64 new_len)
6495 struct btrfs_key key;
6496 struct btrfs_key found_key;
6497 struct extent_buffer *leaf;
6502 key.objectid = bytenr;
6504 key.offset = (u64)-1;
6507 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6514 if (path->slots[0] == 0)
6520 leaf = path->nodes[0];
6521 slot = path->slots[0];
6523 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6524 if (found_key.objectid != bytenr)
/* Not a type we delete: step the search key backwards and retry. */
6527 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6528 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6529 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6530 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6531 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6532 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6533 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6534 btrfs_release_path(path);
6535 if (found_key.type == 0) {
6536 if (found_key.offset == 0)
6538 key.offset = found_key.offset - 1;
6539 key.type = found_key.type;
6541 key.type = found_key.type - 1;
6542 key.offset = (u64)-1;
6546 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6547 found_key.objectid, found_key.type, found_key.offset);
6549 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6552 btrfs_release_path(path);
6554 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6555 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6556 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6557 found_key.offset : root->nodesize;
6559 ret = btrfs_update_block_group(trans, root, bytenr,
6566 btrfs_release_path(path);
6571 * for a single backref, this will allocate a new extent
6572 * and add the backref to it.
/*
 * record_extent - repair helper: (re)create an extent item for @rec in
 * the extent tree and add the backref described by @back to it.
 *
 * When @allocated is not set an EXTENT_ITEM is inserted first (with a
 * btrfs_tree_block_info trailer for metadata), then btrfs_inc_extent_ref
 * is used to add the data or tree backref — once per found_ref for data.
 *
 * Fixes applied in review:
 *  - "copy_key;;" stray empty statement reduced to a single ';'.
 *  - restored "&copy_key" in the four btrfs_set_disk_key_* /
 *    btrfs_set_tree_block_key calls: the listing had been mangled by
 *    HTML-entity decoding ("&copy" rendered as the © sign).
 *
 * NOTE(review): sampled listing with gaps; error checks and closing
 * braces fall outside the visible lines.
 */
6574 static int record_extent(struct btrfs_trans_handle *trans,
6575 struct btrfs_fs_info *info,
6576 struct btrfs_path *path,
6577 struct extent_record *rec,
6578 struct extent_backref *back,
6579 int allocated, u64 flags)
6582 struct btrfs_root *extent_root = info->extent_root;
6583 struct extent_buffer *leaf;
6584 struct btrfs_key ins_key;
6585 struct btrfs_extent_item *ei;
6586 struct tree_backref *tback;
6587 struct data_backref *dback;
6588 struct btrfs_tree_block_info *bi;
/* Metadata extents are at least one node in size. */
6591 rec->max_size = max_t(u64, rec->max_size,
6592 info->extent_root->nodesize);
6595 u32 item_size = sizeof(*ei);
6598 item_size += sizeof(*bi);
6600 ins_key.objectid = rec->start;
6601 ins_key.offset = rec->max_size;
6602 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6604 ret = btrfs_insert_empty_item(trans, extent_root, path,
6605 &ins_key, item_size);
6609 leaf = path->nodes[0];
6610 ei = btrfs_item_ptr(leaf, path->slots[0],
6611 struct btrfs_extent_item);
6613 btrfs_set_extent_refs(leaf, ei, 0);
6614 btrfs_set_extent_generation(leaf, ei, rec->generation);
6616 if (back->is_data) {
6617 btrfs_set_extent_flags(leaf, ei,
6618 BTRFS_EXTENT_FLAG_DATA);
6620 struct btrfs_disk_key copy_key;
6622 tback = to_tree_backref(back);
6623 bi = (struct btrfs_tree_block_info *)(ei + 1);
6624 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* Synthesize the tree_block_info key; only the objectid is known. */
6627 btrfs_set_disk_key_objectid(&copy_key,
6628 rec->info_objectid);
6629 btrfs_set_disk_key_type(&copy_key, 0);
6630 btrfs_set_disk_key_offset(&copy_key, 0);
6632 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6633 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6635 btrfs_set_extent_flags(leaf, ei,
6636 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6639 btrfs_mark_buffer_dirty(leaf);
6640 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6641 rec->max_size, 1, 0);
6644 btrfs_release_path(path);
6647 if (back->is_data) {
6651 dback = to_data_backref(back);
6652 if (back->full_backref)
6653 parent = dback->parent;
/* One inc_extent_ref call per reference actually found. */
6657 for (i = 0; i < dback->found_ref; i++) {
6658 /* if parent != 0, we're doing a full backref
6659 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6660 * just makes the backref allocator create a data
6663 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6664 rec->start, rec->max_size,
6668 BTRFS_FIRST_FREE_OBJECTID :
6674 fprintf(stderr, "adding new data backref"
6675 " on %llu %s %llu owner %llu"
6676 " offset %llu found %d\n",
6677 (unsigned long long)rec->start,
6678 back->full_backref ?
6680 back->full_backref ?
6681 (unsigned long long)parent :
6682 (unsigned long long)dback->root,
6683 (unsigned long long)dback->owner,
6684 (unsigned long long)dback->offset,
6689 tback = to_tree_backref(back);
6690 if (back->full_backref)
6691 parent = tback->parent;
6695 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6696 rec->start, rec->max_size,
6697 parent, tback->root, 0, 0);
6698 fprintf(stderr, "adding new tree backref on "
6699 "start %llu len %llu parent %llu root %llu\n",
6700 rec->start, rec->max_size, parent, tback->root);
6703 btrfs_release_path(path);
/*
 * find_entry - linear scan of @entries for the extent_entry matching
 * (@bytenr, @bytes) exactly; the return statement falls in a gap of this
 * sampled listing (presumably returns the match or NULL — TODO confirm).
 */
6707 static struct extent_entry *find_entry(struct list_head *entries,
6708 u64 bytenr, u64 bytes)
6710 struct extent_entry *entry = NULL;
6712 list_for_each_entry(entry, entries, list) {
6713 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry - among the candidate (bytenr, bytes) entries
 * voted for by the backrefs, pick the one with the highest count,
 * ignoring entries whose votes are all broken.  Returns NULL when there
 * is a tie, meaning the backrefs cannot settle the disagreement alone.
 */
6720 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6722 struct extent_entry *entry, *best = NULL, *prev = NULL;
6724 list_for_each_entry(entry, entries, list) {
6731 * If there are as many broken entries as entries then we know
6732 * not to trust this particular entry.
6734 if (entry->broken == entry->count)
6738 * If our current entry == best then we can't be sure our best
6739 * is really the best, so we need to keep searching.
6741 if (best && best->count == entry->count) {
6747 /* Prev == entry, not good enough, have to keep searching */
6748 if (!prev->broken && prev->count == entry->count)
6752 best = (prev->count > entry->count) ? prev : entry;
6753 else if (best->count < entry->count)
/*
 * repair_ref - rewrite one file extent item so its disk_bytenr/disk_num_
 * bytes agree with the agreed-upon extent @entry.
 *
 * Locates the file extent in the owning fs root via @dback (walking
 * forward from the original offset because splits move the key), then in
 * a transaction adjusts disk_bytenr, file offset, disk_num_bytes and —
 * when uncompressed — ram_bytes.  Compressed refs that don't start at the
 * entry, or refs extending outside the entry, are reported as
 * unsupported rather than guessed at.
 *
 * NOTE(review): sampled listing with gaps; loop advancement and several
 * error-path statements are not visible in this excerpt.
 */
6761 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6762 struct data_backref *dback, struct extent_entry *entry)
6764 struct btrfs_trans_handle *trans;
6765 struct btrfs_root *root;
6766 struct btrfs_file_extent_item *fi;
6767 struct extent_buffer *leaf;
6768 struct btrfs_key key;
6772 key.objectid = dback->root;
6773 key.type = BTRFS_ROOT_ITEM_KEY;
6774 key.offset = (u64)-1;
6775 root = btrfs_read_fs_root(info, &key);
6777 fprintf(stderr, "Couldn't find root for our ref\n");
6782 * The backref points to the original offset of the extent if it was
6783 * split, so we need to search down to the offset we have and then walk
6784 * forward until we find the backref we're looking for.
6786 key.objectid = dback->owner;
6787 key.type = BTRFS_EXTENT_DATA_KEY;
6788 key.offset = dback->offset;
6789 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6791 fprintf(stderr, "Error looking up ref %d\n", ret);
6796 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6797 ret = btrfs_next_leaf(root, path);
6799 fprintf(stderr, "Couldn't find our ref, next\n");
6803 leaf = path->nodes[0];
6804 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6805 if (key.objectid != dback->owner ||
6806 key.type != BTRFS_EXTENT_DATA_KEY) {
6807 fprintf(stderr, "Couldn't find our ref, search\n");
6810 fi = btrfs_item_ptr(leaf, path->slots[0],
6811 struct btrfs_file_extent_item);
6812 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6813 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* Stop once this file extent matches the backref we are repairing. */
6815 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6820 btrfs_release_path(path);
6822 trans = btrfs_start_transaction(root, 1);
6824 return PTR_ERR(trans);
6827 * Ok we have the key of the file extent we want to fix, now we can cow
6828 * down to the thing and fix it.
6830 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6832 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6833 key.objectid, key.type, key.offset, ret);
6837 fprintf(stderr, "Well that's odd, we just found this key "
6838 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6843 leaf = path->nodes[0];
6844 fi = btrfs_item_ptr(leaf, path->slots[0],
6845 struct btrfs_file_extent_item);
/* Compressed extents can't be re-offset; punt to the developers. */
6847 if (btrfs_file_extent_compression(leaf, fi) &&
6848 dback->disk_bytenr != entry->bytenr) {
6849 fprintf(stderr, "Ref doesn't match the record start and is "
6850 "compressed, please take a btrfs-image of this file "
6851 "system and send it to a btrfs developer so they can "
6852 "complete this functionality for bytenr %Lu\n",
6853 dback->disk_bytenr);
/* Broken backref: just point it at the agreed entry. */
6858 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6859 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
/* Ref starts inside the entry: shift disk_bytenr back, offset forward. */
6860 } else if (dback->disk_bytenr > entry->bytenr) {
6861 u64 off_diff, offset;
6863 off_diff = dback->disk_bytenr - entry->bytenr;
6864 offset = btrfs_file_extent_offset(leaf, fi);
6865 if (dback->disk_bytenr + offset +
6866 btrfs_file_extent_num_bytes(leaf, fi) >
6867 entry->bytenr + entry->bytes) {
6868 fprintf(stderr, "Ref is past the entry end, please "
6869 "take a btrfs-image of this file system and "
6870 "send it to a btrfs developer, ref %Lu\n",
6871 dback->disk_bytenr);
6876 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6877 btrfs_set_file_extent_offset(leaf, fi, offset);
/* Ref starts before the entry: shrink the extent offset instead. */
6878 } else if (dback->disk_bytenr < entry->bytenr) {
6881 offset = btrfs_file_extent_offset(leaf, fi);
6882 if (dback->disk_bytenr + offset < entry->bytenr) {
6883 fprintf(stderr, "Ref is before the entry start, please"
6884 " take a btrfs-image of this file system and "
6885 "send it to a btrfs developer, ref %Lu\n",
6886 dback->disk_bytenr);
6891 offset += dback->disk_bytenr;
6892 offset -= entry->bytenr;
6893 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6894 btrfs_set_file_extent_offset(leaf, fi, offset);
6897 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6900 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6901 * only do this if we aren't using compression, otherwise it's a
6904 if (!btrfs_file_extent_compression(leaf, fi))
6905 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6907 printf("ram bytes may be wrong?\n");
6908 btrfs_mark_buffer_dirty(leaf);
6910 err = btrfs_commit_transaction(trans, root);
6911 btrfs_release_path(path);
6912 return ret ? ret : err;
/*
 * Cross-check all data backrefs of one extent record against each other.
 *
 * Builds a list of (bytenr, bytes) "extent_entry" candidates from the
 * record's data backrefs, picks the candidate most backrefs agree on
 * (falling back to the extent item itself on a tie), then calls
 * repair_ref() on every backref that disagrees with the winner.
 *
 * Returns 0 when nothing needed fixing; nonzero/negative paths are in
 * elided lines — NOTE(review): error/return handling between the visible
 * statements is not shown here, confirm against the full file.
 */
6915 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6916 struct extent_record *rec)
6918 struct extent_backref *back, *tmp;
6919 struct data_backref *dback;
6920 struct extent_entry *entry, *best = NULL;
6923 int broken_entries = 0;
6928 * Metadata is easy and the backrefs should always agree on bytenr and
6929 * size, if not we've got bigger issues.
/* Pass 1: collect one entry per distinct (bytenr, bytes) seen in the backrefs. */
6934 rbtree_postorder_for_each_entry_safe(back, tmp,
6935 &rec->backref_tree, node) {
6936 if (back->full_backref || !back->is_data)
6939 dback = to_data_backref(back);
6942 * We only pay attention to backrefs that we found a real
6945 if (dback->found_ref == 0)
6949 * For now we only catch when the bytes don't match, not the
6950 * bytenr. We can easily do this at the same time, but I want
6951 * to have a fs image to test on before we just add repair
6952 * functionality willy-nilly so we know we won't screw up the
6956 entry = find_entry(&entries, dback->disk_bytenr,
6959 entry = malloc(sizeof(struct extent_entry));
6964 memset(entry, 0, sizeof(*entry));
6965 entry->bytenr = dback->disk_bytenr;
6966 entry->bytes = dback->bytes;
6967 list_add_tail(&entry->list, &entries);
6972 * If we only have one entry we may think the entries agree when
6973 * in reality they don't, so we have to do some extra checking.
6975 if (dback->disk_bytenr != rec->start ||
6976 dback->bytes != rec->nr || back->broken)
6987 /* Yay all the backrefs agree, carry on good sir */
6988 if (nr_entries <= 1 && !mismatch)
6991 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
6992 "%Lu\n", rec->start);
6995 * First we want to see if the backrefs can agree amongst themselves who
6996 * is right, so figure out which one of the entries has the highest
6999 best = find_most_right_entry(&entries);
7002 * Ok so we may have an even split between what the backrefs think, so
7003 * this is where we use the extent ref to see what it thinks.
7006 entry = find_entry(&entries, rec->start, rec->nr);
7007 if (!entry && (!broken_entries || !rec->found_rec)) {
7008 fprintf(stderr, "Backrefs don't agree with each other "
7009 "and extent record doesn't agree with anybody,"
7010 " so we can't fix bytenr %Lu bytes %Lu\n",
7011 rec->start, rec->nr);
7014 } else if (!entry) {
7016 * Ok our backrefs were broken, we'll assume this is the
7017 * correct value and add an entry for this range.
7019 entry = malloc(sizeof(struct extent_entry));
7024 memset(entry, 0, sizeof(*entry));
7025 entry->bytenr = rec->start;
7026 entry->bytes = rec->nr;
7027 list_add_tail(&entry->list, &entries);
/* Re-run the vote now that the extent item's own range is a candidate. */
7031 best = find_most_right_entry(&entries);
7033 fprintf(stderr, "Backrefs and extent record evenly "
7034 "split on who is right, this is going to "
7035 "require user input to fix bytenr %Lu bytes "
7036 "%Lu\n", rec->start, rec->nr);
7043 * I don't think this can happen currently as we'll abort() if we catch
7044 * this case higher up, but in case somebody removes that we still can't
7045 * deal with it properly here yet, so just bail out if that's the case.
7047 if (best->bytenr != rec->start) {
7048 fprintf(stderr, "Extent start and backref starts don't match, "
7049 "please use btrfs-image on this file system and send "
7050 "it to a btrfs developer so they can make fsck fix "
7051 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7052 rec->start, rec->nr);
7058 * Ok great we all agreed on an extent record, let's go find the real
7059 * references and fix up the ones that don't match.
/* Pass 2: repair every real data backref that disagrees with 'best'. */
7061 rbtree_postorder_for_each_entry_safe(back, tmp,
7062 &rec->backref_tree, node) {
7063 if (back->full_backref || !back->is_data)
7066 dback = to_data_backref(back);
7069 * Still ignoring backrefs that don't have a real ref attached
7072 if (dback->found_ref == 0)
7075 if (dback->bytes == best->bytes &&
7076 dback->disk_bytenr == best->bytenr)
7079 ret = repair_ref(info, path, dback, best);
7085 * Ok we messed with the actual refs, which means we need to drop our
7086 * entire cache and go back and rescan. I know this is a huge pain and
7087 * adds a lot of extra work, but it's the only way to be safe. Once all
7088 * the backrefs agree we may not need to do anything to the extent
/* Free the candidate list before returning. */
7093 while (!list_empty(&entries)) {
7094 entry = list_entry(entries.next, struct extent_entry, list);
7095 list_del_init(&entry->list);
/*
 * Resolve a duplicate extent record against the extent cache.
 *
 * When a record has exactly one duplicate and no real extent item of its
 * own, the duplicate (which came from an actual BTRFS_EXTENT_ITEM_KEY) is
 * promoted to be the canonical record; overlapping cached records are
 * merged into it.  Returns 0 when delete_duplicate_records() still needs
 * to run, 1 when the duplicate was fully absorbed here.
 */
7101 static int process_duplicates(struct btrfs_root *root,
7102 struct cache_tree *extent_cache,
7103 struct extent_record *rec)
7105 struct extent_record *good, *tmp;
7106 struct cache_extent *cache;
7110 * If we found an extent record for this extent then return, or if we
7111 * have more than one duplicate we are likely going to need to delete
7114 if (rec->found_rec || rec->num_duplicates > 1)
7117 /* Shouldn't happen but just in case */
7118 BUG_ON(!rec->num_duplicates);
7121 * So this happens if we end up with a backref that doesn't match the
7122 * actual extent entry. So either the backref is bad or the extent
7123 * entry is bad. Either way we want to have the extent_record actually
7124 * reflect what we found in the extent_tree, so we need to take the
7125 * duplicate out and use that as the extent_record since the only way we
7126 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7128 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the single duplicate to be the canonical record. */
7130 good = to_extent_record(rec->dups.next);
7131 list_del_init(&good->list);
7132 INIT_LIST_HEAD(&good->backrefs);
7133 INIT_LIST_HEAD(&good->dups);
7134 good->cache.start = good->start;
7135 good->cache.size = good->nr;
7136 good->content_checked = 0;
7137 good->owner_ref_checked = 0;
7138 good->num_duplicates = 0;
7139 good->refs = rec->refs;
7140 list_splice_init(&rec->backrefs, &good->backrefs);
/* Sweep the cache for records overlapping the promoted range. */
7142 cache = lookup_cache_extent(extent_cache, good->start,
7146 tmp = container_of(cache, struct extent_record, cache);
7149 * If we find another overlapping extent and its found_rec is
7150 * set then it's a duplicate and we need to try and delete
7153 if (tmp->found_rec || tmp->num_duplicates > 0) {
7154 if (list_empty(&good->list))
7155 list_add_tail(&good->list,
7156 &duplicate_extents);
7157 good->num_duplicates += tmp->num_duplicates + 1;
7158 list_splice_init(&tmp->dups, &good->dups);
7159 list_del_init(&tmp->list);
7160 list_add_tail(&tmp->list, &good->dups);
7161 remove_cache_extent(extent_cache, &tmp->cache);
7166 * Ok we have another non extent item backed extent rec, so lets
7167 * just add it to this extent and carry on like we did above.
7169 good->refs += tmp->refs;
7170 list_splice_init(&tmp->backrefs, &good->backrefs);
7171 remove_cache_extent(extent_cache, &tmp->cache);
7174 ret = insert_cache_extent(extent_cache, &good->cache);
/* 0 => still have duplicates to delete; 1 => fully merged here. */
7177 return good->num_duplicates ? 0 : 1;
/*
 * Delete the overlapping duplicate extent items for one record.
 *
 * Finds the record that fully covers all duplicates, moves every other
 * record onto a delete list, and removes their EXTENT_ITEM keys from the
 * extent tree inside a single transaction.  Bails out (with a request for
 * a btrfs-image) if the duplicates only partially overlap.
 *
 * Returns <0 on error, otherwise the number of deleted entries (so 0
 * means "nothing left to do").
 */
7180 static int delete_duplicate_records(struct btrfs_root *root,
7181 struct extent_record *rec)
7183 struct btrfs_trans_handle *trans;
7184 LIST_HEAD(delete_list);
7185 struct btrfs_path *path;
7186 struct extent_record *tmp, *good, *n;
7189 struct btrfs_key key;
7191 path = btrfs_alloc_path();
7198 /* Find the record that covers all of the duplicates. */
7199 list_for_each_entry(tmp, &rec->dups, list) {
7200 if (good->start < tmp->start)
7202 if (good->nr > tmp->nr)
7205 if (tmp->start + tmp->nr < good->start + good->nr) {
7206 fprintf(stderr, "Ok we have overlapping extents that "
7207 "aren't completely covered by each other, this "
7208 "is going to require more careful thought. "
7209 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7210 tmp->start, tmp->nr, good->start, good->nr);
/* Everything except 'good' goes on the delete list. */
7217 list_add_tail(&rec->list, &delete_list);
7219 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7222 list_move_tail(&tmp->list, &delete_list);
/* Deletions happen in the extent tree. */
7225 root = root->fs_info->extent_root;
7226 trans = btrfs_start_transaction(root, 1);
7227 if (IS_ERR(trans)) {
7228 ret = PTR_ERR(trans);
7232 list_for_each_entry(tmp, &delete_list, list) {
7233 if (tmp->found_rec == 0)
7235 key.objectid = tmp->start;
7236 key.type = BTRFS_EXTENT_ITEM_KEY;
7237 key.offset = tmp->nr;
7239 /* Shouldn't happen but just in case */
7240 if (tmp->metadata) {
7241 fprintf(stderr, "Well this shouldn't happen, extent "
7242 "record overlaps but is metadata? "
7243 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7247 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7253 ret = btrfs_del_item(trans, root, path);
7256 btrfs_release_path(path);
7259 err = btrfs_commit_transaction(trans, root);
/* Free both lists regardless of success. */
7263 while (!list_empty(&delete_list)) {
7264 tmp = to_extent_record(delete_list.next);
7265 list_del_init(&tmp->list);
7271 while (!list_empty(&rec->dups)) {
7272 tmp = to_extent_record(rec->dups.next);
7273 list_del_init(&tmp->list);
7277 btrfs_free_path(path);
7279 if (!ret && !nr_del)
7280 rec->num_duplicates = 0;
7282 return ret ? ret : nr_del;
/*
 * For backrefs we never matched to a real file extent, look the file
 * extent up directly in its fs tree and, if the referenced bytenr has no
 * extent record of its own, attach the on-disk (bytenr, bytes) values to
 * the backref so verify_backrefs() can weigh them in.
 */
7285 static int find_possible_backrefs(struct btrfs_fs_info *info,
7286 struct btrfs_path *path,
7287 struct cache_tree *extent_cache,
7288 struct extent_record *rec)
7290 struct btrfs_root *root;
7291 struct extent_backref *back, *tmp;
7292 struct data_backref *dback;
7293 struct cache_extent *cache;
7294 struct btrfs_file_extent_item *fi;
7295 struct btrfs_key key;
7299 rbtree_postorder_for_each_entry_safe(back, tmp,
7300 &rec->backref_tree, node) {
7301 /* Don't care about full backrefs (poor unloved backrefs) */
7302 if (back->full_backref || !back->is_data)
7305 dback = to_data_backref(back);
7307 /* We found this one, we don't need to do a lookup */
7308 if (dback->found_ref)
/* Resolve the subvolume root this backref claims to live in. */
7311 key.objectid = dback->root;
7312 key.type = BTRFS_ROOT_ITEM_KEY;
7313 key.offset = (u64)-1;
7315 root = btrfs_read_fs_root(info, &key);
7317 /* No root, definitely a bad ref, skip */
7318 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7320 /* Other err, exit */
7322 return PTR_ERR(root);
/* Look for the file extent item the backref points at. */
7324 key.objectid = dback->owner;
7325 key.type = BTRFS_EXTENT_DATA_KEY;
7326 key.offset = dback->offset;
7327 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7329 btrfs_release_path(path);
7332 /* Didn't find it, we can carry on */
7337 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7338 struct btrfs_file_extent_item);
7339 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7340 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7341 btrfs_release_path(path);
7342 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7344 struct extent_record *tmp;
7345 tmp = container_of(cache, struct extent_record, cache);
7348 * If we found an extent record for the bytenr for this
7349 * particular backref then we can't add it to our
7350 * current extent record. We only want to add backrefs
7351 * that don't have a corresponding extent item in the
7352 * extent tree since they likely belong to this record
7353 * and we need to fix it if it doesn't match bytenrs.
/* Record the on-disk values we found for this backref. */
7359 dback->found_ref += 1;
7360 dback->disk_bytenr = bytenr;
7361 dback->bytes = bytes;
7364 * Set this so the verify backref code knows not to trust the
7365 * values in this backref.
7374 * Record orphan data refs into their corresponding roots.
7376 * Return 0 if the extent item contains a data ref and it was recorded.
7377 * Return 1 if the extent item contains no useful data ref.
7378 * In that case, it may contain only a shared_dataref or metadata backref,
7379 * or the file extent exists (this should be handled by the extent bytenr
7381 * Return <0 if something goes wrong.
7383 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7384 struct extent_record *rec)
7386 struct btrfs_key key;
7387 struct btrfs_root *dest_root;
7388 struct extent_backref *back, *tmp;
7389 struct data_backref *dback;
7390 struct orphan_data_extent *orphan;
7391 struct btrfs_path *path;
7392 int recorded_data_ref = 0;
7397 path = btrfs_alloc_path();
/* Walk data backrefs that exist in the extent tree but had no file extent. */
7400 rbtree_postorder_for_each_entry_safe(back, tmp,
7401 &rec->backref_tree, node) {
7402 if (back->full_backref || !back->is_data ||
7403 !back->found_extent_tree)
7405 dback = to_data_backref(back);
7406 if (dback->found_ref)
7408 key.objectid = dback->root;
7409 key.type = BTRFS_ROOT_ITEM_KEY;
7410 key.offset = (u64)-1;
7412 dest_root = btrfs_read_fs_root(fs_info, &key);
7414 /* For non-exist root we just skip it */
7415 if (IS_ERR(dest_root) || !dest_root)
7418 key.objectid = dback->owner;
7419 key.type = BTRFS_EXTENT_DATA_KEY;
7420 key.offset = dback->offset;
7422 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7424 * For ret < 0, it's OK since the fs-tree may be corrupted,
7425 * we need to record it for inode/file extent rebuild.
7426 * For ret > 0, we record it only for file extent rebuild.
7427 * For ret == 0, the file extent exists but only bytenr
7428 * mismatch, let the original bytenr fix routine to handle,
/* Queue an orphan record on the destination root for later rebuild. */
7434 orphan = malloc(sizeof(*orphan));
7439 INIT_LIST_HEAD(&orphan->list);
7440 orphan->root = dback->root;
7441 orphan->objectid = dback->owner;
7442 orphan->offset = dback->offset;
7443 orphan->disk_bytenr = rec->cache.start;
7444 orphan->disk_len = rec->cache.size;
7445 list_add(&dest_root->orphan_data_extents, &orphan->list);
7446 recorded_data_ref = 1;
7449 btrfs_free_path(path);
7451 return !recorded_data_ref;
7457 * when an incorrect extent item is found, this will delete
7458 * all of the existing entries for it and recreate them
7459 * based on what the tree scan found.
7461 static int fixup_extent_refs(struct btrfs_fs_info *info,
7462 struct cache_tree *extent_cache,
7463 struct extent_record *rec)
7465 struct btrfs_trans_handle *trans = NULL;
7467 struct btrfs_path *path;
7468 struct cache_extent *cache;
7469 struct extent_backref *back, *tmp;
7473 if (rec->flag_block_full_backref)
7474 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7476 path = btrfs_alloc_path();
7480 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7482 * Sometimes the backrefs themselves are so broken they don't
7483 * get attached to any meaningful rec, so first go back and
7484 * check any of our backrefs that we couldn't find and throw
7485 * them into the list if we find the backref so that
7486 * verify_backrefs can figure out what to do.
7488 ret = find_possible_backrefs(info, path, extent_cache, rec);
7493 /* step one, make sure all of the backrefs agree */
7494 ret = verify_backrefs(info, path, rec);
7498 trans = btrfs_start_transaction(info->extent_root, 1);
7499 if (IS_ERR(trans)) {
7500 ret = PTR_ERR(trans);
7504 /* step two, delete all the existing records */
7505 ret = delete_extent_records(trans, info->extent_root, path,
7506 rec->start, rec->max_size);
7511 /* was this block corrupt? If so, don't add references to it */
7512 cache = lookup_cache_extent(info->corrupt_blocks,
7513 rec->start, rec->max_size);
7519 /* step three, recreate all the refs we did find */
7520 rbtree_postorder_for_each_entry_safe(back, tmp,
7521 &rec->backref_tree, node) {
7523 * if we didn't find any references, don't create a
7526 if (!back->found_ref)
7529 rec->bad_full_backref = 0;
7530 ret = record_extent(trans, info, path, rec, back, allocated, flags);
/* Commit whatever we did, even on partial failure. */
7538 int err = btrfs_commit_transaction(trans, info->extent_root);
7543 btrfs_free_path(path);
/*
 * Rewrite the FULL_BACKREF flag on a single extent item so it matches
 * what the scan decided (rec->flag_block_full_backref).  Searches the
 * extent tree for the item, flips the flag in place, and commits.
 */
7547 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7548 struct extent_record *rec)
7550 struct btrfs_trans_handle *trans;
7551 struct btrfs_root *root = fs_info->extent_root;
7552 struct btrfs_path *path;
7553 struct btrfs_extent_item *ei;
7554 struct btrfs_key key;
7558 key.objectid = rec->start;
7559 if (rec->metadata) {
/* Metadata items key on level, data items on size. */
7560 key.type = BTRFS_METADATA_ITEM_KEY;
7561 key.offset = rec->info_level;
7563 key.type = BTRFS_EXTENT_ITEM_KEY;
7564 key.offset = rec->max_size;
7567 path = btrfs_alloc_path();
7571 trans = btrfs_start_transaction(root, 0);
7572 if (IS_ERR(trans)) {
7573 btrfs_free_path(path);
7574 return PTR_ERR(trans);
7577 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7579 btrfs_free_path(path);
7580 btrfs_commit_transaction(trans, root);
7583 fprintf(stderr, "Didn't find extent for %llu\n",
7584 (unsigned long long)rec->start);
7585 btrfs_free_path(path);
7586 btrfs_commit_transaction(trans, root);
7590 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7591 struct btrfs_extent_item);
7592 flags = btrfs_extent_flags(path->nodes[0], ei);
7593 if (rec->flag_block_full_backref) {
7594 fprintf(stderr, "setting full backref on %llu\n",
7595 (unsigned long long)key.objectid);
7596 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7598 fprintf(stderr, "clearing full backref on %llu\n",
7599 (unsigned long long)key.objectid);
7600 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7602 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7603 btrfs_mark_buffer_dirty(path->nodes[0]);
7604 btrfs_free_path(path);
7605 return btrfs_commit_transaction(trans, root);
7608 /* right now we only prune from the extent allocation tree */
/*
 * Remove the pointer to one corrupt block from its parent node in the
 * extent tree.  Searches down to the parent level, locates the slot
 * whose blockptr matches the corrupt block, and deletes that pointer.
 */
7609 static int prune_one_block(struct btrfs_trans_handle *trans,
7610 struct btrfs_fs_info *info,
7611 struct btrfs_corrupt_block *corrupt)
7614 struct btrfs_path path;
7615 struct extent_buffer *eb;
7619 int level = corrupt->level + 1;
7621 btrfs_init_path(&path);
7623 /* we want to stop at the parent to our busted block */
7624 path.lowest_level = level;
7626 ret = btrfs_search_slot(trans, info->extent_root,
7627 &corrupt->key, &path, -1, 1);
7632 eb = path.nodes[level];
7639 * hopefully the search gave us the block we want to prune,
7640 * lets try that first
7642 slot = path.slots[level];
7643 found = btrfs_node_blockptr(eb, slot);
7644 if (found == corrupt->cache.start)
7647 nritems = btrfs_header_nritems(eb);
7649 /* the search failed, lets scan this node and hope we find it */
7650 for (slot = 0; slot < nritems; slot++) {
7651 found = btrfs_node_blockptr(eb, slot);
7652 if (found == corrupt->cache.start)
7656 * we couldn't find the bad block. TODO, search all the nodes for pointers
7659 if (eb == info->extent_root->node) {
7664 btrfs_release_path(&path);
7669 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7670 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7673 btrfs_release_path(&path);
/*
 * Drain the fs_info->corrupt_blocks cache, pruning each corrupt block's
 * parent pointer via prune_one_block() inside one transaction, then
 * commit.  The transaction is started lazily on the first entry.
 */
7677 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7679 struct btrfs_trans_handle *trans = NULL;
7680 struct cache_extent *cache;
7681 struct btrfs_corrupt_block *corrupt;
7684 cache = search_cache_extent(info->corrupt_blocks, 0);
7688 trans = btrfs_start_transaction(info->extent_root, 1);
7690 return PTR_ERR(trans);
7692 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* NOTE(review): prune_one_block()'s return value is not checked here. */
7693 prune_one_block(trans, info, corrupt);
7694 remove_cache_extent(info->corrupt_blocks, cache);
7697 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * Clear the DIRTY ranges in the free-space cache and mark the affected
 * block groups as uncached so their free space will be rebuilt on the
 * next pass.
 */
7701 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7703 struct btrfs_block_group_cache *cache;
7708 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7709 &start, &end, EXTENT_DIRTY);
7712 clear_extent_dirty(&fs_info->free_space_cache, start, end,
/* Advance block group by block group from 'start'. */
7718 cache = btrfs_lookup_first_block_group(fs_info, start);
7723 start = cache->key.objectid + cache->key.offset;
/*
 * Main extent verification pass: walk every extent record in the cache,
 * report mismatches (duplicates, ref-count mismatch, backref mismatch,
 * owner-ref failure, bad full-backref flag, stripe/chunk-type problems)
 * and, when --repair is active, dispatch the appropriate fixup routine.
 * Finally fixes block accounting if anything was repaired.
 */
7727 static int check_extent_refs(struct btrfs_root *root,
7728 struct cache_tree *extent_cache)
7730 struct extent_record *rec;
7731 struct cache_extent *cache;
7740 * if we're doing a repair, we have to make sure
7741 * we don't allocate from the problem extents.
7742 * In the worst case, this will be all the
7745 cache = search_cache_extent(extent_cache, 0);
7747 rec = container_of(cache, struct extent_record, cache);
7748 set_extent_dirty(root->fs_info->excluded_extents,
7750 rec->start + rec->max_size - 1,
7752 cache = next_cache_extent(cache);
7755 /* pin down all the corrupted blocks too */
7756 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7758 set_extent_dirty(root->fs_info->excluded_extents,
7760 cache->start + cache->size - 1,
7762 cache = next_cache_extent(cache);
7764 prune_corrupt_blocks(root->fs_info);
7765 reset_cached_block_groups(root->fs_info);
7768 reset_cached_block_groups(root->fs_info);
7771 * We need to delete any duplicate entries we find first otherwise we
7772 * could mess up the extent tree when we have backrefs that actually
7773 * belong to a different extent item and not the weird duplicate one.
7775 while (repair && !list_empty(&duplicate_extents)) {
7776 rec = to_extent_record(duplicate_extents.next);
7777 list_del_init(&rec->list);
7779 /* Sometimes we can find a backref before we find an actual
7780 * extent, so we need to process it a little bit to see if there
7781 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7782 * if this is a backref screwup. If we need to delete stuff
7783 * process_duplicates() will return 0, otherwise it will return
7786 if (process_duplicates(root, extent_cache, rec))
7788 ret = delete_duplicate_records(root, rec);
7792 * delete_duplicate_records will return the number of entries
7793 * deleted, so if it's greater than 0 then we know we actually
7794 * did something and we need to remove.
/* Main per-record verification loop. */
7808 cache = search_cache_extent(extent_cache, 0);
7811 rec = container_of(cache, struct extent_record, cache);
7812 if (rec->num_duplicates) {
7813 fprintf(stderr, "extent item %llu has multiple extent "
7814 "items\n", (unsigned long long)rec->start);
7819 if (rec->refs != rec->extent_item_refs) {
7820 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7821 (unsigned long long)rec->start,
7822 (unsigned long long)rec->nr);
7823 fprintf(stderr, "extent item %llu, found %llu\n",
7824 (unsigned long long)rec->extent_item_refs,
7825 (unsigned long long)rec->refs);
7826 ret = record_orphan_data_extents(root->fs_info, rec);
7833 * we can't use the extent to repair file
7834 * extent, let the fallback method handle it.
7836 if (!fixed && repair) {
7837 ret = fixup_extent_refs(
7848 if (all_backpointers_checked(rec, 1)) {
7849 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7850 (unsigned long long)rec->start,
7851 (unsigned long long)rec->nr);
7853 if (!fixed && !recorded && repair) {
7854 ret = fixup_extent_refs(root->fs_info,
7863 if (!rec->owner_ref_checked) {
7864 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7865 (unsigned long long)rec->start,
7866 (unsigned long long)rec->nr);
7867 if (!fixed && !recorded && repair) {
7868 ret = fixup_extent_refs(root->fs_info,
7877 if (rec->bad_full_backref) {
7878 fprintf(stderr, "bad full backref, on [%llu]\n",
7879 (unsigned long long)rec->start);
7881 ret = fixup_extent_flags(root->fs_info, rec);
7890 * Although it's not a extent ref's problem, we reuse this
7891 * routine for error reporting.
7892 * No repair function yet.
7894 if (rec->crossing_stripes) {
7896 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7897 rec->start, rec->start + rec->max_size);
7902 if (rec->wrong_chunk_type) {
7904 "bad extent [%llu, %llu), type mismatch with chunk\n",
7905 rec->start, rec->start + rec->max_size);
7910 remove_cache_extent(extent_cache, cache);
7911 free_all_extent_backrefs(rec);
7912 if (!init_extent_tree && repair && (!cur_err || fixed))
7913 clear_extent_dirty(root->fs_info->excluded_extents,
7915 rec->start + rec->max_size - 1,
7921 if (ret && ret != -EAGAIN) {
7922 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7925 struct btrfs_trans_handle *trans;
7927 root = root->fs_info->extent_root;
7928 trans = btrfs_start_transaction(root, 1);
7929 if (IS_ERR(trans)) {
7930 ret = PTR_ERR(trans);
7934 btrfs_fix_block_accounting(trans, root);
7935 ret = btrfs_commit_transaction(trans, root);
7940 fprintf(stderr, "repaired damaged extent references\n");
/*
 * Compute the per-stripe length of a chunk from its RAID profile:
 * RAID0 spreads 'length' over all stripes, RAID10 over half (2 copies),
 * RAID5/6 exclude one/two parity stripes, and everything else (single,
 * DUP, RAID1) uses the full length per stripe.
 */
7946 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7950 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7951 stripe_size = length;
7952 stripe_size /= num_stripes;
7953 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7954 stripe_size = length * 2;
7955 stripe_size /= num_stripes;
7956 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7957 stripe_size = length;
7958 stripe_size /= (num_stripes - 1);
7959 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7960 stripe_size = length;
7961 stripe_size /= (num_stripes - 2);
7963 stripe_size = length;
7969 * Check the chunk with its block group/dev list ref:
7970 * Return 0 if all refs seems valid.
7971 * Return 1 if part of refs seems valid, need later check for rebuild ref
7972 * like missing block group and needs to search extent tree to rebuild them.
7973 * Return -1 if essential refs are missing and unable to rebuild.
7975 static int check_chunk_refs(struct chunk_record *chunk_rec,
7976 struct block_group_tree *block_group_cache,
7977 struct device_extent_tree *dev_extent_cache,
7980 struct cache_extent *block_group_item;
7981 struct block_group_record *block_group_rec;
7982 struct cache_extent *dev_extent_item;
7983 struct device_extent_record *dev_extent_rec;
7987 int metadump_v2 = 0;
/* Step 1: the chunk must match exactly one block group item. */
7991 block_group_item = lookup_cache_extent(&block_group_cache->tree,
7994 if (block_group_item) {
7995 block_group_rec = container_of(block_group_item,
7996 struct block_group_record,
7998 if (chunk_rec->length != block_group_rec->offset ||
7999 chunk_rec->offset != block_group_rec->objectid ||
8001 chunk_rec->type_flags != block_group_rec->flags)) {
8004 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8005 chunk_rec->objectid,
8010 chunk_rec->type_flags,
8011 block_group_rec->objectid,
8012 block_group_rec->type,
8013 block_group_rec->offset,
8014 block_group_rec->offset,
8015 block_group_rec->objectid,
8016 block_group_rec->flags);
8019 list_del_init(&block_group_rec->list);
8020 chunk_rec->bg_rec = block_group_rec;
8025 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8026 chunk_rec->objectid,
8031 chunk_rec->type_flags);
/* Step 2: every stripe must match a device extent of the right length. */
8038 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8039 chunk_rec->num_stripes);
8040 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8041 devid = chunk_rec->stripes[i].devid;
8042 offset = chunk_rec->stripes[i].offset;
8043 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8044 devid, offset, length);
8045 if (dev_extent_item) {
8046 dev_extent_rec = container_of(dev_extent_item,
8047 struct device_extent_record,
8049 if (dev_extent_rec->objectid != devid ||
8050 dev_extent_rec->offset != offset ||
8051 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8052 dev_extent_rec->length != length) {
8055 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8056 chunk_rec->objectid,
8059 chunk_rec->stripes[i].devid,
8060 chunk_rec->stripes[i].offset,
8061 dev_extent_rec->objectid,
8062 dev_extent_rec->offset,
8063 dev_extent_rec->length);
8066 list_move(&dev_extent_rec->chunk_list,
8067 &chunk_rec->dextents);
8072 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8073 chunk_rec->objectid,
8076 chunk_rec->stripes[i].devid,
8077 chunk_rec->stripes[i].offset);
8084 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * Cross-check every chunk against its block group and device extents,
 * sorting chunks onto the caller's good/bad/rebuild lists per the
 * check_chunk_refs() verdict, then report any block groups or device
 * extents that no chunk claimed.
 */
8085 int check_chunks(struct cache_tree *chunk_cache,
8086 struct block_group_tree *block_group_cache,
8087 struct device_extent_tree *dev_extent_cache,
8088 struct list_head *good, struct list_head *bad,
8089 struct list_head *rebuild, int silent)
8091 struct cache_extent *chunk_item;
8092 struct chunk_record *chunk_rec;
8093 struct block_group_record *bg_rec;
8094 struct device_extent_record *dext_rec;
8098 chunk_item = first_cache_extent(chunk_cache);
8099 while (chunk_item) {
8100 chunk_rec = container_of(chunk_item, struct chunk_record,
8102 err = check_chunk_refs(chunk_rec, block_group_cache,
8103 dev_extent_cache, silent);
8106 if (err == 0 && good)
8107 list_add_tail(&chunk_rec->list, good);
8108 if (err > 0 && rebuild)
8109 list_add_tail(&chunk_rec->list, rebuild);
8111 list_add_tail(&chunk_rec->list, bad);
8112 chunk_item = next_cache_extent(chunk_item);
/* Anything still on these lists had no matching chunk. */
8115 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8118 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8126 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8130 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * Verify one device item: sum the lengths of all device extents recorded
 * for this devid and compare against the device item's bytes_used.
 */
8141 static int check_device_used(struct device_record *dev_rec,
8142 struct device_extent_tree *dext_cache)
8144 struct cache_extent *cache;
8145 struct device_extent_record *dev_extent_rec;
8148 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8150 dev_extent_rec = container_of(cache,
8151 struct device_extent_record,
/* The cache is keyed per device; stop once we leave this devid. */
8153 if (dev_extent_rec->objectid != dev_rec->devid)
8156 list_del_init(&dev_extent_rec->device_list);
8157 total_byte += dev_extent_rec->length;
8158 cache = next_cache_extent(cache);
8161 if (total_byte != dev_rec->byte_used) {
8163 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8164 total_byte, dev_rec->byte_used, dev_rec->objectid,
8165 dev_rec->type, dev_rec->offset);
8172 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * Walk every device record, validate its used-bytes accounting via
 * check_device_used(), then report device extents that reference a
 * device which does not exist.
 */
8173 static int check_devices(struct rb_root *dev_cache,
8174 struct device_extent_tree *dev_extent_cache)
8176 struct rb_node *dev_node;
8177 struct device_record *dev_rec;
8178 struct device_extent_record *dext_rec;
8182 dev_node = rb_first(dev_cache);
8184 dev_rec = container_of(dev_node, struct device_record, node);
8185 err = check_device_used(dev_rec, dev_extent_cache);
8189 dev_node = rb_next(dev_node);
8191 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8194 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8195 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * Allocate a root_item_record describing one tree root (or an in-progress
 * snapshot drop, via drop_level/drop_key) and append it to 'head' for
 * later processing by deal_root_from_list().
 */
8202 static int add_root_item_to_list(struct list_head *head,
8203 u64 objectid, u64 bytenr, u64 last_snapshot,
8204 u8 level, u8 drop_level,
8205 int level_size, struct btrfs_key *drop_key)
8208 struct root_item_record *ri_rec;
8209 ri_rec = malloc(sizeof(*ri_rec));
8212 ri_rec->bytenr = bytenr;
8213 ri_rec->objectid = objectid;
8214 ri_rec->level = level;
8215 ri_rec->level_size = level_size;
8216 ri_rec->drop_level = drop_level;
8217 ri_rec->last_snapshot = last_snapshot;
/* drop_key is optional; copied only when supplied (guard elided here). */
8219 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8220 list_add_tail(&ri_rec->list, head);
/* Free every root_item_record queued on 'list'. */
8225 static void free_root_item_list(struct list_head *list)
8227 struct root_item_record *ri_rec;
8229 while (!list_empty(list)) {
8230 ri_rec = list_first_entry(list, struct root_item_record,
8232 list_del_init(&ri_rec->list);
/*
 * Process each queued tree root: read its root node, seed the pending
 * block caches, and run run_next_block() until this root is fully
 * scanned before moving to the next (snapshots must be handled one at a
 * time when rebuilding the extent tree).  A final run_next_block() loop
 * drains whatever remains.
 */
8237 static int deal_root_from_list(struct list_head *list,
8238 struct btrfs_root *root,
8239 struct block_info *bits,
8241 struct cache_tree *pending,
8242 struct cache_tree *seen,
8243 struct cache_tree *reada,
8244 struct cache_tree *nodes,
8245 struct cache_tree *extent_cache,
8246 struct cache_tree *chunk_cache,
8247 struct rb_root *dev_cache,
8248 struct block_group_tree *block_group_cache,
8249 struct device_extent_tree *dev_extent_cache)
8254 while (!list_empty(list)) {
8255 struct root_item_record *rec;
8256 struct extent_buffer *buf;
8257 rec = list_entry(list->next,
8258 struct root_item_record, list);
8260 buf = read_tree_block(root->fs_info->tree_root,
8261 rec->bytenr, rec->level_size, 0);
8262 if (!extent_buffer_uptodate(buf)) {
8263 free_extent_buffer(buf);
8267 add_root_to_pending(buf, extent_cache, pending,
8268 seen, nodes, rec->objectid);
8270 * To rebuild extent tree, we need deal with snapshot
8271 * one by one, otherwise we deal with node firstly which
8272 * can maximize readahead.
8275 ret = run_next_block(root, bits, bits_nr, &last,
8276 pending, seen, reada, nodes,
8277 extent_cache, chunk_cache,
8278 dev_cache, block_group_cache,
8279 dev_extent_cache, rec);
8283 free_extent_buffer(buf);
8284 list_del(&rec->list);
/* Drain any remaining pending blocks not tied to a specific root. */
8290 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8291 reada, nodes, extent_cache, chunk_cache,
8292 dev_cache, block_group_cache,
8293 dev_extent_cache, NULL);
8303 static int check_chunks_and_extents(struct btrfs_root *root)
8305 struct rb_root dev_cache;
8306 struct cache_tree chunk_cache;
8307 struct block_group_tree block_group_cache;
8308 struct device_extent_tree dev_extent_cache;
8309 struct cache_tree extent_cache;
8310 struct cache_tree seen;
8311 struct cache_tree pending;
8312 struct cache_tree reada;
8313 struct cache_tree nodes;
8314 struct extent_io_tree excluded_extents;
8315 struct cache_tree corrupt_blocks;
8316 struct btrfs_path path;
8317 struct btrfs_key key;
8318 struct btrfs_key found_key;
8320 struct block_info *bits;
8322 struct extent_buffer *leaf;
8324 struct btrfs_root_item ri;
8325 struct list_head dropping_trees;
8326 struct list_head normal_trees;
8327 struct btrfs_root *root1;
8332 dev_cache = RB_ROOT;
8333 cache_tree_init(&chunk_cache);
8334 block_group_tree_init(&block_group_cache);
8335 device_extent_tree_init(&dev_extent_cache);
8337 cache_tree_init(&extent_cache);
8338 cache_tree_init(&seen);
8339 cache_tree_init(&pending);
8340 cache_tree_init(&nodes);
8341 cache_tree_init(&reada);
8342 cache_tree_init(&corrupt_blocks);
8343 extent_io_tree_init(&excluded_extents);
8344 INIT_LIST_HEAD(&dropping_trees);
8345 INIT_LIST_HEAD(&normal_trees);
8348 root->fs_info->excluded_extents = &excluded_extents;
8349 root->fs_info->fsck_extent_cache = &extent_cache;
8350 root->fs_info->free_extent_hook = free_extent_hook;
8351 root->fs_info->corrupt_blocks = &corrupt_blocks;
8355 bits = malloc(bits_nr * sizeof(struct block_info));
8361 if (ctx.progress_enabled) {
8362 ctx.tp = TASK_EXTENTS;
8363 task_start(ctx.info);
8367 root1 = root->fs_info->tree_root;
8368 level = btrfs_header_level(root1->node);
8369 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8370 root1->node->start, 0, level, 0,
8371 root1->nodesize, NULL);
8374 root1 = root->fs_info->chunk_root;
8375 level = btrfs_header_level(root1->node);
8376 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8377 root1->node->start, 0, level, 0,
8378 root1->nodesize, NULL);
8381 btrfs_init_path(&path);
8384 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8385 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8390 leaf = path.nodes[0];
8391 slot = path.slots[0];
8392 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8393 ret = btrfs_next_leaf(root, &path);
8396 leaf = path.nodes[0];
8397 slot = path.slots[0];
8399 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8400 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8401 unsigned long offset;
8404 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8405 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8406 last_snapshot = btrfs_root_last_snapshot(&ri);
8407 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8408 level = btrfs_root_level(&ri);
8409 level_size = root->nodesize;
8410 ret = add_root_item_to_list(&normal_trees,
8412 btrfs_root_bytenr(&ri),
8413 last_snapshot, level,
8414 0, level_size, NULL);
8418 level = btrfs_root_level(&ri);
8419 level_size = root->nodesize;
8420 objectid = found_key.objectid;
8421 btrfs_disk_key_to_cpu(&found_key,
8423 ret = add_root_item_to_list(&dropping_trees,
8425 btrfs_root_bytenr(&ri),
8426 last_snapshot, level,
8428 level_size, &found_key);
8435 btrfs_release_path(&path);
8438 * check_block can return -EAGAIN if it fixes something, please keep
8439 * this in mind when dealing with return values from these functions, if
8440 * we get -EAGAIN we want to fall through and restart the loop.
8442 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8443 &seen, &reada, &nodes, &extent_cache,
8444 &chunk_cache, &dev_cache, &block_group_cache,
8451 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8452 &pending, &seen, &reada, &nodes,
8453 &extent_cache, &chunk_cache, &dev_cache,
8454 &block_group_cache, &dev_extent_cache);
8461 ret = check_chunks(&chunk_cache, &block_group_cache,
8462 &dev_extent_cache, NULL, NULL, NULL, 0);
8469 ret = check_extent_refs(root, &extent_cache);
8476 ret = check_devices(&dev_cache, &dev_extent_cache);
8481 task_stop(ctx.info);
8483 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8484 extent_io_tree_cleanup(&excluded_extents);
8485 root->fs_info->fsck_extent_cache = NULL;
8486 root->fs_info->free_extent_hook = NULL;
8487 root->fs_info->corrupt_blocks = NULL;
8488 root->fs_info->excluded_extents = NULL;
8491 free_chunk_cache_tree(&chunk_cache);
8492 free_device_cache_tree(&dev_cache);
8493 free_block_group_tree(&block_group_cache);
8494 free_device_extent_tree(&dev_extent_cache);
8495 free_extent_cache_tree(&seen);
8496 free_extent_cache_tree(&pending);
8497 free_extent_cache_tree(&reada);
8498 free_extent_cache_tree(&nodes);
8501 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8502 free_extent_cache_tree(&seen);
8503 free_extent_cache_tree(&pending);
8504 free_extent_cache_tree(&reada);
8505 free_extent_cache_tree(&nodes);
8506 free_chunk_cache_tree(&chunk_cache);
8507 free_block_group_tree(&block_group_cache);
8508 free_device_cache_tree(&dev_cache);
8509 free_device_extent_tree(&dev_extent_cache);
8510 free_extent_record_cache(root->fs_info, &extent_cache);
8511 free_root_item_list(&normal_trees);
8512 free_root_item_list(&dropping_trees);
8513 extent_io_tree_cleanup(&excluded_extents);
/*
 * Replace @root's node with a freshly initialized empty root block inside
 * @trans, effectively resetting the tree to empty.  When the new block lands
 * on the same bytenr as the old one (see comment below) the root item is
 * updated in place instead of going through the normal replace path.
 *
 * NOTE(review): the @overwrite branch and several error checks fall in lines
 * missing from this extract — confirm against the full source.
 */
8517 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8518 struct btrfs_root *root, int overwrite)
8520 struct extent_buffer *c;
8521 struct extent_buffer *old = root->node;
8524 struct btrfs_disk_key disk_key = {0,0,0};
8530 extent_buffer_get(c);
8533 c = btrfs_alloc_free_block(trans, root,
8535 root->root_key.objectid,
8536 &disk_key, level, 0, 0);
8539 extent_buffer_get(c);
/* Build an empty header: level, bytenr, generation, backref rev, owner. */
8543 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8544 btrfs_set_header_level(c, level);
8545 btrfs_set_header_bytenr(c, c->start);
8546 btrfs_set_header_generation(c, trans->transid);
8547 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8548 btrfs_set_header_owner(c, root->root_key.objectid);
/* Stamp the filesystem and chunk-tree UUIDs into the new block. */
8550 write_extent_buffer(c, root->fs_info->fsid,
8551 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8553 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8554 btrfs_header_chunk_tree_uuid(c),
8557 btrfs_mark_buffer_dirty(c);
/*
8559 * this case can happen in the following case:
8561 * 1.overwrite previous root.
8563 * 2.reinit reloc data root, this is because we skip pin
8564 * down reloc data tree before which means we can allocate
8565 * same block bytenr here.
 */
8567 if (old->start == c->start) {
8568 btrfs_set_root_generation(&root->root_item,
8570 root->root_item.level = btrfs_header_level(root->node);
8571 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8572 &root->root_key, &root->root_item);
8574 free_extent_buffer(c);
8578 free_extent_buffer(old);
8580 add_root_to_dirty_list(root);
/*
 * Recursively pin every metadata block reachable from @eb so the allocator
 * will not hand those bytes out while the extent tree is being rebuilt.
 * @tree_root is non-zero when @eb belongs to the tree root, in which case
 * ROOT_ITEMs found in leaves are followed into their subtrees as well.
 */
8584 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8585 struct extent_buffer *eb, int tree_root)
8587 struct extent_buffer *tmp;
8588 struct btrfs_root_item *ri;
8589 struct btrfs_key key;
8592 int level = btrfs_header_level(eb);
/*
8598 * If we have pinned this block before, don't pin it again.
8599 * This can not only avoid forever loop with broken filesystem
8600 * but also give us some speedups.
 */
8602 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8603 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8606 btrfs_pin_extent(fs_info, eb->start, eb->len);
8608 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8609 nritems = btrfs_header_nritems(eb);
8610 for (i = 0; i < nritems; i++) {
/* Leaf-of-tree-root path: chase each subtree named by a ROOT_ITEM. */
8612 btrfs_item_key_to_cpu(eb, &key, i);
8613 if (key.type != BTRFS_ROOT_ITEM_KEY)
8615 /* Skip the extent root and reloc roots */
8616 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8617 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8618 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8620 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8621 bytenr = btrfs_disk_root_bytenr(eb, ri);
/*
8624 * If at any point we start needing the real root we
8625 * will have to build a stump root for the root we are
8626 * in, but for now this doesn't actually use the root so
8627 * just pass in extent_root.
 */
8629 tmp = read_tree_block(fs_info->extent_root, bytenr,
8631 if (!extent_buffer_uptodate(tmp)) {
8632 fprintf(stderr, "Error reading root block\n");
8635 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8636 free_extent_buffer(tmp);
/* Interior-node path: recurse into (or just pin) each child block. */
8640 bytenr = btrfs_node_blockptr(eb, i);
8642 /* If we aren't the tree root don't read the block */
8643 if (level == 1 && !tree_root) {
/* Children of a level-1 node are leaves: pin without reading. */
8644 btrfs_pin_extent(fs_info, bytenr, nodesize);
8648 tmp = read_tree_block(fs_info->extent_root, bytenr,
8650 if (!extent_buffer_uptodate(tmp)) {
8651 fprintf(stderr, "Error reading tree block\n");
8654 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8655 free_extent_buffer(tmp);
/*
 * Pin all in-use metadata: first everything under the chunk root, then
 * everything reachable from the tree root (tree_root=1 so ROOT_ITEM
 * subtrees are followed).  Returns 0 on success or a negative error.
 */
8664 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8668 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8672 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block group state from scratch by iterating every
 * CHUNK_ITEM in the chunk tree, recreating a block group per chunk and
 * marking its range free in the free space cache.  Used when re-initializing
 * the extent tree, where the on-disk block group items cannot be trusted.
 */
8675 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8677 struct btrfs_block_group_cache *cache;
8678 struct btrfs_path *path;
8679 struct extent_buffer *leaf;
8680 struct btrfs_chunk *chunk;
8681 struct btrfs_key key;
8685 path = btrfs_alloc_path();
8690 key.type = BTRFS_CHUNK_ITEM_KEY;
8693 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8695 btrfs_free_path(path);
/*
8700 * We do this in case the block groups were screwed up and had alloc
8701 * bits that aren't actually set on the chunks. This happens with
8702 * restored images every time and could happen in real life I guess.
 */
8704 fs_info->avail_data_alloc_bits = 0;
8705 fs_info->avail_metadata_alloc_bits = 0;
8706 fs_info->avail_system_alloc_bits = 0;
8708 /* First we need to create the in-memory block groups */
8710 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8711 ret = btrfs_next_leaf(fs_info->chunk_root, path);
8713 btrfs_free_path(path);
8721 leaf = path->nodes[0];
8722 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8723 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8728 chunk = btrfs_item_ptr(leaf, path->slots[0],
8729 struct btrfs_chunk);
/* One block group per chunk; key.offset is the chunk's logical start. */
8730 btrfs_add_block_group(fs_info, 0,
8731 btrfs_chunk_type(leaf, chunk),
8732 key.objectid, key.offset,
8733 btrfs_chunk_length(leaf, chunk))_
8734 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8735 key.offset + btrfs_chunk_length(leaf, chunk),
/* Walk the block groups just created (loop body partly missing here). */
8741 cache = btrfs_lookup_first_block_group(fs_info, start);
8745 start = cache->key.objectid + cache->key.offset;
8748 btrfs_free_path(path);
/*
 * Remove any pending balance state: delete the balance item, delete every
 * TREE_RELOC root item from the root tree, then re-read and re-initialize
 * the data relocation tree (including its root dir item).
 *
 * NOTE(review): several error checks and the loop structure between the
 * searches/deletes are in lines missing from this extract.
 */
8752 static int reset_balance(struct btrfs_trans_handle *trans,
8753 struct btrfs_fs_info *fs_info)
8755 struct btrfs_root *root = fs_info->tree_root;
8756 struct btrfs_path *path;
8757 struct extent_buffer *leaf;
8758 struct btrfs_key key;
8759 int del_slot, del_nr = 0;
8763 path = btrfs_alloc_path();
/* Step 1: find and delete the balance item, if one exists. */
8767 key.objectid = BTRFS_BALANCE_OBJECTID;
8768 key.type = BTRFS_BALANCE_ITEM_KEY;
8771 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
/* No balance item found: skip straight to the data reloc reinit. */
8776 goto reinit_data_reloc;
8781 ret = btrfs_del_item(trans, root, path);
8784 btrfs_release_path(path);
/* Step 2: delete all TREE_RELOC root items, batching contiguous runs. */
8786 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8787 key.type = BTRFS_ROOT_ITEM_KEY;
8790 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8794 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8799 ret = btrfs_del_items(trans, root, path,
8806 btrfs_release_path(path);
8809 ret = btrfs_search_slot(trans, root, &key, path,
8816 leaf = path->nodes[0];
8817 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8818 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8820 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8825 del_slot = path->slots[0];
8834 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8838 btrfs_release_path(path);
/* Step 3: reinit the data relocation tree and give it a root dir. */
8841 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8842 key.type = BTRFS_ROOT_ITEM_KEY;
8843 key.offset = (u64)-1;
8844 root = btrfs_read_fs_root(fs_info, &key);
8846 fprintf(stderr, "Error reading data reloc tree\n");
8847 ret = PTR_ERR(root);
8850 record_root_in_trans(trans, root);
8851 ret = btrfs_fsck_reinit_root(trans, root, 0);
8854 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8856 btrfs_free_path(path);
/*
 * Throw away and recreate the extent tree (--init-extent-tree):
 *   1. refuse on mixed block groups (see comment below),
 *   2. pin all metadata so rebuilt allocations cannot clobber it,
 *   3. rebuild in-memory block groups from the chunk tree,
 *   4. reinit the extent root and re-insert block group items,
 *   5. clear any pending balance / reloc state.
 */
8860 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8861 struct btrfs_fs_info *fs_info)
/*
8867 * The only reason we don't do this is because right now we're just
8868 * walking the trees we find and pinning down their bytes, we don't look
8869 * at any of the leaves. In order to do mixed groups we'd have to check
8870 * the leaves of any fs roots and pin down the bytes for any file
8871 * extents we find. Not hard but why do it if we don't have to?
 */
8873 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8874 fprintf(stderr, "We don't support re-initing the extent tree "
8875 "for mixed block groups yet, please notify a btrfs "
8876 "developer you want to do this so they can add this "
8877 "functionality.\n");
/*
8882 * first we need to walk all of the trees except the extent tree and pin
8883 * down the bytes that are in use so we don't overwrite any existing
 */
8886 ret = pin_metadata_blocks(fs_info);
8888 fprintf(stderr, "error pinning down used bytes\n");
/*
8893 * Need to drop all the block groups since we're going to recreate all
 */
8896 btrfs_free_block_groups(fs_info);
8897 ret = reset_block_groups(fs_info);
8899 fprintf(stderr, "error resetting the block groups\n");
8903 /* Ok we can allocate now, reinit the extent root */
8904 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8906 fprintf(stderr, "extent root initialization failed\n");
/*
8908 * When the transaction code is updated we should end the
8909 * transaction, but for now progs only knows about commit so
8910 * just return an error.
 */
/*
8916 * Now we have all the in-memory block groups setup so we can make
8917 * allocations properly, and the metadata we care about is safe since we
8918 * pinned all of it above.
 */
8921 struct btrfs_block_group_cache *cache;
8923 cache = btrfs_lookup_first_block_group(fs_info, start);
8926 start = cache->key.objectid + cache->key.offset;
/* Re-insert a BLOCK_GROUP_ITEM for each in-memory block group. */
8927 ret = btrfs_insert_item(trans, fs_info->extent_root,
8928 &cache->key, &cache->item,
8929 sizeof(cache->item));
8931 fprintf(stderr, "Error adding block group\n");
/* Flush delayed refs created by the insertions above. */
8934 btrfs_extent_post_op(trans, fs_info->extent_root);
8937 ret = reset_balance(trans, fs_info);
8939 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a copy-on-write of metadata block @eb: look up its owner root,
 * then do a writable btrfs_search_slot with lowest_level set to @eb's
 * level so the search COWs the path down to (and including) @eb.
 * The transaction is committed before returning.
 */
8944 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8946 struct btrfs_path *path;
8947 struct btrfs_trans_handle *trans;
8948 struct btrfs_key key;
8951 printf("Recowing metadata block %llu\n", eb->start);
/* The block header records which root owns it. */
8952 key.objectid = btrfs_header_owner(eb);
8953 key.type = BTRFS_ROOT_ITEM_KEY;
8954 key.offset = (u64)-1;
8956 root = btrfs_read_fs_root(root->fs_info, &key);
8958 fprintf(stderr, "Couldn't find owner root %llu\n",
8960 return PTR_ERR(root);
8963 path = btrfs_alloc_path();
8967 trans = btrfs_start_transaction(root, 1);
8968 if (IS_ERR(trans)) {
8969 btrfs_free_path(path);
8970 return PTR_ERR(trans);
/* Key off eb's first entry: node key for interior nodes, item key for leaves. */
8973 path->lowest_level = btrfs_header_level(eb);
8974 if (path->lowest_level)
8975 btrfs_node_key_to_cpu(eb, &key, 0);
8977 btrfs_item_key_to_cpu(eb, &key, 0);
/* cow=1 search performs the actual re-COW of the path. */
8979 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8980 btrfs_commit_transaction(trans, root);
8981 btrfs_free_path(path);
/*
 * Delete a single corrupt item recorded in @bad: open the root identified
 * by bad->root_id, search for bad->key with a writable path (ins_len=-1)
 * and remove the item.  Commits the transaction before returning.
 */
8985 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8987 struct btrfs_path *path;
8988 struct btrfs_trans_handle *trans;
8989 struct btrfs_key key;
8992 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
8993 bad->key.type, bad->key.offset);
8994 key.objectid = bad->root_id;
8995 key.type = BTRFS_ROOT_ITEM_KEY;
8996 key.offset = (u64)-1;
8998 root = btrfs_read_fs_root(root->fs_info, &key);
9000 fprintf(stderr, "Couldn't find owner root %llu\n",
9002 return PTR_ERR(root);
9005 path = btrfs_alloc_path();
9009 trans = btrfs_start_transaction(root, 1);
9010 if (IS_ERR(trans)) {
9011 btrfs_free_path(path);
9012 return PTR_ERR(trans);
/* -1/1: position for deletion with a write-locked path. */
9015 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
9021 ret = btrfs_del_item(trans, root, path);
9023 btrfs_commit_transaction(trans, root);
9024 btrfs_free_path(path);
/*
 * Discard the log tree by zeroing the log root pointer and level in the
 * superblock copy, then committing so the change reaches disk.  Used in
 * repair mode, where a stale log could fail replay on the next mount.
 */
9028 static int zero_log_tree(struct btrfs_root *root)
9030 struct btrfs_trans_handle *trans;
9033 trans = btrfs_start_transaction(root, 1);
9034 if (IS_ERR(trans)) {
9035 ret = PTR_ERR(trans);
9038 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
9039 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
9040 ret = btrfs_commit_transaction(trans, root);
/*
 * Recompute checksums for the data extent at logical @start spanning @len
 * bytes: read each sector into @buf and insert its csum into the csum tree.
 * @buf must be at least one sectorsize large.
 */
9044 static int populate_csum(struct btrfs_trans_handle *trans,
9045 struct btrfs_root *csum_root, char *buf, u64 start,
9052 while (offset < len) {
9053 sectorsize = csum_root->sectorsize;
9054 ret = read_extent_data(csum_root, buf, start + offset,
/* start + len is passed as the allocation end hint for the csum item. */
9058 ret = btrfs_csum_file_block(trans, csum_root, start + len,
9059 start + offset, buf, sectorsize);
9062 offset += sectorsize;
/*
 * Walk @cur_root (one fs/subvolume tree) and regenerate checksums for every
 * regular (non-inline, non-prealloc) file extent it references, via
 * populate_csum().  Item-by-item iteration with btrfs_next_item.
 */
9067 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
9068 struct btrfs_root *csum_root,
9069 struct btrfs_root *cur_root)
9071 struct btrfs_path *path;
9072 struct btrfs_key key;
9073 struct extent_buffer *node;
9074 struct btrfs_file_extent_item *fi;
9081 path = btrfs_alloc_path();
/* Scratch buffer for one sector of file data. */
9084 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
9094 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
9097 /* Iterate all regular file extents and fill its csum */
9099 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
9101 if (key.type != BTRFS_EXTENT_DATA_KEY)
9103 node = path->nodes[0];
9104 slot = path->slots[0];
9105 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
9106 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
/* Csum the full on-disk extent, not just the referenced range. */
9108 start = btrfs_file_extent_disk_bytenr(node, fi);
9109 len = btrfs_file_extent_disk_num_bytes(node, fi);
9111 ret = populate_csum(trans, csum_root, buf, start, len);
/*
9118 * TODO: if next leaf is corrupted, jump to nearest next valid
 */
9121 ret = btrfs_next_item(cur_root, path);
9131 btrfs_free_path(path);
/*
 * Rebuild the csum tree by scanning the root tree for every fs/subvolume
 * ROOT_ITEM (objectid in the fstree range) and running
 * fill_csum_tree_from_one_fs_root() on each.  Used when the extent tree
 * cannot be trusted as a source of data extents.
 */
9136 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9137 struct btrfs_root *csum_root)
9139 struct btrfs_fs_info *fs_info = csum_root->fs_info;
9140 struct btrfs_path *path;
9141 struct btrfs_root *tree_root = fs_info->tree_root;
9142 struct btrfs_root *cur_root;
9143 struct extent_buffer *node;
9144 struct btrfs_key key;
9148 path = btrfs_alloc_path();
/* Start at the first possible fs tree and walk forward. */
9152 key.objectid = BTRFS_FS_TREE_OBJECTID;
9154 key.type = BTRFS_ROOT_ITEM_KEY;
9156 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9165 node = path->nodes[0];
9166 slot = path->slots[0];
9167 btrfs_item_key_to_cpu(node, &key, slot);
9168 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9170 if (key.type != BTRFS_ROOT_ITEM_KEY)
9172 if (!is_fstree(key.objectid))
9174 key.offset = (u64)-1;
9176 cur_root = btrfs_read_fs_root(fs_info, &key);
9177 if (IS_ERR(cur_root) || !cur_root) {
9178 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9182 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9187 ret = btrfs_next_item(tree_root, path);
9197 btrfs_free_path(path);
/*
 * Rebuild the csum tree by scanning the extent tree for every EXTENT_ITEM
 * flagged BTRFS_EXTENT_FLAG_DATA and csumming its range with
 * populate_csum().  Only usable when the extent tree itself is trusted.
 */
9201 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9202 struct btrfs_root *csum_root)
9204 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9205 struct btrfs_path *path;
9206 struct btrfs_extent_item *ei;
9207 struct extent_buffer *leaf;
9209 struct btrfs_key key;
9212 path = btrfs_alloc_path();
9217 key.type = BTRFS_EXTENT_ITEM_KEY;
9220 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9222 btrfs_free_path(path);
/* One-sector scratch buffer shared across all extents. */
9226 buf = malloc(csum_root->sectorsize);
9228 btrfs_free_path(path);
9233 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9234 ret = btrfs_next_leaf(extent_root, path);
9242 leaf = path->nodes[0];
9244 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9245 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9250 ei = btrfs_item_ptr(leaf, path->slots[0],
9251 struct btrfs_extent_item);
/* Skip metadata extents; only data extents carry csums. */
9252 if (!(btrfs_extent_flags(leaf, ei) &
9253 BTRFS_EXTENT_FLAG_DATA)) {
/* For EXTENT_ITEM keys, objectid = bytenr and offset = length. */
9258 ret = populate_csum(trans, csum_root, buf, key.objectid,
9265 btrfs_free_path(path);
/*
9271 * Recalculate the csum and put it into the csum tree.
9273 * Extent tree init will wipe out all the extent info, so in that case, we
9274 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
9275 * will use fs/subvol trees to init the csum tree.
 */
9277 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9278 struct btrfs_root *csum_root,
9282 return fill_csum_tree_from_fs(trans, csum_root);
9284 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache built by build_roots_info_cache():
 * pop every cache extent, free its containing root_item_info, then free
 * the tree itself and reset the global pointer.  Safe to call when the
 * cache was never built.
 */
9287 static void free_roots_info_cache(void)
9289 if (!roots_info_cache)
9292 while (!cache_tree_empty(roots_info_cache)) {
9293 struct cache_extent *entry;
9294 struct root_item_info *rii;
9296 entry = first_cache_extent(roots_info_cache);
9299 remove_cache_extent(roots_info_cache, entry);
9300 rii = container_of(entry, struct root_item_info, cache_extent);
9304 free(roots_info_cache);
9305 roots_info_cache = NULL;
/*
 * Scan the extent tree for tree-block extents whose first inline ref is a
 * TREE_BLOCK_REF, and record per-root the highest-level block seen (its
 * bytenr, generation, level, and a count of blocks at that level) in the
 * global roots_info_cache.  maybe_repair_root_item() later compares this
 * against the on-disk root items to detect stale roots.
 */
9308 static int build_roots_info_cache(struct btrfs_fs_info *info)
9311 struct btrfs_key key;
9312 struct extent_buffer *leaf;
9313 struct btrfs_path *path;
9315 if (!roots_info_cache) {
9316 roots_info_cache = malloc(sizeof(*roots_info_cache));
9317 if (!roots_info_cache)
9319 cache_tree_init(roots_info_cache);
9322 path = btrfs_alloc_path();
9327 key.type = BTRFS_EXTENT_ITEM_KEY;
9330 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9333 leaf = path->nodes[0];
9336 struct btrfs_key found_key;
9337 struct btrfs_extent_item *ei;
9338 struct btrfs_extent_inline_ref *iref;
9339 int slot = path->slots[0];
9344 struct cache_extent *entry;
9345 struct root_item_info *rii;
9347 if (slot >= btrfs_header_nritems(leaf)) {
9348 ret = btrfs_next_leaf(info->extent_root, path);
9355 leaf = path->nodes[0];
9356 slot = path->slots[0];
9359 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
/* Only tree-block extents are of interest. */
9361 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9362 found_key.type != BTRFS_METADATA_ITEM_KEY)
9365 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9366 flags = btrfs_extent_flags(leaf, ei);
9368 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9369 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * Locate the first inline ref and the block level: METADATA_ITEM
 * encodes the level in the key offset, EXTENT_ITEM carries a
 * btrfs_tree_block_info before the refs.
 */
9372 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9373 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9374 level = found_key.offset;
9376 struct btrfs_tree_block_info *binfo;
9378 binfo = (struct btrfs_tree_block_info *)(ei + 1);
9379 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9380 level = btrfs_tree_block_level(leaf, binfo);
/*
9384 * For a root extent, it must be of the following type and the
9385 * first (and only one) iref in the item.
 */
9387 type = btrfs_extent_inline_ref_type(leaf, iref);
9388 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9391 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
/* Find (or create) the per-root record, keyed by root_id. */
9392 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9394 rii = malloc(sizeof(struct root_item_info));
9399 rii->cache_extent.start = root_id;
9400 rii->cache_extent.size = 1;
9401 rii->level = (u8)-1;
9402 entry = &rii->cache_extent;
9403 ret = insert_cache_extent(roots_info_cache, entry);
9406 rii = container_of(entry, struct root_item_info,
9410 ASSERT(rii->cache_extent.start == root_id);
9411 ASSERT(rii->cache_extent.size == 1);
/* A higher-level block supersedes the previous candidate root node. */
9413 if (level > rii->level || rii->level == (u8)-1) {
9415 rii->bytenr = found_key.objectid;
9416 rii->gen = btrfs_extent_generation(leaf, ei);
9417 rii->node_count = 1;
9418 } else if (level == rii->level) {
9426 btrfs_free_path(path);
/*
 * Compare the on-disk root item under @path/@root_key against the data
 * collected in roots_info_cache and, unless @read_only_mode, rewrite the
 * root item's bytenr/level/generation in place when they disagree.
 * Errors out when the cache has no entry or an ambiguous (node_count != 1)
 * candidate for this root.
 */
9431 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9432 struct btrfs_path *path,
9433 const struct btrfs_key *root_key,
9434 const int read_only_mode)
9436 const u64 root_id = root_key->objectid;
9437 struct cache_extent *entry;
9438 struct root_item_info *rii;
9439 struct btrfs_root_item ri;
9440 unsigned long offset;
9442 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9445 "Error: could not find extent items for root %llu\n",
9446 root_key->objectid);
9450 rii = container_of(entry, struct root_item_info, cache_extent);
9451 ASSERT(rii->cache_extent.start == root_id);
9452 ASSERT(rii->cache_extent.size == 1);
/* More than one candidate top-level block: cannot decide which is real. */
9454 if (rii->node_count != 1) {
9456 "Error: could not find btree root extent for root %llu\n",
/* Read the current on-disk root item for comparison. */
9461 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9462 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9464 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9465 btrfs_root_level(&ri) != rii->level ||
9466 btrfs_root_generation(&ri) != rii->gen) {
/*
9469 * If we're in repair mode but our caller told us to not update
9470 * the root item, i.e. just check if it needs to be updated, don't
9471 * print this message, since the caller will call us again shortly
9472 * for the same root item without read only mode (the caller will
9473 * open a transaction first).
 */
9475 if (!(read_only_mode && repair))
9477 "%sroot item for root %llu,"
9478 " current bytenr %llu, current gen %llu, current level %u,"
9479 " new bytenr %llu, new gen %llu, new level %u\n",
9480 (read_only_mode ? "" : "fixing "),
9482 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9483 btrfs_root_level(&ri),
9484 rii->bytenr, rii->gen, rii->level);
/* A newer on-disk generation than the found node means deeper damage. */
9486 if (btrfs_root_generation(&ri) > rii->gen) {
9488 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9489 root_id, btrfs_root_generation(&ri), rii->gen);
9493 if (!read_only_mode) {
9494 btrfs_set_root_bytenr(&ri, rii->bytenr);
9495 btrfs_set_root_level(&ri, rii->level);
9496 btrfs_set_root_generation(&ri, rii->gen);
9497 write_extent_buffer(path->nodes[0], &ri,
9498 offset, sizeof(ri));
/*
9508 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9509 * caused read-only snapshots to be corrupted if they were created at a moment
9510 * when the source subvolume/snapshot had orphan items. The issue was that the
9511 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9512 * node instead of the post orphan cleanup root node.
9513 * So this function, and its callees, just detects and fixes those cases. Even
9514 * though the regression was for read-only snapshots, this function applies to
9515 * any snapshot/subvolume root.
9516 * This must be run before any other repair code - not doing it so, makes other
9517 * repair code delete or modify backrefs in the extent tree for example, which
9518 * will result in an inconsistent fs after repairing the root items.
 */
9520 static int repair_root_items(struct btrfs_fs_info *info)
9522 struct btrfs_path *path = NULL;
9523 struct btrfs_key key;
9524 struct extent_buffer *leaf;
9525 struct btrfs_trans_handle *trans = NULL;
/* Build the per-root "true root node" cache before inspecting items. */
9530 ret = build_roots_info_cache(info);
9534 path = btrfs_alloc_path();
9540 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9541 key.type = BTRFS_ROOT_ITEM_KEY;
/*
9546 * Avoid opening and committing transactions if a leaf doesn't have
9547 * any root items that need to be fixed, so that we avoid rotating
9548 * backup roots unnecessarily.
 */
9551 trans = btrfs_start_transaction(info->tree_root, 1);
9552 if (IS_ERR(trans)) {
9553 ret = PTR_ERR(trans);
9558 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9562 leaf = path->nodes[0];
9565 struct btrfs_key found_key;
9567 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
/* Remember the next key before releasing the path/committing. */
9568 int no_more_keys = find_next_key(path, &key);
9570 btrfs_release_path(path);
9572 ret = btrfs_commit_transaction(trans,
9584 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9586 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
/* Reloc roots are rebuilt elsewhere; never "repair" them here. */
9588 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9591 ret = maybe_repair_root_item(info, path, &found_key,
/* First pass was read-only; reopen with a transaction to apply the fix. */
9596 if (!trans && repair) {
9599 btrfs_release_path(path);
9609 free_roots_info_cache();
9610 btrfs_free_path(path);
9612 btrfs_commit_transaction(trans, info->tree_root);
/*
 * Usage text for "btrfs check", consumed by usage()/help output.
 * One string per line; options are listed after the description.
 */
9619 const char * const cmd_check_usage[] = {
9620 "btrfs check [options] <device>",
9621 "Check structural integrity of a filesystem (unmounted).",
9622 "Check structural integrity of an unmounted filesystem. Verify internal",
9623 "trees' consistency and item connectivity. In the repair mode try to",
9624 "fix the problems found.",
9625 "WARNING: the repair mode is considered dangerous",
9627 "-s|--super <superblock> use this superblock copy",
9628 "-b|--backup use the first valid backup root copy",
9629 "--repair try to repair the filesystem",
9630 "--readonly run in read-only mode (default)",
9631 "--init-csum-tree create a new CRC tree",
9632 "--init-extent-tree create a new extent tree",
9633 "--check-data-csum verify checksums of data blocks",
9634 "-Q|--qgroup-report print a report on qgroup consistency",
9635 "-E|--subvol-extents <subvolid>",
9636 " print subvolume extents and sharing state",
9637 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
9638 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
9639 "-p|--progress indicate progress",
9643 int cmd_check(int argc, char **argv)
9645 struct cache_tree root_cache;
9646 struct btrfs_root *root;
9647 struct btrfs_fs_info *info;
9650 u64 tree_root_bytenr = 0;
9651 u64 chunk_root_bytenr = 0;
9652 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9655 int init_csum_tree = 0;
9657 int qgroup_report = 0;
9658 int qgroups_repaired = 0;
9659 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9663 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9664 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9665 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9666 static const struct option long_options[] = {
9667 { "super", required_argument, NULL, 's' },
9668 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9669 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9670 { "init-csum-tree", no_argument, NULL,
9671 GETOPT_VAL_INIT_CSUM },
9672 { "init-extent-tree", no_argument, NULL,
9673 GETOPT_VAL_INIT_EXTENT },
9674 { "check-data-csum", no_argument, NULL,
9675 GETOPT_VAL_CHECK_CSUM },
9676 { "backup", no_argument, NULL, 'b' },
9677 { "subvol-extents", required_argument, NULL, 'E' },
9678 { "qgroup-report", no_argument, NULL, 'Q' },
9679 { "tree-root", required_argument, NULL, 'r' },
9680 { "chunk-root", required_argument, NULL,
9681 GETOPT_VAL_CHUNK_TREE },
9682 { "progress", no_argument, NULL, 'p' },
9686 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9690 case 'a': /* ignored */ break;
9692 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9695 num = arg_strtou64(optarg);
9696 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9698 "ERROR: super mirror should be less than: %d\n",
9699 BTRFS_SUPER_MIRROR_MAX);
/*
 * NOTE(review): interior of cmd_check(); the function head/tail are outside
 * this view and the span is non-contiguous (leading numbers are the original
 * file's line numbers). Comments below annotate the visible logic only.
 */
/* -s/--super: read from an alternate superblock copy; btrfs_sb_offset()
 * maps the copy index to its on-disk byte offset. */
9702 bytenr = btrfs_sb_offset(((int)num));
9703 printf("using SB copy %llu, bytenr %llu\n", num,
9704 (unsigned long long)bytenr);
/* --subvol-extents: subvolume id whose extent state will be printed. */
9710 subvolid = arg_strtou64(optarg);
/* Explicit tree-root / chunk-root bytenr overrides (recovery aids). */
9713 tree_root_bytenr = arg_strtou64(optarg);
9715 case GETOPT_VAL_CHUNK_TREE:
9716 chunk_root_bytenr = arg_strtou64(optarg);
/* --progress: enable the periodic status task (ctx.info below). */
9719 ctx.progress_enabled = true;
9723 usage(cmd_check_usage);
/* --repair implies opening the ctree writable.  (The `repair = 1;`
 * assignment is on a line not visible here — TODO confirm.) */
9724 case GETOPT_VAL_REPAIR:
9725 printf("enabling repair mode\n");
9727 ctree_flags |= OPEN_CTREE_WRITES;
9729 case GETOPT_VAL_READONLY:
/* --init-csum-tree: rebuild the checksum tree from scratch; needs writes. */
9732 case GETOPT_VAL_INIT_CSUM:
9733 printf("Creating a new CRC tree\n");
9736 ctree_flags |= OPEN_CTREE_WRITES;
/* --init-extent-tree: rebuild the extent tree; block groups are skipped
 * at open time since they will be recreated. */
9738 case GETOPT_VAL_INIT_EXTENT:
9739 init_extent_tree = 1;
9740 ctree_flags |= (OPEN_CTREE_WRITES |
9741 OPEN_CTREE_NO_BLOCK_GROUPS);
9744 case GETOPT_VAL_CHECK_CSUM:
9745 check_data_csum = 1;
/* Exactly one positional argument (the device/image path) is required. */
9750 if (check_argc_exact(argc - optind, 1))
9751 usage(cmd_check_usage);
/* Start the background progress reporter if requested. */
9753 if (ctx.progress_enabled) {
9754 ctx.tp = TASK_NOTHING;
9755 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9758 /* This check is the only reason for --readonly to exist */
9759 if (readonly && repair) {
9760 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9765 cache_tree_init(&root_cache);
/* Refuse to run on a mounted filesystem; check_mounted() < 0 is an error,
 * presumably > 0 means mounted (the branch line is not visible — confirm). */
9767 if((ret = check_mounted(argv[optind])) < 0) {
9768 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9771 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9776 /* only allow partial opening under repair mode */
9778 ctree_flags |= OPEN_CTREE_PARTIAL;
/* Open the filesystem, honoring any alternate SB / root bytenr overrides. */
9780 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9781 chunk_root_bytenr, ctree_flags);
9783 fprintf(stderr, "Couldn't open file system\n");
9789 root = info->fs_root;
9792 * repair mode will force us to commit transaction which
9793 * will make us fail to load log tree when mounting.
/* Hence: if a log tree exists, ask the user and zero it before repairing. */
9795 if (repair && btrfs_super_log_root(info->super_copy)) {
9796 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9801 ret = zero_log_tree(root);
9803 fprintf(stderr, "fail to zero log tree\n");
9808 uuid_unparse(info->super_copy->fsid, uuidbuf);
/* --qgroup-report: print quota group verification only, then (presumably)
 * skip the normal fsck passes — the exit path is not visible here. */
9809 if (qgroup_report) {
9810 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9812 ret = qgroup_verify_all(info);
/* --subvol-extents mode: dump extent state for one subvolume. */
9818 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9819 subvolid, argv[optind], uuidbuf);
9820 ret = print_extent_state(info, subvolid);
9823 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* Without readable tree, dev, and chunk roots there is nothing to check. */
9825 if (!extent_buffer_uptodate(info->tree_root->node) ||
9826 !extent_buffer_uptodate(info->dev_root->node) ||
9827 !extent_buffer_uptodate(info->chunk_root->node)) {
9828 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
/* Tree re-initialization (--init-extent-tree / --init-csum-tree) happens
 * inside a single transaction committed before the normal check passes. */
9833 if (init_extent_tree || init_csum_tree) {
9834 struct btrfs_trans_handle *trans;
9836 trans = btrfs_start_transaction(info->extent_root, 0);
9837 if (IS_ERR(trans)) {
9838 fprintf(stderr, "Error starting transaction\n");
9839 ret = PTR_ERR(trans);
9843 if (init_extent_tree) {
9844 printf("Creating a new extent tree\n");
9845 ret = reinit_extent_tree(trans, info);
9850 if (init_csum_tree) {
9851 fprintf(stderr, "Reinit crc root\n");
9852 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9854 fprintf(stderr, "crc root initialization failed\n");
/* Repopulate the (now empty) csum tree from the data extents. */
9859 ret = fill_csum_tree(trans, info->csum_root,
9862 fprintf(stderr, "crc refilling failed\n");
9867 * Ok now we commit and run the normal fsck, which will add
9868 * extent entries for all of the items it finds.
9870 ret = btrfs_commit_transaction(trans, info->extent_root);
/* After any reinit, the extent and csum roots must be readable. */
9874 if (!extent_buffer_uptodate(info->extent_root->node)) {
9875 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9879 if (!extent_buffer_uptodate(info->csum_root->node)) {
9880 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
/* --- Main check passes --- */
/* Pass 1: chunk and extent allocation trees. */
9885 if (!ctx.progress_enabled)
9886 fprintf(stderr, "checking extents\n");
9887 ret = check_chunks_and_extents(root);
9889 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
/* Pass 2: root items; > 0 means outdated root items were found but only
 * fixed when repair is enabled (the repair branch line is not visible). */
9891 ret = repair_root_items(info);
9895 fprintf(stderr, "Fixed %d roots.\n", ret);
9897 } else if (ret > 0) {
9899 "Found %d roots with an outdated root item.\n",
9902 "Please run a filesystem check with the option --repair to fix them.\n");
/* Pass 3: free space tree or v1 free space cache, whichever is active. */
9907 if (!ctx.progress_enabled) {
9908 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
9909 fprintf(stderr, "checking free space tree\n");
9911 fprintf(stderr, "checking free space cache\n");
9913 ret = check_space_cache(root);
9918 * We used to have to have these hole extents in between our real
9919 * extents so if we don't have this flag set we need to make sure there
9920 * are no gaps in the file extents for inodes, otherwise we can just
9921 * ignore it when this happens.
9923 no_holes = btrfs_fs_incompat(root->fs_info,
9924 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
/* Pass 4: per-subvolume fs trees (inodes, dir items, file extents). */
9925 if (!ctx.progress_enabled)
9926 fprintf(stderr, "checking fs roots\n");
9927 ret = check_fs_roots(root, &root_cache);
/* Pass 5: data checksums. */
9931 fprintf(stderr, "checking csums\n");
9932 ret = check_csums(root);
/* Pass 6: root tree back-references. */
9936 fprintf(stderr, "checking root refs\n");
9937 ret = check_root_refs(root, &root_cache);
/* Repair: re-COW extent buffers queued during the checks (e.g. for
 * transid problems) one at a time. */
9941 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9942 struct extent_buffer *eb;
9944 eb = list_first_entry(&root->fs_info->recow_ebs,
9945 struct extent_buffer, recow);
9946 list_del_init(&eb->recow);
9947 ret = recow_extent_buffer(root, eb);
/* Delete items the checks flagged as unrecoverably bad (global list). */
9952 while (!list_empty(&delete_items)) {
9953 struct bad_item *bad;
9955 bad = list_first_entry(&delete_items, struct bad_item, list);
9956 list_del_init(&bad->list);
9958 ret = delete_bad_item(root, bad);
/* Pass 7: quota groups — verify, and repair when enabled. */
9962 if (info->quota_enabled) {
9964 fprintf(stderr, "checking quota groups\n");
9965 err = qgroup_verify_all(info);
9969 err = repair_qgroups(info, &qgroups_repaired);
/* Leftover recow entries mean transid errors could not be repaired. */
9974 if (!list_empty(&root->fs_info->recow_ebs)) {
9975 fprintf(stderr, "Transid errors in file system\n");
9979 /* Don't override original ret */
9980 if (!ret && qgroups_repaired)
9981 ret = qgroups_repaired;
9983 if (found_old_backref) { /*
9984 * there was a disk format change when mixed
9985 * backref was in testing tree. The old format
9986 * existed about one week.
9988 printf("\n * Found old mixed backref format. "
9989 "The old format is not supported! *"
9990 "\n * Please mount the FS in readonly mode, "
9991 "backup data and re-format the FS. *\n\n");
/* Final summary: space accounting totals gathered by the passes above. */
9994 printf("found %llu bytes used err is %d\n",
9995 (unsigned long long)bytes_used, ret);
9996 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
9997 printf("total tree bytes: %llu\n",
9998 (unsigned long long)total_btree_bytes);
9999 printf("total fs tree bytes: %llu\n",
10000 (unsigned long long)total_fs_tree_bytes);
10001 printf("total extent tree bytes: %llu\n",
10002 (unsigned long long)total_extent_tree_bytes);
10003 printf("btree space waste bytes: %llu\n",
10004 (unsigned long long)btree_space_waste);
10005 printf("file data blocks allocated: %llu\n referenced %llu\n",
10006 (unsigned long long)data_bytes_allocated,
10007 (unsigned long long)data_bytes_referenced);
/* Teardown: free accounting state and stop the progress task. */
10009 free_qgroup_counts();
10010 free_root_recs_tree(&root_cache);
10014 if (ctx.progress_enabled)
10015 task_deinit(ctx.info);