2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int repair = 0;
71 static int no_holes = 0;
72 static int init_extent_tree = 0;
73 static int check_data_csum = 0;
74 static struct btrfs_fs_info *global_info;
75 static struct task_ctx ctx = { 0 };
76 static struct cache_tree *roots_info_cache = NULL;
78 struct extent_backref {
79 struct list_head list;
81 unsigned int is_data:1;
82 unsigned int found_extent_tree:1;
83 unsigned int full_backref:1;
84 unsigned int found_ref:1;
85 unsigned int broken:1;
88 static inline struct extent_backref* to_extent_backref(struct list_head *entry)
90 return list_entry(entry, struct extent_backref, list);
93 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
95 return rb_entry(node, struct extent_backref, node);
99 struct extent_backref node;
113 static inline struct data_backref* to_data_backref(struct extent_backref *back)
115 return container_of(back, struct data_backref, node);
118 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
120 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
121 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
122 struct data_backref *back1 = to_data_backref(ext1);
123 struct data_backref *back2 = to_data_backref(ext2);
125 WARN_ON(!ext1->is_data);
126 WARN_ON(!ext2->is_data);
128 /* parent and root are a union, so this covers both */
129 if (back1->parent > back2->parent)
131 if (back1->parent < back2->parent)
134 /* This is a full backref and the parents match. */
135 if (back1->node.full_backref)
138 if (back1->owner > back2->owner)
140 if (back1->owner < back2->owner)
143 if (back1->offset > back2->offset)
145 if (back1->offset < back2->offset)
148 if (back1->bytes > back2->bytes)
150 if (back1->bytes < back2->bytes)
153 if (back1->found_ref && back2->found_ref) {
154 if (back1->disk_bytenr > back2->disk_bytenr)
156 if (back1->disk_bytenr < back2->disk_bytenr)
159 if (back1->found_ref > back2->found_ref)
161 if (back1->found_ref < back2->found_ref)
169 * Much like data_backref, just removed the undetermined members
170 * and change it to use list_head.
171 * During extent scan, it is stored in root->orphan_data_extent.
172 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
174 struct orphan_data_extent {
175 struct list_head list;
183 struct tree_backref {
184 struct extent_backref node;
191 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
193 return container_of(back, struct tree_backref, node);
196 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
198 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
199 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
200 struct tree_backref *back1 = to_tree_backref(ext1);
201 struct tree_backref *back2 = to_tree_backref(ext2);
203 WARN_ON(ext1->is_data);
204 WARN_ON(ext2->is_data);
206 /* parent and root are a union, so this covers both */
207 if (back1->parent > back2->parent)
209 if (back1->parent < back2->parent)
215 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
217 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
218 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
220 if (ext1->is_data > ext2->is_data)
223 if (ext1->is_data < ext2->is_data)
226 if (ext1->full_backref > ext2->full_backref)
228 if (ext1->full_backref < ext2->full_backref)
232 return compare_data_backref(node1, node2);
234 return compare_tree_backref(node1, node2);
237 /* Explicit initialization for extent_record::flag_block_full_backref */
238 enum { FLAG_UNSET = 2 };
240 struct extent_record {
241 struct list_head backrefs;
242 struct list_head dups;
243 struct rb_root backref_tree;
244 struct list_head list;
245 struct cache_extent cache;
246 struct btrfs_disk_key parent_key;
251 u64 extent_item_refs;
253 u64 parent_generation;
257 unsigned int flag_block_full_backref:2;
258 unsigned int found_rec:1;
259 unsigned int content_checked:1;
260 unsigned int owner_ref_checked:1;
261 unsigned int is_root:1;
262 unsigned int metadata:1;
263 unsigned int bad_full_backref:1;
264 unsigned int crossing_stripes:1;
265 unsigned int wrong_chunk_type:1;
268 static inline struct extent_record* to_extent_record(struct list_head *entry)
270 return container_of(entry, struct extent_record, list);
273 struct inode_backref {
274 struct list_head list;
275 unsigned int found_dir_item:1;
276 unsigned int found_dir_index:1;
277 unsigned int found_inode_ref:1;
278 unsigned int filetype:8;
280 unsigned int ref_type;
287 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
289 return list_entry(entry, struct inode_backref, list);
292 struct root_item_record {
293 struct list_head list;
300 struct btrfs_key drop_key;
303 #define REF_ERR_NO_DIR_ITEM (1 << 0)
304 #define REF_ERR_NO_DIR_INDEX (1 << 1)
305 #define REF_ERR_NO_INODE_REF (1 << 2)
306 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
307 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
308 #define REF_ERR_DUP_INODE_REF (1 << 5)
309 #define REF_ERR_INDEX_UNMATCH (1 << 6)
310 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
311 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
312 #define REF_ERR_NO_ROOT_REF (1 << 9)
313 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
314 #define REF_ERR_DUP_ROOT_REF (1 << 11)
315 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
317 struct file_extent_hole {
323 struct inode_record {
324 struct list_head backrefs;
325 unsigned int checked:1;
326 unsigned int merging:1;
327 unsigned int found_inode_item:1;
328 unsigned int found_dir_item:1;
329 unsigned int found_file_extent:1;
330 unsigned int found_csum_item:1;
331 unsigned int some_csum_missing:1;
332 unsigned int nodatasum:1;
345 struct rb_root holes;
346 struct list_head orphan_extents;
351 #define I_ERR_NO_INODE_ITEM (1 << 0)
352 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
353 #define I_ERR_DUP_INODE_ITEM (1 << 2)
354 #define I_ERR_DUP_DIR_INDEX (1 << 3)
355 #define I_ERR_ODD_DIR_ITEM (1 << 4)
356 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
357 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
358 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
359 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
360 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
361 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
362 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
363 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
364 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
365 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
367 struct root_backref {
368 struct list_head list;
369 unsigned int found_dir_item:1;
370 unsigned int found_dir_index:1;
371 unsigned int found_back_ref:1;
372 unsigned int found_forward_ref:1;
373 unsigned int reachable:1;
382 static inline struct root_backref* to_root_backref(struct list_head *entry)
384 return list_entry(entry, struct root_backref, list);
388 struct list_head backrefs;
389 struct cache_extent cache;
390 unsigned int found_root_item:1;
396 struct cache_extent cache;
401 struct cache_extent cache;
402 struct cache_tree root_cache;
403 struct cache_tree inode_cache;
404 struct inode_record *current;
413 struct walk_control {
414 struct cache_tree shared;
415 struct shared_node *nodes[BTRFS_MAX_LEVEL];
421 struct btrfs_key key;
423 struct list_head list;
426 struct extent_entry {
431 struct list_head list;
434 struct root_item_info {
435 /* level of the root */
437 /* number of nodes at this level, must be 1 for a root */
441 struct cache_extent cache_extent;
444 static void *print_status_check(void *p)
446 struct task_ctx *priv = p;
447 const char work_indicator[] = { '.', 'o', 'O', 'o' };
449 static char *task_position_string[] = {
451 "checking free space cache",
455 task_period_start(priv->info, 1000 /* 1s */);
457 if (priv->tp == TASK_NOTHING)
461 printf("%s [%c]\r", task_position_string[priv->tp],
462 work_indicator[count % 4]);
465 task_period_wait(priv->info);
470 static int print_status_return(void *p)
478 /* Compatible function to allow reuse of old codes */
479 static u64 first_extent_gap(struct rb_root *holes)
481 struct file_extent_hole *hole;
483 if (RB_EMPTY_ROOT(holes))
486 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
490 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
492 struct file_extent_hole *hole1;
493 struct file_extent_hole *hole2;
495 hole1 = rb_entry(node1, struct file_extent_hole, node);
496 hole2 = rb_entry(node2, struct file_extent_hole, node);
498 if (hole1->start > hole2->start)
500 if (hole1->start < hole2->start)
502 /* Now hole1->start == hole2->start */
503 if (hole1->len >= hole2->len)
505 * Hole 1 will be merge center
506 * Same hole will be merged later
509 /* Hole 2 will be merge center */
514 * Add a hole to the record
516 * This will do hole merge for copy_file_extent_holes(),
517 * which will ensure there won't be continuous holes.
519 static int add_file_extent_hole(struct rb_root *holes,
522 struct file_extent_hole *hole;
523 struct file_extent_hole *prev = NULL;
524 struct file_extent_hole *next = NULL;
526 hole = malloc(sizeof(*hole));
531 /* Since compare will not return 0, no -EEXIST will happen */
532 rb_insert(holes, &hole->node, compare_hole);
534 /* simple merge with previous hole */
535 if (rb_prev(&hole->node))
536 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
538 if (prev && prev->start + prev->len >= hole->start) {
539 hole->len = hole->start + hole->len - prev->start;
540 hole->start = prev->start;
541 rb_erase(&prev->node, holes);
546 /* iterate merge with next holes */
548 if (!rb_next(&hole->node))
550 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
552 if (hole->start + hole->len >= next->start) {
553 if (hole->start + hole->len <= next->start + next->len)
554 hole->len = next->start + next->len -
556 rb_erase(&next->node, holes);
565 static int compare_hole_range(struct rb_node *node, void *data)
567 struct file_extent_hole *hole;
570 hole = (struct file_extent_hole *)data;
573 hole = rb_entry(node, struct file_extent_hole, node);
574 if (start < hole->start)
576 if (start >= hole->start && start < hole->start + hole->len)
582 * Delete a hole in the record
584 * This will do the hole split and is much restrict than add.
586 static int del_file_extent_hole(struct rb_root *holes,
589 struct file_extent_hole *hole;
590 struct file_extent_hole tmp;
595 struct rb_node *node;
602 node = rb_search(holes, &tmp, compare_hole_range, NULL);
605 hole = rb_entry(node, struct file_extent_hole, node);
606 if (start + len > hole->start + hole->len)
610 * Now there will be no overlap, delete the hole and re-add the
611 * split(s) if they exists.
613 if (start > hole->start) {
614 prev_start = hole->start;
615 prev_len = start - hole->start;
618 if (hole->start + hole->len > start + len) {
619 next_start = start + len;
620 next_len = hole->start + hole->len - start - len;
623 rb_erase(node, holes);
626 ret = add_file_extent_hole(holes, prev_start, prev_len);
631 ret = add_file_extent_hole(holes, next_start, next_len);
638 static int copy_file_extent_holes(struct rb_root *dst,
641 struct file_extent_hole *hole;
642 struct rb_node *node;
645 node = rb_first(src);
647 hole = rb_entry(node, struct file_extent_hole, node);
648 ret = add_file_extent_hole(dst, hole->start, hole->len);
651 node = rb_next(node);
656 static void free_file_extent_holes(struct rb_root *holes)
658 struct rb_node *node;
659 struct file_extent_hole *hole;
661 node = rb_first(holes);
663 hole = rb_entry(node, struct file_extent_hole, node);
664 rb_erase(node, holes);
666 node = rb_first(holes);
670 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
672 static void record_root_in_trans(struct btrfs_trans_handle *trans,
673 struct btrfs_root *root)
675 if (root->last_trans != trans->transid) {
676 root->track_dirty = 1;
677 root->last_trans = trans->transid;
678 root->commit_root = root->node;
679 extent_buffer_get(root->node);
683 static u8 imode_to_type(u32 imode)
686 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
687 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
688 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
689 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
690 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
691 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
692 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
693 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
696 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
700 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
702 struct device_record *rec1;
703 struct device_record *rec2;
705 rec1 = rb_entry(node1, struct device_record, node);
706 rec2 = rb_entry(node2, struct device_record, node);
707 if (rec1->devid > rec2->devid)
709 else if (rec1->devid < rec2->devid)
715 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
717 struct inode_record *rec;
718 struct inode_backref *backref;
719 struct inode_backref *orig;
720 struct inode_backref *tmp;
721 struct orphan_data_extent *src_orphan;
722 struct orphan_data_extent *dst_orphan;
726 rec = malloc(sizeof(*rec));
728 return ERR_PTR(-ENOMEM);
729 memcpy(rec, orig_rec, sizeof(*rec));
731 INIT_LIST_HEAD(&rec->backrefs);
732 INIT_LIST_HEAD(&rec->orphan_extents);
733 rec->holes = RB_ROOT;
735 list_for_each_entry(orig, &orig_rec->backrefs, list) {
736 size = sizeof(*orig) + orig->namelen + 1;
737 backref = malloc(size);
742 memcpy(backref, orig, size);
743 list_add_tail(&backref->list, &rec->backrefs);
745 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
746 dst_orphan = malloc(sizeof(*dst_orphan));
751 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
752 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
754 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
760 if (!list_empty(&rec->backrefs))
761 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
762 list_del(&orig->list);
766 if (!list_empty(&rec->orphan_extents))
767 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
768 list_del(&orig->list);
777 static void print_orphan_data_extents(struct list_head *orphan_extents,
780 struct orphan_data_extent *orphan;
782 if (list_empty(orphan_extents))
784 printf("The following data extent is lost in tree %llu:\n",
786 list_for_each_entry(orphan, orphan_extents, list) {
787 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
788 orphan->objectid, orphan->offset, orphan->disk_bytenr,
793 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
795 u64 root_objectid = root->root_key.objectid;
796 int errors = rec->errors;
800 /* reloc root errors, we print its corresponding fs root objectid*/
801 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
802 root_objectid = root->root_key.offset;
803 fprintf(stderr, "reloc");
805 fprintf(stderr, "root %llu inode %llu errors %x",
806 (unsigned long long) root_objectid,
807 (unsigned long long) rec->ino, rec->errors);
809 if (errors & I_ERR_NO_INODE_ITEM)
810 fprintf(stderr, ", no inode item");
811 if (errors & I_ERR_NO_ORPHAN_ITEM)
812 fprintf(stderr, ", no orphan item");
813 if (errors & I_ERR_DUP_INODE_ITEM)
814 fprintf(stderr, ", dup inode item");
815 if (errors & I_ERR_DUP_DIR_INDEX)
816 fprintf(stderr, ", dup dir index");
817 if (errors & I_ERR_ODD_DIR_ITEM)
818 fprintf(stderr, ", odd dir item");
819 if (errors & I_ERR_ODD_FILE_EXTENT)
820 fprintf(stderr, ", odd file extent");
821 if (errors & I_ERR_BAD_FILE_EXTENT)
822 fprintf(stderr, ", bad file extent");
823 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
824 fprintf(stderr, ", file extent overlap");
825 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
826 fprintf(stderr, ", file extent discount");
827 if (errors & I_ERR_DIR_ISIZE_WRONG)
828 fprintf(stderr, ", dir isize wrong");
829 if (errors & I_ERR_FILE_NBYTES_WRONG)
830 fprintf(stderr, ", nbytes wrong");
831 if (errors & I_ERR_ODD_CSUM_ITEM)
832 fprintf(stderr, ", odd csum item");
833 if (errors & I_ERR_SOME_CSUM_MISSING)
834 fprintf(stderr, ", some csum missing");
835 if (errors & I_ERR_LINK_COUNT_WRONG)
836 fprintf(stderr, ", link count wrong");
837 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
838 fprintf(stderr, ", orphan file extent");
839 fprintf(stderr, "\n");
840 /* Print the orphan extents if needed */
841 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
842 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
844 /* Print the holes if needed */
845 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
846 struct file_extent_hole *hole;
847 struct rb_node *node;
850 node = rb_first(&rec->holes);
851 fprintf(stderr, "Found file extent holes:\n");
854 hole = rb_entry(node, struct file_extent_hole, node);
855 fprintf(stderr, "\tstart: %llu, len: %llu\n",
856 hole->start, hole->len);
857 node = rb_next(node);
860 fprintf(stderr, "\tstart: 0, len: %llu\n",
861 round_up(rec->isize, root->sectorsize));
865 static void print_ref_error(int errors)
867 if (errors & REF_ERR_NO_DIR_ITEM)
868 fprintf(stderr, ", no dir item");
869 if (errors & REF_ERR_NO_DIR_INDEX)
870 fprintf(stderr, ", no dir index");
871 if (errors & REF_ERR_NO_INODE_REF)
872 fprintf(stderr, ", no inode ref");
873 if (errors & REF_ERR_DUP_DIR_ITEM)
874 fprintf(stderr, ", dup dir item");
875 if (errors & REF_ERR_DUP_DIR_INDEX)
876 fprintf(stderr, ", dup dir index");
877 if (errors & REF_ERR_DUP_INODE_REF)
878 fprintf(stderr, ", dup inode ref");
879 if (errors & REF_ERR_INDEX_UNMATCH)
880 fprintf(stderr, ", index mismatch");
881 if (errors & REF_ERR_FILETYPE_UNMATCH)
882 fprintf(stderr, ", filetype mismatch");
883 if (errors & REF_ERR_NAME_TOO_LONG)
884 fprintf(stderr, ", name too long");
885 if (errors & REF_ERR_NO_ROOT_REF)
886 fprintf(stderr, ", no root ref");
887 if (errors & REF_ERR_NO_ROOT_BACKREF)
888 fprintf(stderr, ", no root backref");
889 if (errors & REF_ERR_DUP_ROOT_REF)
890 fprintf(stderr, ", dup root ref");
891 if (errors & REF_ERR_DUP_ROOT_BACKREF)
892 fprintf(stderr, ", dup root backref");
893 fprintf(stderr, "\n");
896 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
899 struct ptr_node *node;
900 struct cache_extent *cache;
901 struct inode_record *rec = NULL;
904 cache = lookup_cache_extent(inode_cache, ino, 1);
906 node = container_of(cache, struct ptr_node, cache);
908 if (mod && rec->refs > 1) {
909 node->data = clone_inode_rec(rec);
910 if (IS_ERR(node->data))
916 rec = calloc(1, sizeof(*rec));
918 return ERR_PTR(-ENOMEM);
920 rec->extent_start = (u64)-1;
922 INIT_LIST_HEAD(&rec->backrefs);
923 INIT_LIST_HEAD(&rec->orphan_extents);
924 rec->holes = RB_ROOT;
926 node = malloc(sizeof(*node));
929 return ERR_PTR(-ENOMEM);
931 node->cache.start = ino;
932 node->cache.size = 1;
935 if (ino == BTRFS_FREE_INO_OBJECTID)
938 ret = insert_cache_extent(inode_cache, &node->cache);
940 return ERR_PTR(-EEXIST);
945 static void free_orphan_data_extents(struct list_head *orphan_extents)
947 struct orphan_data_extent *orphan;
949 while (!list_empty(orphan_extents)) {
950 orphan = list_entry(orphan_extents->next,
951 struct orphan_data_extent, list);
952 list_del(&orphan->list);
957 static void free_inode_rec(struct inode_record *rec)
959 struct inode_backref *backref;
964 while (!list_empty(&rec->backrefs)) {
965 backref = to_inode_backref(rec->backrefs.next);
966 list_del(&backref->list);
969 free_orphan_data_extents(&rec->orphan_extents);
970 free_file_extent_holes(&rec->holes);
974 static int can_free_inode_rec(struct inode_record *rec)
976 if (!rec->errors && rec->checked && rec->found_inode_item &&
977 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
982 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
983 struct inode_record *rec)
985 struct cache_extent *cache;
986 struct inode_backref *tmp, *backref;
987 struct ptr_node *node;
988 unsigned char filetype;
990 if (!rec->found_inode_item)
993 filetype = imode_to_type(rec->imode);
994 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
995 if (backref->found_dir_item && backref->found_dir_index) {
996 if (backref->filetype != filetype)
997 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
998 if (!backref->errors && backref->found_inode_ref &&
999 rec->nlink == rec->found_link) {
1000 list_del(&backref->list);
1006 if (!rec->checked || rec->merging)
1009 if (S_ISDIR(rec->imode)) {
1010 if (rec->found_size != rec->isize)
1011 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1012 if (rec->found_file_extent)
1013 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1014 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1015 if (rec->found_dir_item)
1016 rec->errors |= I_ERR_ODD_DIR_ITEM;
1017 if (rec->found_size != rec->nbytes)
1018 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1019 if (rec->nlink > 0 && !no_holes &&
1020 (rec->extent_end < rec->isize ||
1021 first_extent_gap(&rec->holes) < rec->isize))
1022 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1025 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1026 if (rec->found_csum_item && rec->nodatasum)
1027 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1028 if (rec->some_csum_missing && !rec->nodatasum)
1029 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1032 BUG_ON(rec->refs != 1);
1033 if (can_free_inode_rec(rec)) {
1034 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1035 node = container_of(cache, struct ptr_node, cache);
1036 BUG_ON(node->data != rec);
1037 remove_cache_extent(inode_cache, &node->cache);
1039 free_inode_rec(rec);
1043 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1045 struct btrfs_path path;
1046 struct btrfs_key key;
1049 key.objectid = BTRFS_ORPHAN_OBJECTID;
1050 key.type = BTRFS_ORPHAN_ITEM_KEY;
1053 btrfs_init_path(&path);
1054 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1055 btrfs_release_path(&path);
1061 static int process_inode_item(struct extent_buffer *eb,
1062 int slot, struct btrfs_key *key,
1063 struct shared_node *active_node)
1065 struct inode_record *rec;
1066 struct btrfs_inode_item *item;
1068 rec = active_node->current;
1069 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1070 if (rec->found_inode_item) {
1071 rec->errors |= I_ERR_DUP_INODE_ITEM;
1074 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1075 rec->nlink = btrfs_inode_nlink(eb, item);
1076 rec->isize = btrfs_inode_size(eb, item);
1077 rec->nbytes = btrfs_inode_nbytes(eb, item);
1078 rec->imode = btrfs_inode_mode(eb, item);
1079 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1081 rec->found_inode_item = 1;
1082 if (rec->nlink == 0)
1083 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1084 maybe_free_inode_rec(&active_node->inode_cache, rec);
1088 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1090 int namelen, u64 dir)
1092 struct inode_backref *backref;
1094 list_for_each_entry(backref, &rec->backrefs, list) {
1095 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1097 if (backref->dir != dir || backref->namelen != namelen)
1099 if (memcmp(name, backref->name, namelen))
1104 backref = malloc(sizeof(*backref) + namelen + 1);
1107 memset(backref, 0, sizeof(*backref));
1109 backref->namelen = namelen;
1110 memcpy(backref->name, name, namelen);
1111 backref->name[namelen] = '\0';
1112 list_add_tail(&backref->list, &rec->backrefs);
1116 static int add_inode_backref(struct cache_tree *inode_cache,
1117 u64 ino, u64 dir, u64 index,
1118 const char *name, int namelen,
1119 int filetype, int itemtype, int errors)
1121 struct inode_record *rec;
1122 struct inode_backref *backref;
1124 rec = get_inode_rec(inode_cache, ino, 1);
1125 BUG_ON(IS_ERR(rec));
1126 backref = get_inode_backref(rec, name, namelen, dir);
1129 backref->errors |= errors;
1130 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1131 if (backref->found_dir_index)
1132 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1133 if (backref->found_inode_ref && backref->index != index)
1134 backref->errors |= REF_ERR_INDEX_UNMATCH;
1135 if (backref->found_dir_item && backref->filetype != filetype)
1136 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1138 backref->index = index;
1139 backref->filetype = filetype;
1140 backref->found_dir_index = 1;
1141 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1143 if (backref->found_dir_item)
1144 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1145 if (backref->found_dir_index && backref->filetype != filetype)
1146 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1148 backref->filetype = filetype;
1149 backref->found_dir_item = 1;
1150 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1151 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1152 if (backref->found_inode_ref)
1153 backref->errors |= REF_ERR_DUP_INODE_REF;
1154 if (backref->found_dir_index && backref->index != index)
1155 backref->errors |= REF_ERR_INDEX_UNMATCH;
1157 backref->index = index;
1159 backref->ref_type = itemtype;
1160 backref->found_inode_ref = 1;
1165 maybe_free_inode_rec(inode_cache, rec);
1169 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1170 struct cache_tree *dst_cache)
1172 struct inode_backref *backref;
1177 list_for_each_entry(backref, &src->backrefs, list) {
1178 if (backref->found_dir_index) {
1179 add_inode_backref(dst_cache, dst->ino, backref->dir,
1180 backref->index, backref->name,
1181 backref->namelen, backref->filetype,
1182 BTRFS_DIR_INDEX_KEY, backref->errors);
1184 if (backref->found_dir_item) {
1186 add_inode_backref(dst_cache, dst->ino,
1187 backref->dir, 0, backref->name,
1188 backref->namelen, backref->filetype,
1189 BTRFS_DIR_ITEM_KEY, backref->errors);
1191 if (backref->found_inode_ref) {
1192 add_inode_backref(dst_cache, dst->ino,
1193 backref->dir, backref->index,
1194 backref->name, backref->namelen, 0,
1195 backref->ref_type, backref->errors);
1199 if (src->found_dir_item)
1200 dst->found_dir_item = 1;
1201 if (src->found_file_extent)
1202 dst->found_file_extent = 1;
1203 if (src->found_csum_item)
1204 dst->found_csum_item = 1;
1205 if (src->some_csum_missing)
1206 dst->some_csum_missing = 1;
1207 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1208 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1213 BUG_ON(src->found_link < dir_count);
1214 dst->found_link += src->found_link - dir_count;
1215 dst->found_size += src->found_size;
1216 if (src->extent_start != (u64)-1) {
1217 if (dst->extent_start == (u64)-1) {
1218 dst->extent_start = src->extent_start;
1219 dst->extent_end = src->extent_end;
1221 if (dst->extent_end > src->extent_start)
1222 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1223 else if (dst->extent_end < src->extent_start) {
1224 ret = add_file_extent_hole(&dst->holes,
1226 src->extent_start - dst->extent_end);
1228 if (dst->extent_end < src->extent_end)
1229 dst->extent_end = src->extent_end;
1233 dst->errors |= src->errors;
1234 if (src->found_inode_item) {
1235 if (!dst->found_inode_item) {
1236 dst->nlink = src->nlink;
1237 dst->isize = src->isize;
1238 dst->nbytes = src->nbytes;
1239 dst->imode = src->imode;
1240 dst->nodatasum = src->nodatasum;
1241 dst->found_inode_item = 1;
1243 dst->errors |= I_ERR_DUP_INODE_ITEM;
1251 static int splice_shared_node(struct shared_node *src_node,
1252 struct shared_node *dst_node)
1254 struct cache_extent *cache;
1255 struct ptr_node *node, *ins;
1256 struct cache_tree *src, *dst;
1257 struct inode_record *rec, *conflict;
1258 u64 current_ino = 0;
1262 if (--src_node->refs == 0)
1264 if (src_node->current)
1265 current_ino = src_node->current->ino;
1267 src = &src_node->root_cache;
1268 dst = &dst_node->root_cache;
1270 cache = search_cache_extent(src, 0);
1272 node = container_of(cache, struct ptr_node, cache);
1274 cache = next_cache_extent(cache);
1277 remove_cache_extent(src, &node->cache);
1280 ins = malloc(sizeof(*ins));
1282 ins->cache.start = node->cache.start;
1283 ins->cache.size = node->cache.size;
1287 ret = insert_cache_extent(dst, &ins->cache);
1288 if (ret == -EEXIST) {
1289 conflict = get_inode_rec(dst, rec->ino, 1);
1290 BUG_ON(IS_ERR(conflict));
1291 merge_inode_recs(rec, conflict, dst);
1293 conflict->checked = 1;
1294 if (dst_node->current == conflict)
1295 dst_node->current = NULL;
1297 maybe_free_inode_rec(dst, conflict);
1298 free_inode_rec(rec);
1305 if (src == &src_node->root_cache) {
1306 src = &src_node->inode_cache;
1307 dst = &dst_node->inode_cache;
1311 if (current_ino > 0 && (!dst_node->current ||
1312 current_ino > dst_node->current->ino)) {
1313 if (dst_node->current) {
1314 dst_node->current->checked = 1;
1315 maybe_free_inode_rec(dst, dst_node->current);
1317 dst_node->current = get_inode_rec(dst, current_ino, 1);
1318 BUG_ON(IS_ERR(dst_node->current));
1323 static void free_inode_ptr(struct cache_extent *cache)
1325 struct ptr_node *node;
1326 struct inode_record *rec;
1328 node = container_of(cache, struct ptr_node, cache);
1330 free_inode_rec(rec);
1334 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1336 static struct shared_node *find_shared_node(struct cache_tree *shared,
1339 struct cache_extent *cache;
1340 struct shared_node *node;
1342 cache = lookup_cache_extent(shared, bytenr, 1);
1344 node = container_of(cache, struct shared_node, cache);
1350 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1353 struct shared_node *node;
1355 node = calloc(1, sizeof(*node));
1358 node->cache.start = bytenr;
1359 node->cache.size = 1;
1360 cache_tree_init(&node->root_cache);
1361 cache_tree_init(&node->inode_cache);
1364 ret = insert_cache_extent(shared, &node->cache);
1369 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1370 struct walk_control *wc, int level)
1372 struct shared_node *node;
1373 struct shared_node *dest;
1376 if (level == wc->active_node)
1379 BUG_ON(wc->active_node <= level);
1380 node = find_shared_node(&wc->shared, bytenr);
1382 ret = add_shared_node(&wc->shared, bytenr, refs);
1384 node = find_shared_node(&wc->shared, bytenr);
1385 wc->nodes[level] = node;
1386 wc->active_node = level;
1390 if (wc->root_level == wc->active_node &&
1391 btrfs_root_refs(&root->root_item) == 0) {
1392 if (--node->refs == 0) {
1393 free_inode_recs_tree(&node->root_cache);
1394 free_inode_recs_tree(&node->inode_cache);
1395 remove_cache_extent(&wc->shared, &node->cache);
1401 dest = wc->nodes[wc->active_node];
1402 splice_shared_node(node, dest);
1403 if (node->refs == 0) {
1404 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Leave the shared node at @level: find the next populated ancestor slot
 * in wc->nodes[], make it the active node, and splice the departing
 * node's records into it (unless the root is dead and we are back at the
 * root level, in which case the records stay for the remaining owners).
 */
1410 static int leave_shared_node(struct btrfs_root *root,
1411 			     struct walk_control *wc, int level)
1413 	struct shared_node *node;
1414 	struct shared_node *dest;
1417 	if (level == wc->root_level)
/* Scan upward for the nearest ancestor that still has a shared_node. */
1420 	for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1424 	BUG_ON(i >= BTRFS_MAX_LEVEL);
1426 	node = wc->nodes[wc->active_node];
1427 	wc->nodes[wc->active_node] = NULL;
1428 	wc->active_node = i;
1430 	dest = wc->nodes[wc->active_node];
1431 	if (wc->active_node < wc->root_level ||
1432 	    btrfs_root_refs(&root->root_item) > 0) {
1433 		BUG_ON(node->refs <= 1);
1434 		splice_shared_node(node, dest);
1436 		BUG_ON(node->refs < 2);
/*
 * Decide the parent relationship between two subvolume roots by scanning
 * ROOT_REF / ROOT_BACKREF items in the tree root.  Original return-value
 * contract (fragment below):
 */
1445  * 1 - if the root with id child_root_id is a child of root parent_root_id
1446  * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1447  *     has other root(s) as parent(s)
1448  * 2 - if the root child_root_id doesn't have any parent roots
1450 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1453 	struct btrfs_path path;
1454 	struct btrfs_key key;
1455 	struct extent_buffer *leaf;
1459 	btrfs_init_path(&path);
/* Fast path: direct ROOT_REF from parent to child. */
1461 	key.objectid = parent_root_id;
1462 	key.type = BTRFS_ROOT_REF_KEY;
1463 	key.offset = child_root_id;
1464 	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1468 	btrfs_release_path(&path);
/* Slow path: walk all ROOT_BACKREF items of the child. */
1472 	key.objectid = child_root_id;
1473 	key.type = BTRFS_ROOT_BACKREF_KEY;
1475 	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1481 		leaf = path.nodes[0];
1482 		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1483 			ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1486 			leaf = path.nodes[0];
1489 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1490 		if (key.objectid != child_root_id ||
1491 		    key.type != BTRFS_ROOT_BACKREF_KEY)
1496 		if (key.offset == parent_root_id) {
1497 			btrfs_release_path(&path);
1504 	btrfs_release_path(&path);
1507 	return has_parent ? 0 : 2;
/*
 * Process one DIR_ITEM / DIR_INDEX leaf item: iterate every packed
 * btrfs_dir_item inside the item and record a backref for each name on
 * the pointed-to inode (inode cache for INODE_ITEM targets, root cache
 * for ROOT_ITEM targets, the MULTIPLE_OBJECTIDS bucket otherwise).
 * Also accounts name lengths into rec->found_size for isize checking.
 */
1510 static int process_dir_item(struct btrfs_root *root,
1511 			    struct extent_buffer *eb,
1512 			    int slot, struct btrfs_key *key,
1513 			    struct shared_node *active_node)
1523 	struct btrfs_dir_item *di;
1524 	struct inode_record *rec;
1525 	struct cache_tree *root_cache;
1526 	struct cache_tree *inode_cache;
1527 	struct btrfs_key location;
1528 	char namebuf[BTRFS_NAME_LEN];
1530 	root_cache = &active_node->root_cache;
1531 	inode_cache = &active_node->inode_cache;
1532 	rec = active_node->current;
1533 	rec->found_dir_item = 1;
1535 	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1536 	total = btrfs_item_size_nr(eb, slot);
1537 	while (cur < total) {
1539 		btrfs_dir_item_key_to_cpu(eb, di, &location);
1540 		name_len = btrfs_dir_name_len(eb, di);
1541 		data_len = btrfs_dir_data_len(eb, di);
1542 		filetype = btrfs_dir_type(eb, di);
1544 		rec->found_size += name_len;
/* Clamp over-long names and flag them rather than overflowing namebuf. */
1545 		if (name_len <= BTRFS_NAME_LEN) {
1549 			len = BTRFS_NAME_LEN;
1550 			error = REF_ERR_NAME_TOO_LONG;
1552 		read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1554 		if (location.type == BTRFS_INODE_ITEM_KEY) {
1555 			add_inode_backref(inode_cache, location.objectid,
1556 					  key->objectid, key->offset, namebuf,
1557 					  len, filetype, key->type, error);
1558 		} else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1559 			add_inode_backref(root_cache, location.objectid,
1560 					  key->objectid, key->offset,
1561 					  namebuf, len, filetype,
1564 			fprintf(stderr, "invalid location in dir item %u\n",
1566 			add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1567 					  key->objectid, key->offset, namebuf,
1568 					  len, filetype, key->type, error);
/* Advance to the next packed dir_item within the same leaf item. */
1571 		len = sizeof(*di) + name_len + data_len;
1572 		di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX item must contain exactly one entry. */
1575 	if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1576 		rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Process one INODE_REF leaf item: each packed btrfs_inode_ref records a
 * (parent dir = key->offset, index, name) link to inode key->objectid;
 * register each as an inode backref.  Over-long names are clamped and
 * flagged with REF_ERR_NAME_TOO_LONG.
 */
1581 static int process_inode_ref(struct extent_buffer *eb,
1582 			     int slot, struct btrfs_key *key,
1583 			     struct shared_node *active_node)
1591 	struct cache_tree *inode_cache;
1592 	struct btrfs_inode_ref *ref;
1593 	char namebuf[BTRFS_NAME_LEN];
1595 	inode_cache = &active_node->inode_cache;
1597 	ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1598 	total = btrfs_item_size_nr(eb, slot);
1599 	while (cur < total) {
1600 		name_len = btrfs_inode_ref_name_len(eb, ref);
1601 		index = btrfs_inode_ref_index(eb, ref);
1602 		if (name_len <= BTRFS_NAME_LEN) {
1606 			len = BTRFS_NAME_LEN;
1607 			error = REF_ERR_NAME_TOO_LONG;
1609 		read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1610 		add_inode_backref(inode_cache, key->objectid, key->offset,
1611 				  index, namebuf, len, 0, key->type, error);
/* Step over this ref's header plus its inline name. */
1613 		len = sizeof(*ref) + name_len;
1614 		ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Process one INODE_EXTREF leaf item — the extended-ref variant used when
 * an INODE_REF item overflows.  Unlike INODE_REF, the parent dir inode is
 * stored inside each extref rather than in the item key offset.
 */
1620 static int process_inode_extref(struct extent_buffer *eb,
1621 				int slot, struct btrfs_key *key,
1622 				struct shared_node *active_node)
1631 	struct cache_tree *inode_cache;
1632 	struct btrfs_inode_extref *extref;
1633 	char namebuf[BTRFS_NAME_LEN];
1635 	inode_cache = &active_node->inode_cache;
1637 	extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1638 	total = btrfs_item_size_nr(eb, slot);
1639 	while (cur < total) {
1640 		name_len = btrfs_inode_extref_name_len(eb, extref);
1641 		index = btrfs_inode_extref_index(eb, extref);
1642 		parent = btrfs_inode_extref_parent(eb, extref);
1643 		if (name_len <= BTRFS_NAME_LEN) {
1647 			len = BTRFS_NAME_LEN;
1648 			error = REF_ERR_NAME_TOO_LONG;
1650 		read_extent_buffer(eb, namebuf,
1651 				   (unsigned long)(extref + 1), len);
1652 		add_inode_backref(inode_cache, key->objectid, parent,
1653 				  index, namebuf, len, 0, key->type, error);
1655 		len = sizeof(*extref) + name_len;
1656 		extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the logical range [start, start+len) are covered
 * by checksum items in the csum tree; the covered byte count accumulates
 * into *found.  Each csum item covers (item_size / csum_size) sectors.
 */
1663 static int count_csum_range(struct btrfs_root *root, u64 start,
1664 			    u64 len, u64 *found)
1666 	struct btrfs_key key;
1667 	struct btrfs_path path;
1668 	struct extent_buffer *leaf;
1673 	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1675 	btrfs_init_path(&path);
1677 	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1679 	key.type = BTRFS_EXTENT_CSUM_KEY;
1681 	ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* If we landed past the target, back up one slot in case the previous
 * csum item still overlaps the start of the range. */
1685 	if (ret > 0 && path.slots[0] > 0) {
1686 		leaf = path.nodes[0];
1687 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1688 		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1689 		    key.type == BTRFS_EXTENT_CSUM_KEY)
1694 		leaf = path.nodes[0];
1695 		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1696 			ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1701 			leaf = path.nodes[0];
1704 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1705 		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1706 		    key.type != BTRFS_EXTENT_CSUM_KEY)
1709 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1710 		if (key.offset >= start + len)
1713 		if (key.offset > start)
/* Translate the csum item size into the number of data bytes it covers. */
1716 		size = btrfs_item_size_nr(leaf, path.slots[0]);
1717 		csum_end = key.offset + (size / csum_size) * root->sectorsize;
1718 		if (csum_end > start) {
1719 			size = min(csum_end - start, len);
1728 	btrfs_release_path(&path);
/*
 * Process one EXTENT_DATA leaf item for the current inode record: track
 * contiguity of file extents (overlap / hole detection), accumulate
 * found_size, validate inline/regular/prealloc extent fields, and verify
 * checksum coverage for on-disk data extents.
 */
1734 static int process_file_extent(struct btrfs_root *root,
1735 				struct extent_buffer *eb,
1736 				int slot, struct btrfs_key *key,
1737 				struct shared_node *active_node)
1739 	struct inode_record *rec;
1740 	struct btrfs_file_extent_item *fi;
1742 	u64 disk_bytenr = 0;
1743 	u64 extent_offset = 0;
1744 	u64 mask = root->sectorsize - 1;
1748 	rec = active_node->current;
1749 	BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1750 	rec->found_file_extent = 1;
1752 	if (rec->extent_start == (u64)-1) {
1753 		rec->extent_start = key->offset;
1754 		rec->extent_end = key->offset;
/* Extents must be contiguous: earlier end => overlap, later => hole. */
1757 	if (rec->extent_end > key->offset)
1758 		rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1759 	else if (rec->extent_end < key->offset) {
1760 		ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1761 					   key->offset - rec->extent_end);
1766 	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1767 	extent_type = btrfs_file_extent_type(eb, fi);
1769 	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1770 		num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1772 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
1773 		rec->found_size += num_bytes;
/* Round inline length up to a full sector for extent_end accounting. */
1774 		num_bytes = (num_bytes + mask) & ~mask;
1775 	} else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1776 		   extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1777 		num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1778 		disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1779 		extent_offset = btrfs_file_extent_offset(eb, fi);
1780 		if (num_bytes == 0 || (num_bytes & mask))
1781 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
1782 		if (num_bytes + extent_offset >
1783 		    btrfs_file_extent_ram_bytes(eb, fi))
1784 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Prealloc extents must not carry any encoding flags. */
1785 		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1786 		    (btrfs_file_extent_compression(eb, fi) ||
1787 		     btrfs_file_extent_encryption(eb, fi) ||
1788 		     btrfs_file_extent_other_encoding(eb, fi)))
1789 			rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 denotes a hole extent; those add no size. */
1790 		if (disk_bytenr > 0)
1791 			rec->found_size += num_bytes;
1793 		rec->errors |= I_ERR_BAD_FILE_EXTENT;
1795 	rec->extent_end = key->offset + num_bytes;
1798 	 * The data reloc tree will copy full extents into its inode and then
1799 	 * copy the corresponding csums.  Because the extent it copied could be
1800 	 * a preallocated extent that hasn't been written to yet there may be no
1801 	 * csums to copy, ergo we won't have csums for our file extent.  This is
1802 	 * ok so just don't bother checking csums if the inode belongs to the
1805 	if (disk_bytenr > 0 &&
1806 	    btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents are checksummed over their on-disk bytes. */
1808 		if (btrfs_file_extent_compression(eb, fi))
1809 			num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1811 			disk_bytenr += extent_offset;
1813 		ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1816 		if (extent_type == BTRFS_FILE_EXTENT_REG) {
1818 				rec->found_csum_item = 1;
1819 			if (found < num_bytes)
1820 				rec->some_csum_missing = 1;
1821 		} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
/* Prealloc extents must NOT have csums; finding any is an error. */
1823 				rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item in one fs-tree leaf, maintaining the active node's
 * "current" inode record (items for one inode are key-adjacent) and
 * dispatching each item type to its dedicated handler.
 */
1829 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1830 			    struct walk_control *wc)
1832 	struct btrfs_key key;
1836 	struct cache_tree *inode_cache;
1837 	struct shared_node *active_node;
/* Skip dead roots — nothing to verify against. */
1839 	if (wc->root_level == wc->active_node &&
1840 	    btrfs_root_refs(&root->root_item) == 0)
1843 	active_node = wc->nodes[wc->active_node];
1844 	inode_cache = &active_node->inode_cache;
1845 	nritems = btrfs_header_nritems(eb);
1846 	for (i = 0; i < nritems; i++) {
1847 		btrfs_item_key_to_cpu(eb, &key, i);
1849 		if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1851 		if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Moving to a new inode: finalize and possibly free the previous record. */
1854 		if (active_node->current == NULL ||
1855 		    active_node->current->ino < key.objectid) {
1856 			if (active_node->current) {
1857 				active_node->current->checked = 1;
1858 				maybe_free_inode_rec(inode_cache,
1859 						     active_node->current);
1861 			active_node->current = get_inode_rec(inode_cache,
1863 			BUG_ON(IS_ERR(active_node->current));
1866 		case BTRFS_DIR_ITEM_KEY:
1867 		case BTRFS_DIR_INDEX_KEY:
1868 			ret = process_dir_item(root, eb, i, &key, active_node);
1870 		case BTRFS_INODE_REF_KEY:
1871 			ret = process_inode_ref(eb, i, &key, active_node);
1873 		case BTRFS_INODE_EXTREF_KEY:
1874 			ret = process_inode_extref(eb, i, &key, active_node);
1876 		case BTRFS_INODE_ITEM_KEY:
1877 			ret = process_inode_item(eb, i, &key, active_node);
1879 		case BTRFS_EXTENT_DATA_KEY:
1880 			ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for every child block of @node from @slot to the end,
 * so subsequent read_tree_block() calls during the walk hit the cache.
 * Best-effort: readahead_tree_block() results are intentionally ignored.
 */
1890 static void reada_walk_down(struct btrfs_root *root,
1891 			    struct extent_buffer *node, int slot)
1900 	level = btrfs_header_level(node);
1904 	nritems = btrfs_header_nritems(node);
1905 	blocksize = root->nodesize;
1906 	for (i = slot; i < nritems; i++) {
1907 		bytenr = btrfs_node_blockptr(node, i);
1908 		ptr_gen = btrfs_node_ptr_generation(node, i);
1909 		readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1914  * Check the child node/leaf by the following condition:
1915  * 1. the first item key of the node/leaf should be the same with the one
1917  * 2. block in parent node should match the child node/leaf.
1918  * 3. generation of parent node and child's header should be consistent.
1920  * Or the child node/leaf pointed by the key in parent is not valid.
1922  * We hope to check leaf owner too, but since subvol may share leaves,
1923  * which makes leaf owner check not so strong, key check should be
1924  * sufficient enough for that case.
/* Validates parent-slot metadata against the child's header; prints a
 * diagnostic for every mismatch found (error accumulation elided here). */
1926 static int check_child_node(struct btrfs_root *root,
1927 			    struct extent_buffer *parent, int slot,
1928 			    struct extent_buffer *child)
1930 	struct btrfs_key parent_key;
1931 	struct btrfs_key child_key;
1934 	btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* Leaves store item keys; interior nodes store node keys. */
1935 	if (btrfs_header_level(child) == 0)
1936 		btrfs_item_key_to_cpu(child, &child_key, 0);
1938 		btrfs_node_key_to_cpu(child, &child_key, 0);
1940 	if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1943 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1944 			parent_key.objectid, parent_key.type, parent_key.offset,
1945 			child_key.objectid, child_key.type, child_key.offset);
1947 	if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1949 		fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1950 			btrfs_node_blockptr(parent, slot),
1951 			btrfs_header_bytenr(child));
1953 	if (btrfs_node_ptr_generation(parent, slot) !=
1954 	    btrfs_header_generation(child)) {
1956 		fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1957 			btrfs_header_generation(child),
1958 			btrfs_node_ptr_generation(parent, slot));
/*
 * Descend one step of the fs-tree walk: process leaves via
 * process_one_leaf(), track shared blocks (refs > 1) through
 * enter_shared_node(), validate each child against its parent slot, and
 * push the next child onto the path.  Returns with *level updated.
 */
1963 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1964 			  struct walk_control *wc, int *level)
1966 	enum btrfs_tree_block_status status;
1969 	struct extent_buffer *next;
1970 	struct extent_buffer *cur;
1975 	WARN_ON(*level < 0);
1976 	WARN_ON(*level >= BTRFS_MAX_LEVEL);
1977 	ret = btrfs_lookup_extent_info(NULL, root,
1978 				       path->nodes[*level]->start,
1979 				       *level, 1, &refs, NULL);
/* Shared top block: register it so records merge across owners. */
1986 		ret = enter_shared_node(root, path->nodes[*level]->start,
1994 	while (*level >= 0) {
1995 		WARN_ON(*level < 0);
1996 		WARN_ON(*level >= BTRFS_MAX_LEVEL);
1997 		cur = path->nodes[*level];
1999 		if (btrfs_header_level(cur) != *level)
2002 		if (path->slots[*level] >= btrfs_header_nritems(cur))
2005 			ret = process_one_leaf(root, cur, wc);
2010 		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2011 		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2012 		blocksize = root->nodesize;
2013 		ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
/* Shared child already walked via another owner: skip the subtree. */
2019 			ret = enter_shared_node(root, bytenr, refs,
2022 				path->slots[*level]++;
2027 		next = btrfs_find_tree_block(root, bytenr, blocksize);
2028 		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2029 			free_extent_buffer(next);
2030 			reada_walk_down(root, cur, path->slots[*level]);
2031 			next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: record the parent block as corrupt. */
2033 			if (!extent_buffer_uptodate(next)) {
2034 				struct btrfs_key node_key;
2036 				btrfs_node_key_to_cpu(path->nodes[*level],
2038 						      path->slots[*level]);
2039 				btrfs_add_corrupt_extent_record(root->fs_info,
2041 						path->nodes[*level]->start,
2042 						root->nodesize, *level);
2048 		ret = check_child_node(root, cur, path->slots[*level], next);
2054 		if (btrfs_is_leaf(next))
2055 			status = btrfs_check_leaf(root, NULL, next);
2057 			status = btrfs_check_node(root, NULL, next);
2058 		if (status != BTRFS_TREE_BLOCK_CLEAN) {
2059 			free_extent_buffer(next);
/* Descend: the child becomes the path node one level down. */
2064 		*level = *level - 1;
2065 		free_extent_buffer(path->nodes[*level]);
2066 		path->nodes[*level] = next;
2067 		path->slots[*level] = 0;
2070 	path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Pop finished levels off the path: climb until a level still has an
 * unvisited sibling slot, releasing each finished block and leaving its
 * shared_node (if it was the active one) on the way up.
 */
2074 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2075 			struct walk_control *wc, int *level)
2078 	struct extent_buffer *leaf;
2080 	for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2081 		leaf = path->nodes[i];
2082 		if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2087 			free_extent_buffer(path->nodes[*level]);
2088 			path->nodes[*level] = NULL;
2089 			BUG_ON(*level > wc->active_node);
2090 			if (*level == wc->active_node)
2091 				leave_shared_node(root, wc, *level);
/*
 * Validate the root directory inode record of a subvolume: it must have a
 * clean inode item, nlink 1 with no counted links, and exactly one ".."
 * inode-ref backref at index 0 with no dir item/index entries.
 */
2098 static int check_root_dir(struct inode_record *rec)
2100 	struct inode_backref *backref;
2103 	if (!rec->found_inode_item || rec->errors)
2105 	if (rec->nlink != 1 || rec->found_link != 0)
2107 	if (list_empty(&rec->backrefs))
2109 	backref = to_inode_backref(rec->backrefs.next);
2110 	if (!backref->found_inode_ref)
2112 	if (backref->index != 0 || backref->namelen != 2 ||
2113 	    memcmp(backref->name, "..", 2))
2115 	if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair: rewrite a directory inode's i_size to the byte total computed
 * from its dir entries (rec->found_size), then clear the
 * I_ERR_DIR_ISIZE_WRONG flag.  Searches with offset (u64)-1 and steps
 * back one slot to land on the INODE_ITEM.
 */
2122 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2123 			      struct btrfs_root *root, struct btrfs_path *path,
2124 			      struct inode_record *rec)
2126 	struct btrfs_inode_item *ei;
2127 	struct btrfs_key key;
2130 	key.objectid = rec->ino;
2131 	key.type = BTRFS_INODE_ITEM_KEY;
2132 	key.offset = (u64)-1;
2134 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* slot 0 means there is no previous item to step back to. */
2138 	if (!path->slots[0]) {
2145 	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2146 	if (key.objectid != rec->ino) {
2151 	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2152 			    struct btrfs_inode_item);
2153 	btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2154 	btrfs_mark_buffer_dirty(path->nodes[0]);
2155 	rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2156 	printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2157 	       root->root_key.objectid);
2159 	btrfs_release_path(path);
/*
 * Repair: insert the missing ORPHAN_ITEM for an unlinked inode so the
 * kernel will reclaim it, then clear I_ERR_NO_ORPHAN_ITEM on success.
 */
2163 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2164 				    struct btrfs_root *root,
2165 				    struct btrfs_path *path,
2166 				    struct inode_record *rec)
2170 	ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2171 	btrfs_release_path(path);
2173 		rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair: rewrite the inode item's nbytes field to the byte total found
 * while scanning its file extents (rec->found_size), then clear
 * I_ERR_FILE_NBYTES_WRONG.
 */
2177 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2178 			       struct btrfs_root *root,
2179 			       struct btrfs_path *path,
2180 			       struct inode_record *rec)
2182 	struct btrfs_inode_item *ei;
2183 	struct btrfs_key key;
2186 	key.objectid = rec->ino;
2187 	key.type = BTRFS_INODE_ITEM_KEY;
2190 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2197 	/* Since ret == 0, no need to check anything */
2198 	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2199 			    struct btrfs_inode_item);
2200 	btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2201 	btrfs_mark_buffer_dirty(path->nodes[0]);
2202 	rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2203 	printf("reset nbytes for ino %llu root %llu\n",
2204 	       rec->ino, root->root_key.objectid);
2206 	btrfs_release_path(path);
/*
 * Repair: reconstruct a missing DIR_INDEX item for @backref (which was
 * found via its dir_item/inode_ref) in its own transaction, then update
 * the parent directory's record so its isize accounting stays consistent.
 */
2210 static int add_missing_dir_index(struct btrfs_root *root,
2211 				 struct cache_tree *inode_cache,
2212 				 struct inode_record *rec,
2213 				 struct inode_backref *backref)
2215 	struct btrfs_path *path;
2216 	struct btrfs_trans_handle *trans;
2217 	struct btrfs_dir_item *dir_item;
2218 	struct extent_buffer *leaf;
2219 	struct btrfs_key key;
2220 	struct btrfs_disk_key disk_key;
2221 	struct inode_record *dir_rec;
2222 	unsigned long name_ptr;
2223 	u32 data_size = sizeof(*dir_item) + backref->namelen;
2226 	path = btrfs_alloc_path();
2230 	trans = btrfs_start_transaction(root, 1);
2231 	if (IS_ERR(trans)) {
2232 		btrfs_free_path(path);
2233 		return PTR_ERR(trans);
2236 	fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2237 		(unsigned long long)rec->ino);
2238 	key.objectid = backref->dir;
2239 	key.type = BTRFS_DIR_INDEX_KEY;
2240 	key.offset = backref->index;
2242 	ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2245 	leaf = path->nodes[0];
2246 	dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* The dir item's embedded location key points at the child inode. */
2248 	disk_key.objectid = cpu_to_le64(rec->ino);
2249 	disk_key.type = BTRFS_INODE_ITEM_KEY;
2250 	disk_key.offset = 0;
2252 	btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2253 	btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2254 	btrfs_set_dir_data_len(leaf, dir_item, 0);
2255 	btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2256 	name_ptr = (unsigned long)(dir_item + 1);
2257 	write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2258 	btrfs_mark_buffer_dirty(leaf);
2259 	btrfs_free_path(path);
2260 	btrfs_commit_transaction(trans, root);
2262 	backref->found_dir_index = 1;
2263 	dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2264 	BUG_ON(IS_ERR(dir_rec));
/* Keep the parent dir's isize bookkeeping in sync with the new name. */
2267 	dir_rec->found_size += backref->namelen;
2268 	if (dir_rec->found_size == dir_rec->isize &&
2269 	    (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2270 		dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2271 	if (dir_rec->found_size != dir_rec->isize)
2272 		dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Repair: remove a bad DIR_INDEX entry described by @backref in its own
 * transaction.  If the lookup returns no dir item, only the item itself
 * is deleted; otherwise the specific name within the item is removed.
 */
2277 static int delete_dir_index(struct btrfs_root *root,
2278 			    struct cache_tree *inode_cache,
2279 			    struct inode_record *rec,
2280 			    struct inode_backref *backref)
2282 	struct btrfs_trans_handle *trans;
2283 	struct btrfs_dir_item *di;
2284 	struct btrfs_path *path;
2287 	path = btrfs_alloc_path();
2291 	trans = btrfs_start_transaction(root, 1);
2292 	if (IS_ERR(trans)) {
2293 		btrfs_free_path(path);
2294 		return PTR_ERR(trans);
2298 	fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2299 		(unsigned long long)backref->dir,
2300 		BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2301 		(unsigned long long)root->objectid);
2303 	di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2304 				    backref->name, backref->namelen,
2305 				    backref->index, -1);
2308 		btrfs_free_path(path);
2309 		btrfs_commit_transaction(trans, root);
/* di == NULL: positioned on the item but name absent — delete whole item;
 * otherwise delete just this name from the (possibly packed) item. */
2316 		ret = btrfs_del_item(trans, root, path);
2318 		ret = btrfs_delete_one_dir_name(trans, root, path, di);
2320 	btrfs_free_path(path);
2321 	btrfs_commit_transaction(trans, root);
/*
 * Repair: synthesize a brand-new inode item for @rec from whatever was
 * reconstructed (link count, sizes, dir vs. file heuristics).  Mode bits
 * default to 0755 and timestamps to "now"; the user is warned that the
 * result may be incomplete.
 */
2325 static int create_inode_item(struct btrfs_root *root,
2326 			     struct inode_record *rec,
2327 			     struct inode_backref *backref, int root_dir)
2329 	struct btrfs_trans_handle *trans;
2330 	struct btrfs_inode_item inode_item;
2331 	time_t now = time(NULL);
2334 	trans = btrfs_start_transaction(root, 1);
2335 	if (IS_ERR(trans)) {
2336 		ret = PTR_ERR(trans);
2340 	fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2341 		"be incomplete, please check permissions and content after "
2342 		"the fsck completes.\n", (unsigned long long)root->objectid,
2343 		(unsigned long long)rec->ino);
2345 	memset(&inode_item, 0, sizeof(inode_item));
2346 	btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2348 		btrfs_set_stack_inode_nlink(&inode_item, 1);
2350 		btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2351 	btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2352 	if (rec->found_dir_item) {
2353 		if (rec->found_file_extent)
2354 			fprintf(stderr, "root %llu inode %llu has both a dir "
2355 				"item and extents, unsure if it is a dir or a "
2356 				"regular file so setting it as a directory\n",
2357 				(unsigned long long)root->objectid,
2358 				(unsigned long long)rec->ino);
2359 		btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2360 		btrfs_set_stack_inode_size(&inode_item, rec->found_size);
/* NOTE(review): this condition is always true here (we are in the else of
 * `if (rec->found_dir_item)`) — likely meant to test something narrower,
 * e.g. found_file_extent; confirm against upstream before changing. */
2361 	} else if (!rec->found_dir_item) {
2362 		btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2363 		btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2365 	btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2366 	btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2367 	btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2368 	btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2369 	btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2370 	btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
/* otime (creation time) intentionally left as zero/unknown. */
2371 	btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2372 	btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2374 	ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2376 	btrfs_commit_transaction(trans, root);
/*
 * Walk every backref of @rec and repair name-link inconsistencies:
 * delete bad dir indexes, re-add missing dir index/item pairs, recreate
 * the inode item when only the name side survived, and drop fully
 * consistent backrefs from the list.  Returns a negative errno, or the
 * number of repairs made (so callers can re-verify when non-zero).
 */
2380 static int repair_inode_backrefs(struct btrfs_root *root,
2381 				 struct inode_record *rec,
2382 				 struct cache_tree *inode_cache,
2385 	struct inode_backref *tmp, *backref;
2386 	u64 root_dirid = btrfs_root_dirid(&root->root_item);
2390 	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2391 		if (!delete && rec->ino == root_dirid) {
2392 			if (!rec->found_inode_item) {
2393 				ret = create_inode_item(root, rec, backref, 1);
2400 		/* Index 0 for root dir's are special, don't mess with it */
2401 		if (rec->ino == root_dirid && backref->index == 0)
/* A dir index without matching inode ref (or with a mismatched index)
 * is bogus — remove it. */
2405 		    ((backref->found_dir_index && !backref->found_inode_ref) ||
2406 		     (backref->found_dir_index && backref->found_inode_ref &&
2407 		      (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2408 			ret = delete_dir_index(root, inode_cache, rec, backref);
2412 			list_del(&backref->list);
2416 		if (!delete && !backref->found_dir_index &&
2417 		    backref->found_dir_item && backref->found_inode_ref) {
2418 			ret = add_missing_dir_index(root, inode_cache, rec,
/* NOTE(review): found_dir_index is tested twice below; the third term
 * looks like it was meant to be a different flag (found_inode_ref?) —
 * verify against upstream btrfs-progs history. */
2423 		if (backref->found_dir_item &&
2424 		    backref->found_dir_index &&
2425 		    backref->found_dir_index) {
2426 			if (!backref->errors &&
2427 			    backref->found_inode_ref) {
2428 				list_del(&backref->list);
/* Only the inode ref survived: rebuild the dir index/item pair. */
2434 		if (!delete && (!backref->found_dir_index &&
2435 				!backref->found_dir_item &&
2436 				backref->found_inode_ref)) {
2437 			struct btrfs_trans_handle *trans;
2438 			struct btrfs_key location;
2440 			ret = check_dir_conflict(root, backref->name,
2446 				 * let nlink fixing routine to handle it,
2447 				 * which can do it better.
2452 			location.objectid = rec->ino;
2453 			location.type = BTRFS_INODE_ITEM_KEY;
2454 			location.offset = 0;
2456 			trans = btrfs_start_transaction(root, 1);
2457 			if (IS_ERR(trans)) {
2458 				ret = PTR_ERR(trans);
2461 			fprintf(stderr, "adding missing dir index/item pair "
2463 				(unsigned long long)rec->ino);
2464 			ret = btrfs_insert_dir_item(trans, root, backref->name,
2466 						    backref->dir, &location,
2467 						    imode_to_type(rec->imode),
2470 			btrfs_commit_transaction(trans, root);
/* Name side fully present but inode item missing: recreate it. */
2474 		if (!delete && (backref->found_inode_ref &&
2475 				backref->found_dir_index &&
2476 				backref->found_dir_item &&
2477 				!(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2478 				!rec->found_inode_item)) {
2479 			ret = create_inode_item(root, rec, backref, 0);
2486 	return ret ? ret : repaired;
2490  * To determine the file type for nlink/inode_item repair
2492  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2493  * Return -ENOENT if file type is not found.
/* Prefer the inode item's mode; fall back to any backref's dir filetype. */
2495 static int find_file_type(struct inode_record *rec, u8 *type)
2497 	struct inode_backref *backref;
2499 	/* For inode item recovered case */
2500 	if (rec->found_inode_item) {
2501 		*type = imode_to_type(rec->imode);
2505 	list_for_each_entry(backref, &rec->backrefs, list) {
2506 		if (backref->found_dir_index || backref->found_dir_item) {
2507 			*type = backref->filetype;
2515  * To determine the file name for nlink repair
2517  * Return 0 if file name is found, set name and namelen.
2518  * Return -ENOENT if file name is not found.
/* @name must be at least BTRFS_NAME_LEN bytes: backref->namelen is
 * bounded by that when the backref was recorded. */
2520 static int find_file_name(struct inode_record *rec,
2521 			  char *name, int *namelen)
2523 	struct inode_backref *backref;
2525 	list_for_each_entry(backref, &rec->backrefs, list) {
2526 		if (backref->found_dir_index || backref->found_dir_item ||
2527 		    backref->found_inode_ref) {
2528 			memcpy(name, backref->name, backref->namelen);
2529 			*namelen = backref->namelen;
2536 /* Reset the nlink of the inode to the correct one */
/*
 * Strategy: unlink every recorded name (dropping invalid backrefs for
 * good), force the on-disk nlink to 0, then re-add only the fully valid
 * links via btrfs_add_link(), which increments nlink as it goes — so the
 * final count is correct by construction.
 */
2537 static int reset_nlink(struct btrfs_trans_handle *trans,
2538 		       struct btrfs_root *root,
2539 		       struct btrfs_path *path,
2540 		       struct inode_record *rec)
2542 	struct inode_backref *backref;
2543 	struct inode_backref *tmp;
2544 	struct btrfs_key key;
2545 	struct btrfs_inode_item *inode_item;
2548 	/* We don't believe this either, reset it and iterate backref */
2549 	rec->found_link = 0;
2551 	/* Remove all backref including the valid ones */
2552 	list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2553 		ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2554 				   backref->index, backref->name,
2555 				   backref->namelen, 0);
2559 		/* remove invalid backref, so it won't be added back */
2560 		if (!(backref->found_dir_index &&
2561 		      backref->found_dir_item &&
2562 		      backref->found_inode_ref)) {
2563 			list_del(&backref->list);
2570 	/* Set nlink to 0 */
2571 	key.objectid = rec->ino;
2572 	key.type = BTRFS_INODE_ITEM_KEY;
2574 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2581 	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2582 				    struct btrfs_inode_item);
2583 	btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2584 	btrfs_mark_buffer_dirty(path->nodes[0]);
2585 	btrfs_release_path(path);
2588 	 * Add back valid inode_ref/dir_item/dir_index,
2589 	 * add_link() will handle the nlink inc, so new nlink must be correct
2591 	list_for_each_entry(backref, &rec->backrefs, list) {
2592 		ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2593 				     backref->name, backref->namelen,
2594 				     backref->filetype, &backref->index, 1);
2599 	btrfs_release_path(path);
/*
 * Repair a wrong link count: recover a usable name/type, rebuild links
 * via reset_nlink(), and — if no valid link remains — relink the inode
 * into a "lost+found" directory, suffixing ".<ino>" on name collisions.
 * Always clears I_ERR_LINK_COUNT_WRONG to avoid reprocessing loops.
 */
2603 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2604 			       struct btrfs_root *root,
2605 			       struct btrfs_path *path,
2606 			       struct inode_record *rec)
2608 	char *dir_name = "lost+found";
2609 	char namebuf[BTRFS_NAME_LEN] = {0};
2614 	int name_recovered = 0;
2615 	int type_recovered = 0;
2619 	 * Get file name and type first before these invalid inode ref
2620 	 * are deleted by remove_all_invalid_backref()
2622 	name_recovered = !find_file_name(rec, namebuf, &namelen);
2623 	type_recovered = !find_file_type(rec, &type);
2625 	if (!name_recovered) {
2626 		printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2627 		       rec->ino, rec->ino);
2628 		namelen = count_digits(rec->ino);
2629 		sprintf(namebuf, "%llu", rec->ino);
2632 	if (!type_recovered) {
2633 		printf("Can't get file type for inode %llu, using FILE as fallback\n",
2635 		type = BTRFS_FT_REG_FILE;
2639 	ret = reset_nlink(trans, root, path, rec);
2642 			"Failed to reset nlink for inode %llu: %s\n",
2643 			rec->ino, strerror(-ret));
/* No surviving valid link: move the inode under lost+found. */
2647 	if (rec->found_link == 0) {
2648 		lost_found_ino = root->highest_inode;
2649 		if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2654 		ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2655 				  BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2658 			fprintf(stderr, "Failed to create '%s' dir: %s\n",
2659 				dir_name, strerror(-ret));
2662 		ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2663 				     namebuf, namelen, type, NULL, 1);
2665 		 * Add ".INO" suffix several times to handle case where
2666 		 * "FILENAME.INO" is already taken by another file.
2668 		while (ret == -EEXIST) {
2670 			 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2672 			if (namelen + count_digits(rec->ino) + 1 >
2677 			snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2679 			namelen += count_digits(rec->ino) + 1;
2680 			ret = btrfs_add_link(trans, root, rec->ino,
2681 					     lost_found_ino, namebuf,
2682 					     namelen, type, NULL, 1);
2686 				"Failed to link the inode %llu to %s dir: %s\n",
2687 				rec->ino, dir_name, strerror(-ret));
2691 		 * Just increase the found_link, don't actually add the
2692 		 * backref. This will make things easier and this inode
2693 		 * record will be freed after the repair is done.
2694 		 * So fsck will not report problem about this inode.
2697 		printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2698 		       namelen, namebuf, dir_name);
2700 	printf("Fixed the nlink of inode %llu\n", rec->ino);
2703 	 * Clear the flag anyway, or we will loop forever for the same inode
2704 	 * as it will not be removed from the bad inode list and the dead loop
2707 	rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2708 	btrfs_release_path(path);
2713  * Check if there is any normal(reg or prealloc) file extent for given
2715  * This is used to determine the file type when neither its dir_index/item or
2716  * inode_item exists.
2718  * This will *NOT* report error, if any error happens, just consider it does
2719  * not have any normal file extent.
/* Returns nonzero iff inode @ino has at least one non-inline EXTENT_DATA
 * item; every failure path deliberately reads as "no normal extent". */
2721 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2723 	struct btrfs_path *path;
2724 	struct btrfs_key key;
2725 	struct btrfs_key found_key;
2726 	struct btrfs_file_extent_item *fi;
2730 	path = btrfs_alloc_path();
2734 	key.type = BTRFS_EXTENT_DATA_KEY;
2737 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2742 		if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2743 			ret = btrfs_next_leaf(root, path);
2750 		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2752 		if (found_key.objectid != ino ||
2753 		    found_key.type != BTRFS_EXTENT_DATA_KEY)
2755 		fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2756 				    struct btrfs_file_extent_item);
2757 		type = btrfs_file_extent_type(path->nodes[0], fi);
/* Any REG or PREALLOC extent proves this is a regular file. */
2758 		if (type != BTRFS_FILE_EXTENT_INLINE) {
2764 	btrfs_free_path(path);
/*
 * Map a BTRFS_FT_* directory-entry file type to the corresponding
 * S_IF* mode bits.  Note: an out-of-range or BTRFS_FT_UNKNOWN type
 * indexes a zero (or out-of-bounds) slot — callers pass validated types.
 */
2768 static u32 btrfs_type_to_imode(u8 type)
2770 	static u32 imode_by_btrfs_type[] = {
2771 		[BTRFS_FT_REG_FILE]	= S_IFREG,
2772 		[BTRFS_FT_DIR]		= S_IFDIR,
2773 		[BTRFS_FT_CHRDEV]	= S_IFCHR,
2774 		[BTRFS_FT_BLKDEV]	= S_IFBLK,
2775 		[BTRFS_FT_FIFO]		= S_IFIFO,
2776 		[BTRFS_FT_SOCK]		= S_IFSOCK,
2777 		[BTRFS_FT_SYMLINK]	= S_IFLNK,
2780 	return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for @rec->ino.
 *
 * Only the inode item itself is recreated; link-count repair (e.g. moving
 * the file to lost+found) is intentionally left to the nlink repair stage,
 * which is forced to run by setting I_ERR_LINK_COUNT_WRONG below.
 */
2783 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2784 struct btrfs_root *root,
2785 struct btrfs_path *path,
2786 struct inode_record *rec)
2790 int type_recovered = 0;
2793 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* First try to recover the type from the backrefs recorded for this inode. */
2795 type_recovered = !find_file_type(rec, &filetype);
2798 * Try to determine inode type if type not found.
2800 * For found regular file extent, it must be FILE.
2801 * For found dir_item/index, it must be DIR.
2803 * For undetermined one, use FILE as fallback.
2806 * 1. If found backref(inode_index/item is already handled) to it,
2808 * Need new inode-inode ref structure to allow search for that.
2810 if (!type_recovered) {
2811 if (rec->found_file_extent &&
2812 find_normal_file_extent(root, rec->ino)) {
2814 filetype = BTRFS_FT_REG_FILE;
2815 } else if (rec->found_dir_item) {
2817 filetype = BTRFS_FT_DIR;
/* Orphan data extents imply file data, so treat it as a regular file. */
2818 } else if (!list_empty(&rec->orphan_extents)) {
2820 filetype = BTRFS_FT_REG_FILE;
2822 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2825 filetype = BTRFS_FT_REG_FILE;
2829 ret = btrfs_new_inode(trans, root, rec->ino,
2830 mode | btrfs_type_to_imode(filetype));
2835 * Here inode rebuild is done, we only rebuild the inode item,
2836 * don't repair the nlink(like move to lost+found).
2837 * That is the job of nlink repair.
2839 * We just fill the record and return
2841 rec->found_dir_item = 1;
2842 rec->imode = mode | btrfs_type_to_imode(filetype);
2844 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2845 /* Ensure the inode_nlinks repair function will be called */
2846 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach orphan data extents to the inode described by @rec.
 *
 * For each orphan extent: if it conflicts with an existing file extent it
 * is freed, otherwise a file extent item is inserted and the record's
 * size/hole bookkeeping is updated. Clears I_ERR_FILE_EXTENT_ORPHAN when
 * the list has been fully processed.
 */
2851 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2852 struct btrfs_root *root,
2853 struct btrfs_path *path,
2854 struct inode_record *rec)
2856 struct orphan_data_extent *orphan;
2857 struct orphan_data_extent *tmp;
2860 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2862 * Check for conflicting file extents
2864 * Here we don't know whether the extents is compressed or not,
2865 * so we can only assume it not compressed nor data offset,
2866 * and use its disk_len as extent length.
2868 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2869 orphan->offset, orphan->disk_len, 0);
2870 btrfs_release_path(path);
2875 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2876 orphan->disk_bytenr, orphan->disk_len);
/* Conflicting orphan: drop its extent allocation instead of linking it. */
2877 ret = btrfs_free_extent(trans,
2878 root->fs_info->extent_root,
2879 orphan->disk_bytenr, orphan->disk_len,
2880 0, root->objectid, orphan->objectid,
2885 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2886 orphan->offset, orphan->disk_bytenr,
2887 orphan->disk_len, orphan->disk_len);
2891 /* Update file size info */
2892 rec->found_size += orphan->disk_len;
2893 if (rec->found_size == rec->nbytes)
2894 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2896 /* Update the file extent hole info too */
2897 ret = del_file_extent_hole(&rec->holes, orphan->offset,
/* All holes filled: the discount-extent error no longer applies. */
2901 if (RB_EMPTY_ROOT(&rec->holes))
2902 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2904 list_del(&orphan->list);
2907 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Repair missing file extents ("discount" extents) by punching explicit
 * holes over every gap recorded in @rec->holes, then clearing
 * I_ERR_FILE_EXTENT_DISCOUNT once the hole tree is empty.
 */
2912 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2913 struct btrfs_root *root,
2914 struct btrfs_path *path,
2915 struct inode_record *rec)
2917 struct rb_node *node;
2918 struct file_extent_hole *hole;
2922 node = rb_first(&rec->holes);
2926 hole = rb_entry(node, struct file_extent_hole, node);
/* Punch an explicit hole covering the missing range. */
2927 ret = btrfs_punch_hole(trans, root, rec->ino,
2928 hole->start, hole->len);
2931 ret = del_file_extent_hole(&rec->holes, hole->start,
2935 if (RB_EMPTY_ROOT(&rec->holes))
2936 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2937 node = rb_first(&rec->holes);
2939 /* special case for a file losing all its file extent */
2941 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2942 round_up(rec->isize, root->sectorsize));
2946 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2947 rec->ino, root->objectid);
/*
 * Dispatch the per-error repair routines for one inode record.
 *
 * Bails out early unless @rec has at least one repairable error flag.
 * Repairs run in dependency order (inode item first, nlink late, nbytes
 * last) inside a single transaction; each step only runs if the previous
 * one succeeded.
 */
2952 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2954 struct btrfs_trans_handle *trans;
2955 struct btrfs_path *path;
/* Nothing we know how to repair — leave the record untouched. */
2958 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2959 I_ERR_NO_ORPHAN_ITEM |
2960 I_ERR_LINK_COUNT_WRONG |
2961 I_ERR_NO_INODE_ITEM |
2962 I_ERR_FILE_EXTENT_ORPHAN |
2963 I_ERR_FILE_EXTENT_DISCOUNT|
2964 I_ERR_FILE_NBYTES_WRONG)))
2967 path = btrfs_alloc_path();
2972 * For nlink repair, it may create a dir and add link, so
2973 * 2 for parent(256)'s dir_index and dir_item
2974 * 2 for lost+found dir's inode_item and inode_ref
2975 * 1 for the new inode_ref of the file
2976 * 2 for lost+found dir's dir_index and dir_item for the file
2978 trans = btrfs_start_transaction(root, 7);
2979 if (IS_ERR(trans)) {
2980 btrfs_free_path(path);
2981 return PTR_ERR(trans);
/* Each repair only proceeds while the previous ones succeeded (!ret). */
2984 if (rec->errors & I_ERR_NO_INODE_ITEM)
2985 ret = repair_inode_no_item(trans, root, path, rec);
2986 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
2987 ret = repair_inode_orphan_extent(trans, root, path, rec);
2988 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
2989 ret = repair_inode_discount_extent(trans, root, path, rec);
2990 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
2991 ret = repair_inode_isize(trans, root, path, rec);
2992 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
2993 ret = repair_inode_orphan_item(trans, root, path, rec);
2994 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
2995 ret = repair_inode_nlinks(trans, root, path, rec);
2996 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
2997 ret = repair_inode_nbytes(trans, root, path, rec);
/* NOTE(review): committed even when a repair step failed — confirm intended. */
2998 btrfs_commit_transaction(trans, root);
2999 btrfs_free_path(path);
/*
 * Validate (and optionally repair) every inode record collected for one
 * fs/subvolume root.
 *
 * Phases visible here:
 *   1. Track the highest inode number (for later lost+found creation).
 *   2. In repair mode, fix inode backrefs first (they can change other
 *      error bits), deleting bad refs before re-adding correct ones.
 *   3. Verify the root directory item, recreating it in repair mode.
 *   4. Walk all remaining records, flag missing inode items / wrong link
 *      counts, attempt try_repair_inode(), and print what is left broken.
 *
 * Returns -1 if any unrepaired error remains, 0 otherwise.
 */
3003 static int check_inode_recs(struct btrfs_root *root,
3004 struct cache_tree *inode_cache)
3006 struct cache_extent *cache;
3007 struct ptr_node *node;
3008 struct inode_record *rec;
3009 struct inode_backref *backref;
3014 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A dead root (refs == 0) should have no inode records at all. */
3016 if (btrfs_root_refs(&root->root_item) == 0) {
3017 if (!cache_tree_empty(inode_cache))
3018 fprintf(stderr, "warning line %d\n", __LINE__);
3023 * We need to record the highest inode number for later 'lost+found'
3025 * We must select an ino not used/referred by any existing inode, or
3026 * 'lost+found' ino may be a missing ino in a corrupted leaf,
3027 * this may cause 'lost+found' dir has wrong nlinks.
3029 cache = last_cache_extent(inode_cache);
3031 node = container_of(cache, struct ptr_node, cache);
3033 if (rec->ino > root->highest_inode)
3034 root->highest_inode = rec->ino;
3038 * We need to repair backrefs first because we could change some of the
3039 * errors in the inode recs.
3041 * We also need to go through and delete invalid backrefs first and then
3042 * add the correct ones second. We do this because we may get EEXIST
3043 * when adding back the correct index because we hadn't yet deleted the
3046 * For example, if we were missing a dir index then the directories
3047 * isize would be wrong, so if we fixed the isize to what we thought it
3048 * would be and then fixed the backref we'd still have a invalid fs, so
3049 * we need to add back the dir index and then check to see if the isize
3054 if (stage == 3 && !err)
3057 cache = search_cache_extent(inode_cache, 0);
3058 while (repair && cache) {
3059 node = container_of(cache, struct ptr_node, cache);
3061 cache = next_cache_extent(cache);
3063 /* Need to free everything up and rescan */
3065 remove_cache_extent(inode_cache, &node->cache);
3067 free_inode_rec(rec);
3071 if (list_empty(&rec->backrefs))
3074 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Validate the root directory inode of this subvolume. */
3088 rec = get_inode_rec(inode_cache, root_dirid, 0);
3089 BUG_ON(IS_ERR(rec));
3091 ret = check_root_dir(rec);
3093 fprintf(stderr, "root %llu root dir %llu error\n",
3094 (unsigned long long)root->root_key.objectid,
3095 (unsigned long long)root_dirid);
3096 print_inode_error(root, rec);
/* Repair mode: recreate a missing root dir in its own transaction. */
3101 struct btrfs_trans_handle *trans;
3103 trans = btrfs_start_transaction(root, 1);
3104 if (IS_ERR(trans)) {
3105 err = PTR_ERR(trans);
3110 "root %llu missing its root dir, recreating\n",
3111 (unsigned long long)root->objectid);
3113 ret = btrfs_make_root_dir(trans, root, root_dirid);
3116 btrfs_commit_transaction(trans, root);
3120 fprintf(stderr, "root %llu root dir %llu not found\n",
3121 (unsigned long long)root->root_key.objectid,
3122 (unsigned long long)root_dirid);
/* Final pass: drain the cache, checking/repairing each record. */
3126 cache = search_cache_extent(inode_cache, 0);
3129 node = container_of(cache, struct ptr_node, cache);
3131 remove_cache_extent(inode_cache, &node->cache);
/* Root dir and orphan objectid were handled above / are special. */
3133 if (rec->ino == root_dirid ||
3134 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3135 free_inode_rec(rec);
/* A present orphan item excuses a zero-link inode. */
3139 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3140 ret = check_orphan_item(root, rec->ino);
3142 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3143 if (can_free_inode_rec(rec)) {
3144 free_inode_rec(rec);
3149 if (!rec->found_inode_item)
3150 rec->errors |= I_ERR_NO_INODE_ITEM;
3151 if (rec->found_link != rec->nlink)
3152 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3154 ret = try_repair_inode(root, rec);
3155 if (ret == 0 && can_free_inode_rec(rec)) {
3156 free_inode_rec(rec);
3162 if (!(repair && ret == 0))
3164 print_inode_error(root, rec);
/* Report every unresolved backref for this broken inode. */
3165 list_for_each_entry(backref, &rec->backrefs, list) {
3166 if (!backref->found_dir_item)
3167 backref->errors |= REF_ERR_NO_DIR_ITEM;
3168 if (!backref->found_dir_index)
3169 backref->errors |= REF_ERR_NO_DIR_INDEX;
3170 if (!backref->found_inode_ref)
3171 backref->errors |= REF_ERR_NO_INODE_REF;
3172 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3173 " namelen %u name %s filetype %d errors %x",
3174 (unsigned long long)backref->dir,
3175 (unsigned long long)backref->index,
3176 backref->namelen, backref->name,
3177 backref->filetype, backref->errors);
3178 print_ref_error(backref->errors);
3180 free_inode_rec(rec);
3182 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, creating and
 * inserting a fresh zeroed record if none exists yet.
 *
 * Returns the record, or ERR_PTR(-ENOMEM) / ERR_PTR(-EEXIST) on failure.
 * NOTE(review): on insert failure the freshly calloc'd @rec appears to be
 * leaked — the free may be in elided lines, confirm against full source.
 */
3185 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3188 struct cache_extent *cache;
3189 struct root_record *rec = NULL;
/* Cache is keyed by objectid with size 1 (see insertion below). */
3192 cache = lookup_cache_extent(root_cache, objectid, 1);
3194 rec = container_of(cache, struct root_record, cache);
3196 rec = calloc(1, sizeof(*rec));
3198 return ERR_PTR(-ENOMEM);
3199 rec->objectid = objectid;
3200 INIT_LIST_HEAD(&rec->backrefs);
3201 rec->cache.start = objectid;
3202 rec->cache.size = 1;
3204 ret = insert_cache_extent(root_cache, &rec->cache);
3206 return ERR_PTR(-EEXIST);
/*
 * Find the backref on @rec matching (ref_root, dir, name); allocate and
 * append a new one if no match exists. The name is copied inline after
 * the struct and NUL-terminated.
 *
 * NOTE(review): the match does not compare @index — an existing backref
 * with a different index is reused (callers reconcile index mismatches).
 */
3211 static struct root_backref *get_root_backref(struct root_record *rec,
3212 u64 ref_root, u64 dir, u64 index,
3213 const char *name, int namelen)
3215 struct root_backref *backref;
3217 list_for_each_entry(backref, &rec->backrefs, list) {
3218 if (backref->ref_root != ref_root || backref->dir != dir ||
3219 backref->namelen != namelen)
3221 if (memcmp(name, backref->name, namelen))
/* No match: allocate struct plus inline name buffer (+1 for NUL). */
3226 backref = calloc(1, sizeof(*backref) + namelen + 1);
3229 backref->ref_root = ref_root;
3231 backref->index = index;
3232 backref->namelen = namelen;
3233 memcpy(backref->name, name, namelen);
3234 backref->name[namelen] = '\0';
3235 list_add_tail(&backref->list, &rec->backrefs);
/*
 * Destructor for a root_record stored in a cache_tree: free all of its
 * backrefs, then (in elided lines) the record itself. Used below to
 * instantiate free_root_recs_tree() via FREE_EXTENT_CACHE_BASED_TREE.
 */
3239 static void free_root_record(struct cache_extent *cache)
3241 struct root_record *rec;
3242 struct root_backref *backref;
3244 rec = container_of(cache, struct root_record, cache);
3245 while (!list_empty(&rec->backrefs)) {
3246 backref = to_root_backref(rec->backrefs.next);
3247 list_del(&backref->list);
3254 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one reference to root @root_id coming from @ref_root, based on a
 * DIR_ITEM, DIR_INDEX, ROOT_REF or ROOT_BACKREF item (@item_type).
 *
 * Sets the found_* bits and accumulates REF_ERR_* flags (index mismatch,
 * duplicate root refs/backrefs). A root becomes "reachable" once both a
 * forward ref and a dir item have been seen for the same backref.
 */
3256 static int add_root_backref(struct cache_tree *root_cache,
3257 u64 root_id, u64 ref_root, u64 dir, u64 index,
3258 const char *name, int namelen,
3259 int item_type, int errors)
3261 struct root_record *rec;
3262 struct root_backref *backref;
3264 rec = get_root_rec(root_cache, root_id);
3265 BUG_ON(IS_ERR(rec));
3266 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3269 backref->errors |= errors;
/* DIR_ITEM carries no index; for every other item type reconcile it. */
3271 if (item_type != BTRFS_DIR_ITEM_KEY) {
3272 if (backref->found_dir_index || backref->found_back_ref ||
3273 backref->found_forward_ref) {
3274 if (backref->index != index)
3275 backref->errors |= REF_ERR_INDEX_UNMATCH;
3277 backref->index = index;
3281 if (item_type == BTRFS_DIR_ITEM_KEY) {
3282 if (backref->found_forward_ref)
3284 backref->found_dir_item = 1;
3285 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3286 backref->found_dir_index = 1;
3287 } else if (item_type == BTRFS_ROOT_REF_KEY) {
/* Seeing a second ROOT_REF for the same backref is an error. */
3288 if (backref->found_forward_ref)
3289 backref->errors |= REF_ERR_DUP_ROOT_REF;
3290 else if (backref->found_dir_item)
3292 backref->found_forward_ref = 1;
3293 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3294 if (backref->found_back_ref)
3295 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3296 backref->found_back_ref = 1;
/* Forward ref + dir item together make the subvolume reachable. */
3301 if (backref->found_forward_ref && backref->found_dir_item)
3302 backref->reachable = 1;
/*
 * Fold the per-subvolume root cache (@src_cache, holding inode_records
 * that actually describe child roots) into the global @dst_cache as
 * root backrefs.
 *
 * Reloc tree records are discarded wholesale — they do not contribute
 * root references.
 */
3306 static int merge_root_recs(struct btrfs_root *root,
3307 struct cache_tree *src_cache,
3308 struct cache_tree *dst_cache)
3310 struct cache_extent *cache;
3311 struct ptr_node *node;
3312 struct inode_record *rec;
3313 struct inode_backref *backref;
3316 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3317 free_inode_recs_tree(src_cache);
3322 cache = search_cache_extent(src_cache, 0);
3325 node = container_of(cache, struct ptr_node, cache);
3327 remove_cache_extent(src_cache, &node->cache);
/* Only records that are actually child roots are merged. */
3330 ret = is_child_root(root, root->objectid, rec->ino);
/* A child root's dir entries become root backrefs in the global cache. */
3336 list_for_each_entry(backref, &rec->backrefs, list) {
3337 BUG_ON(backref->found_inode_ref);
3338 if (backref->found_dir_item)
3339 add_root_backref(dst_cache, rec->ino,
3340 root->root_key.objectid, backref->dir,
3341 backref->index, backref->name,
3342 backref->namelen, BTRFS_DIR_ITEM_KEY,
3344 if (backref->found_dir_index)
3345 add_root_backref(dst_cache, rec->ino,
3346 root->root_key.objectid, backref->dir,
3347 backref->index, backref->name,
3348 backref->namelen, BTRFS_DIR_INDEX_KEY,
3352 free_inode_rec(rec);
/*
 * Verify that every fs tree in @root_cache is reachable from the fs tree
 * root, and that its ROOT_REF/ROOT_BACKREF/dir items are consistent.
 *
 * First propagates unreachability (a backref from an unreferenced root
 * cannot make its target reachable); this loop repeats until a pass makes
 * no changes. Then reports unreferenced trees (tolerating those covered
 * by an orphan item) and any per-backref errors.
 *
 * Returns 1 if errors were found, 0 otherwise.
 */
3359 static int check_root_refs(struct btrfs_root *root,
3360 struct cache_tree *root_cache)
3362 struct root_record *rec;
3363 struct root_record *ref_root;
3364 struct root_backref *backref;
3365 struct cache_extent *cache;
/* The top-level fs tree is implicitly referenced. */
3371 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3372 BUG_ON(IS_ERR(rec));
3375 /* fixme: this can not detect circular references */
3378 cache = search_cache_extent(root_cache, 0);
3382 rec = container_of(cache, struct root_record, cache);
3383 cache = next_cache_extent(cache);
3385 if (rec->found_ref == 0)
3388 list_for_each_entry(backref, &rec->backrefs, list) {
3389 if (!backref->reachable)
3392 ref_root = get_root_rec(root_cache,
3394 BUG_ON(IS_ERR(ref_root));
3395 if (ref_root->found_ref > 0)
/* Referencing root is itself unreachable: retract this backref. */
3398 backref->reachable = 0;
3400 if (rec->found_ref == 0)
/* Reporting pass over the (now stable) reachability state. */
3406 cache = search_cache_extent(root_cache, 0);
3410 rec = container_of(cache, struct root_record, cache);
3411 cache = next_cache_extent(cache);
3413 if (rec->found_ref == 0 &&
3414 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3415 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
/* An orphan item in the tree root legitimizes a dangling subvolume. */
3416 ret = check_orphan_item(root->fs_info->tree_root,
3422 * If we don't have a root item then we likely just have
3423 * a dir item in a snapshot for this root but no actual
3424 * ref key or anything so it's meaningless.
3426 if (!rec->found_root_item)
3429 fprintf(stderr, "fs tree %llu not referenced\n",
3430 (unsigned long long)rec->objectid);
3434 if (rec->found_ref > 0 && !rec->found_root_item)
3436 list_for_each_entry(backref, &rec->backrefs, list) {
3437 if (!backref->found_dir_item)
3438 backref->errors |= REF_ERR_NO_DIR_ITEM;
3439 if (!backref->found_dir_index)
3440 backref->errors |= REF_ERR_NO_DIR_INDEX;
3441 if (!backref->found_back_ref)
3442 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3443 if (!backref->found_forward_ref)
3444 backref->errors |= REF_ERR_NO_ROOT_REF;
3445 if (backref->reachable && backref->errors)
3452 fprintf(stderr, "fs tree %llu refs %u %s\n",
3453 (unsigned long long)rec->objectid, rec->found_ref,
3454 rec->found_root_item ? "" : "not found");
3456 list_for_each_entry(backref, &rec->backrefs, list) {
3457 if (!backref->reachable)
3459 if (!backref->errors && rec->found_root_item)
3461 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3462 " index %llu namelen %u name %s errors %x\n",
3463 (unsigned long long)backref->ref_root,
3464 (unsigned long long)backref->dir,
3465 (unsigned long long)backref->index,
3466 backref->namelen, backref->name,
3468 print_ref_error(backref->errors);
3471 return errors > 0 ? 1 : 0;
/*
 * Parse one ROOT_REF or ROOT_BACKREF item at @slot of @eb and record it
 * in @root_cache via add_root_backref().
 *
 * For ROOT_REF the key is (parent, REF, child): the referenced root is
 * key->offset. For ROOT_BACKREF the key is (child, BACKREF, parent), so
 * objectid/offset swap roles. Over-long names are truncated to
 * BTRFS_NAME_LEN and flagged REF_ERR_NAME_TOO_LONG.
 */
3474 static int process_root_ref(struct extent_buffer *eb, int slot,
3475 struct btrfs_key *key,
3476 struct cache_tree *root_cache)
3482 struct btrfs_root_ref *ref;
3483 char namebuf[BTRFS_NAME_LEN];
3486 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3488 dirid = btrfs_root_ref_dirid(eb, ref);
3489 index = btrfs_root_ref_sequence(eb, ref);
3490 name_len = btrfs_root_ref_name_len(eb, ref);
3492 if (name_len <= BTRFS_NAME_LEN) {
3496 len = BTRFS_NAME_LEN;
3497 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored immediately after the root_ref item. */
3499 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3501 if (key->type == BTRFS_ROOT_REF_KEY) {
3502 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3503 index, namebuf, len, key->type, error);
3505 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3506 index, namebuf, len, key->type, error);
/*
 * Destructor for a btrfs_corrupt_block cache entry; the FREE_* macro
 * below instantiates free_corrupt_blocks_tree() from it.
 */
3511 static void free_corrupt_block(struct cache_extent *cache)
3513 struct btrfs_corrupt_block *corrupt;
3515 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3519 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3522 * Repair the btree of the given root.
3524 * The fix is to remove the node key in corrupt_blocks cache_tree.
3525 * and rebalance the tree.
3526 * After the fix, the btree should be writeable.
3528 static int repair_btree(struct btrfs_root *root,
3529 struct cache_tree *corrupt_blocks)
3531 struct btrfs_trans_handle *trans;
3532 struct btrfs_path *path;
3533 struct btrfs_corrupt_block *corrupt;
3534 struct cache_extent *cache;
3535 struct btrfs_key key;
/* Nothing recorded as corrupted: nothing to do. */
3540 if (cache_tree_empty(corrupt_blocks))
3543 path = btrfs_alloc_path();
3547 trans = btrfs_start_transaction(root, 1);
3548 if (IS_ERR(trans)) {
3549 ret = PTR_ERR(trans);
3550 fprintf(stderr, "Error starting transaction: %s\n",
/* Pass 1: drop the pointer to each corrupt block and free its extent. */
3554 cache = first_cache_extent(corrupt_blocks);
3556 corrupt = container_of(cache, struct btrfs_corrupt_block,
3558 level = corrupt->level;
3559 path->lowest_level = level;
3560 key.objectid = corrupt->key.objectid;
3561 key.type = corrupt->key.type;
3562 key.offset = corrupt->key.offset;
3565 * Here we don't want to do any tree balance, since it may
3566 * cause a balance with corrupted brother leaf/node,
3567 * so ins_len set to 0 here.
3568 * Balance will be done after all corrupt node/leaf is deleted.
3570 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3573 offset = btrfs_node_blockptr(path->nodes[level],
3574 path->slots[level]);
3576 /* Remove the ptr */
3577 ret = btrfs_del_ptr(trans, root, path, level,
3578 path->slots[level]);
3582 * Remove the corresponding extent
3583 * return value is not concerned.
3585 btrfs_release_path(path);
3586 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3587 0, root->root_key.objectid,
3589 cache = next_cache_extent(cache);
3592 /* Balance the btree using btrfs_search_slot() */
/* Pass 2: re-search with ins_len=-1 so deletion-side balancing runs. */
3593 cache = first_cache_extent(corrupt_blocks);
3595 corrupt = container_of(cache, struct btrfs_corrupt_block,
3597 memcpy(&key, &corrupt->key, sizeof(key));
3598 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3601 /* return will always >0 since it won't find the item */
3603 btrfs_release_path(path);
3604 cache = next_cache_extent(cache);
3607 btrfs_commit_transaction(trans, root);
3609 btrfs_free_path(path);
/*
 * Check one fs/subvolume tree: walk every block, collect inode and root
 * records, validate the root node itself, and (in repair mode) fix any
 * corrupted tree blocks found along the way.
 *
 * Orphan data extents previously attached to the root are moved onto the
 * matching inode_record before the walk so the inode checks see them.
 * Partially-dropped roots resume the walk from drop_progress.
 */
3613 static int check_fs_root(struct btrfs_root *root,
3614 struct cache_tree *root_cache,
3615 struct walk_control *wc)
3621 struct btrfs_path path;
3622 struct shared_node root_node;
3623 struct root_record *rec;
3624 struct btrfs_root_item *root_item = &root->root_item;
3625 struct cache_tree corrupt_blocks;
3626 struct orphan_data_extent *orphan;
3627 struct orphan_data_extent *tmp;
3628 enum btrfs_tree_block_status status;
3631 * Reuse the corrupt_block cache tree to record corrupted tree block
3633 * Unlike the usage in extent tree check, here we do it in a per
3634 * fs/subvol tree base.
3636 cache_tree_init(&corrupt_blocks);
3637 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* Reloc trees get no root_record; everything else is tracked. */
3639 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3640 rec = get_root_rec(root_cache, root->root_key.objectid);
3641 BUG_ON(IS_ERR(rec));
3642 if (btrfs_root_refs(root_item) > 0)
3643 rec->found_root_item = 1;
3646 btrfs_init_path(&path);
3647 memset(&root_node, 0, sizeof(root_node));
3648 cache_tree_init(&root_node.root_cache);
3649 cache_tree_init(&root_node.inode_cache);
3651 /* Move the orphan extent record to corresponding inode_record */
3652 list_for_each_entry_safe(orphan, tmp,
3653 &root->orphan_data_extents, list) {
3654 struct inode_record *inode;
3656 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3658 BUG_ON(IS_ERR(inode));
3659 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3660 list_move(&orphan->list, &inode->orphan_extents);
3663 level = btrfs_header_level(root->node);
3664 memset(wc->nodes, 0, sizeof(wc->nodes));
3665 wc->nodes[level] = &root_node;
3666 wc->active_node = level;
3667 wc->root_level = level;
3669 /* We may not have checked the root block, lets do that now */
3670 if (btrfs_is_leaf(root->node))
3671 status = btrfs_check_leaf(root, NULL, root->node);
3673 status = btrfs_check_node(root, NULL, root->node);
3674 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* Live root: start from the top. Dropping root: resume at drop_progress. */
3677 if (btrfs_root_refs(root_item) > 0 ||
3678 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3679 path.nodes[level] = root->node;
3680 extent_buffer_get(root->node);
3681 path.slots[level] = 0;
3683 struct btrfs_key key;
3684 struct btrfs_disk_key found_key;
3686 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3687 level = root_item->drop_level;
3688 path.lowest_level = level;
3689 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3692 btrfs_node_key(path.nodes[level], &found_key,
3694 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3695 sizeof(found_key)));
/* Full tree walk: down then up, until the whole tree is covered. */
3699 wret = walk_down_tree(root, &path, wc, &level);
3705 wret = walk_up_tree(root, &path, wc, &level);
3712 btrfs_release_path(&path);
3714 if (!cache_tree_empty(&corrupt_blocks)) {
3715 struct cache_extent *cache;
3716 struct btrfs_corrupt_block *corrupt;
3718 printf("The following tree block(s) is corrupted in tree %llu:\n",
3719 root->root_key.objectid);
3720 cache = first_cache_extent(&corrupt_blocks);
3722 corrupt = container_of(cache,
3723 struct btrfs_corrupt_block,
3725 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3726 cache->start, corrupt->level,
3727 corrupt->key.objectid, corrupt->key.type,
3728 corrupt->key.offset);
3729 cache = next_cache_extent(cache);
3732 printf("Try to repair the btree for root %llu\n",
3733 root->root_key.objectid);
3734 ret = repair_btree(root, &corrupt_blocks);
3736 fprintf(stderr, "Failed to repair btree: %s\n",
3739 printf("Btree for root %llu is fixed\n",
3740 root->root_key.objectid);
3744 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3748 if (root_node.current) {
3749 root_node.current->checked = 1;
3750 maybe_free_inode_rec(&root_node.inode_cache,
3754 err = check_inode_recs(root, &root_node.inode_cache);
/* Reset fs_info state so the next root's check starts clean. */
3758 free_corrupt_blocks_tree(&corrupt_blocks);
3759 root->fs_info->corrupt_blocks = NULL;
3760 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Return non-zero if @objectid names a tree whose contents should be
 * checked as an fs tree: any regular fs tree objectid, plus the two
 * relocation trees which are explicitly accepted first.
 */
3764 static int fs_root_objectid(u64 objectid)
3766 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3767 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3769 return is_fstree(objectid);
/*
 * Iterate all ROOT_ITEMs in the tree of tree roots and run check_fs_root()
 * on each fs/subvolume tree; ROOT_REF/ROOT_BACKREF items are fed to
 * process_root_ref() instead.
 *
 * If the tree root node changes underneath us (a repair re-COWed it), or a
 * per-root check returns -EAGAIN, all collected root records are discarded
 * and the scan restarts from the beginning.
 */
3772 static int check_fs_roots(struct btrfs_root *root,
3773 struct cache_tree *root_cache)
3775 struct btrfs_path path;
3776 struct btrfs_key key;
3777 struct walk_control wc;
3778 struct extent_buffer *leaf, *tree_node;
3779 struct btrfs_root *tmp_root;
3780 struct btrfs_root *tree_root = root->fs_info->tree_root;
3784 if (ctx.progress_enabled) {
3785 ctx.tp = TASK_FS_ROOTS;
3786 task_start(ctx.info);
3790 * Just in case we made any changes to the extent tree that weren't
3791 * reflected into the free space cache yet.
3794 reset_cached_block_groups(root->fs_info);
3795 memset(&wc, 0, sizeof(wc));
3796 cache_tree_init(&wc.shared);
3797 btrfs_init_path(&path);
3802 key.type = BTRFS_ROOT_ITEM_KEY;
3803 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Snapshot the root node pointer to detect COW during the scan. */
3808 tree_node = tree_root->node;
3810 if (tree_node != tree_root->node) {
3811 free_root_recs_tree(root_cache);
3812 btrfs_release_path(&path);
3815 leaf = path.nodes[0];
3816 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3817 ret = btrfs_next_leaf(tree_root, &path);
3823 leaf = path.nodes[0];
3825 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3826 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3827 fs_root_objectid(key.objectid)) {
/* Reloc trees are read uncached (one per reloc'd root). */
3828 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3829 tmp_root = btrfs_read_fs_root_no_cache(
3830 root->fs_info, &key);
3832 key.offset = (u64)-1;
3833 tmp_root = btrfs_read_fs_root(
3834 root->fs_info, &key);
3836 if (IS_ERR(tmp_root)) {
3840 ret = check_fs_root(tmp_root, root_cache, &wc);
/* -EAGAIN: state was invalidated by a repair; restart the scan. */
3841 if (ret == -EAGAIN) {
3842 free_root_recs_tree(root_cache);
3843 btrfs_release_path(&path);
3848 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3849 btrfs_free_fs_root(tmp_root);
3850 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3851 key.type == BTRFS_ROOT_BACKREF_KEY) {
3852 process_root_ref(leaf, path.slots[0], &key,
3859 btrfs_release_path(&path);
3861 free_extent_cache_tree(&wc.shared);
3862 if (!cache_tree_empty(&wc.shared))
3863 fprintf(stderr, "warning line %d\n", __LINE__);
3865 task_stop(ctx.info);
/*
 * Cross-check every backref attached to extent record @rec against what
 * was found in the extent tree and in the referencing trees.
 *
 * Returns non-zero if any backref is inconsistent (missing from the
 * extent tree, not referenced back, count/bytenr/length mismatch, or a
 * global refcount mismatch); with @print_errs each problem is reported
 * to stderr.
 */
3870 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3872 struct list_head *cur = rec->backrefs.next;
3873 struct extent_backref *back;
3874 struct tree_backref *tback;
3875 struct data_backref *dback;
3879 while(cur != &rec->backrefs) {
3880 back = to_extent_backref(cur);
/* Backref known from a referencing tree but absent in the extent tree. */
3882 if (!back->found_extent_tree) {
3886 if (back->is_data) {
3887 dback = to_data_backref(back);
3888 fprintf(stderr, "Backref %llu %s %llu"
3889 " owner %llu offset %llu num_refs %lu"
3890 " not found in extent tree\n",
3891 (unsigned long long)rec->start,
3892 back->full_backref ?
3894 back->full_backref ?
3895 (unsigned long long)dback->parent:
3896 (unsigned long long)dback->root,
3897 (unsigned long long)dback->owner,
3898 (unsigned long long)dback->offset,
3899 (unsigned long)dback->num_refs);
3901 tback = to_tree_backref(back);
3902 fprintf(stderr, "Backref %llu parent %llu"
3903 " root %llu not found in extent tree\n",
3904 (unsigned long long)rec->start,
3905 (unsigned long long)tback->parent,
3906 (unsigned long long)tback->root);
/* Tree backref present in the extent tree but never seen in the tree. */
3909 if (!back->is_data && !back->found_ref) {
3913 tback = to_tree_backref(back);
3914 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3915 (unsigned long long)rec->start,
3916 back->full_backref ? "parent" : "root",
3917 back->full_backref ?
3918 (unsigned long long)tback->parent :
3919 (unsigned long long)tback->root, back);
3921 if (back->is_data) {
3922 dback = to_data_backref(back);
/* Per-backref count: file-extent refs found vs. extent-tree num_refs. */
3923 if (dback->found_ref != dback->num_refs) {
3927 fprintf(stderr, "Incorrect local backref count"
3928 " on %llu %s %llu owner %llu"
3929 " offset %llu found %u wanted %u back %p\n",
3930 (unsigned long long)rec->start,
3931 back->full_backref ?
3933 back->full_backref ?
3934 (unsigned long long)dback->parent:
3935 (unsigned long long)dback->root,
3936 (unsigned long long)dback->owner,
3937 (unsigned long long)dback->offset,
3938 dback->found_ref, dback->num_refs, back);
3940 if (dback->disk_bytenr != rec->start) {
3944 fprintf(stderr, "Backref disk bytenr does not"
3945 " match extent record, bytenr=%llu, "
3946 "ref bytenr=%llu\n",
3947 (unsigned long long)rec->start,
3948 (unsigned long long)dback->disk_bytenr);
3951 if (dback->bytes != rec->nr) {
3955 fprintf(stderr, "Backref bytes do not match "
3956 "extent backref, bytenr=%llu, ref "
3957 "bytes=%llu, backref bytes=%llu\n",
3958 (unsigned long long)rec->start,
3959 (unsigned long long)rec->nr,
3960 (unsigned long long)dback->bytes);
/* Accumulate total found refs to compare against rec->refs below. */
3963 if (!back->is_data) {
3966 dback = to_data_backref(back);
3967 found += dback->found_ref;
3970 if (found != rec->refs) {
3974 fprintf(stderr, "Incorrect global backref count "
3975 "on %llu found %llu wanted %llu\n",
3976 (unsigned long long)rec->start,
3977 (unsigned long long)found,
3978 (unsigned long long)rec->refs);
/*
 * Detach and free every backref hanging off @rec->backrefs.
 * (The list_del/free calls are in elided lines of this chunk.)
 */
3984 static int free_all_extent_backrefs(struct extent_record *rec)
3986 struct extent_backref *back;
3987 struct list_head *cur;
3988 while (!list_empty(&rec->backrefs)) {
3989 cur = rec->backrefs.next;
3990 back = to_extent_backref(cur);
/*
 * Drain @extent_cache completely, freeing each extent record together
 * with all of its backrefs.
 */
3997 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
3998 struct cache_tree *extent_cache)
4000 struct cache_extent *cache;
4001 struct extent_record *rec;
4004 cache = first_cache_extent(extent_cache);
4007 rec = container_of(cache, struct extent_record, cache);
4008 remove_cache_extent(extent_cache, cache);
4009 free_all_extent_backrefs(rec);
/*
 * Release @rec from @extent_cache early if it is already fully verified:
 * content and owner checked, refcounts match, no duplicates, all
 * backpointers consistent, and none of the structural error flags set.
 * Keeping verified records out of the cache bounds memory usage.
 */
4014 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4015 struct extent_record *rec)
4017 if (rec->content_checked && rec->owner_ref_checked &&
4018 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4019 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4020 rec->bad_full_backref && !rec->crossing_stripes &&
4021 !rec->wrong_chunk_type) {
4022 remove_cache_extent(extent_cache, &rec->cache);
4023 free_all_extent_backrefs(rec);
4024 list_del_init(&rec->list);
/*
 * Verify that the owner recorded in tree block @buf's header really does
 * reference this block.
 *
 * Fast path: a non-full tree backref whose root matches the header owner.
 * Slow path: read the owner's fs tree and search for @buf's first key one
 * level up — the block is owned iff the parent's blockptr points at it.
 *
 * Returns 0 when the owner ref checks out, 1 otherwise.
 */
4030 static int check_owner_ref(struct btrfs_root *root,
4031 struct extent_record *rec,
4032 struct extent_buffer *buf)
4034 struct extent_backref *node;
4035 struct tree_backref *back;
4036 struct btrfs_root *ref_root;
4037 struct btrfs_key key;
4038 struct btrfs_path path;
4039 struct extent_buffer *parent;
4044 list_for_each_entry(node, &rec->backrefs, list) {
4047 if (!node->found_ref)
/* Full backrefs identify the parent block, not the owning root. */
4049 if (node->full_backref)
4051 back = to_tree_backref(node);
4052 if (btrfs_header_owner(buf) == back->root)
4055 BUG_ON(rec->is_root);
4057 /* try to find the block by search corresponding fs tree */
4058 key.objectid = btrfs_header_owner(buf);
4059 key.type = BTRFS_ROOT_ITEM_KEY;
4060 key.offset = (u64)-1;
4062 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4063 if (IS_ERR(ref_root))
4066 level = btrfs_header_level(buf);
4068 btrfs_item_key_to_cpu(buf, &key, 0);
4070 btrfs_node_key_to_cpu(buf, &key, 0);
4072 btrfs_init_path(&path);
/* Stop the search one level above @buf so we land on its parent. */
4073 path.lowest_level = level + 1;
4074 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4078 parent = path.nodes[level + 1];
4079 if (parent && buf->start == btrfs_node_blockptr(parent,
4080 path.slots[level + 1]))
4083 btrfs_release_path(&path);
4084 return found ? 0 : 1;
/*
 * Return non-zero if @rec has a non-full tree backref rooted in the
 * extent tree, i.e. the recorded block belongs to the extent tree itself.
 */
4087 static int is_extent_tree_record(struct extent_record *rec)
4089 struct list_head *cur = rec->backrefs.next;
4090 struct extent_backref *node;
4091 struct tree_backref *back;
4094 while(cur != &rec->backrefs) {
4095 node = to_extent_backref(cur);
4099 back = to_tree_backref(node);
/* A full backref names a parent block, not a root — skip it. */
4100 if (node->full_backref)
4102 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an I/O failure on range [@start, @start+len): if the range maps
 * to an extent-tree block, register it as a corrupt extent record keyed
 * by the block's parent key so later repair can find it.
 */
4109 static int record_bad_block_io(struct btrfs_fs_info *info,
4110 struct cache_tree *extent_cache,
4113 struct extent_record *rec;
4114 struct cache_extent *cache;
4115 struct btrfs_key key;
4117 cache = lookup_cache_extent(extent_cache, start, len);
4121 rec = container_of(cache, struct extent_record, cache);
/* Only extent-tree blocks are handled by this corruption path. */
4122 if (!is_extent_tree_record(rec))
4125 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4126 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 in @buf to repair a bad key
 * ordering.  For interior nodes the two key pointers are exchanged; for
 * leaves the item headers, item data and the cached path keys are all
 * swapped.  When slot 0 is involved, the low keys up the path must be
 * fixed as well.
 */
4129 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4130 struct extent_buffer *buf, int slot)
4132 if (btrfs_header_level(buf)) {
4133 struct btrfs_key_ptr ptr1, ptr2;
/* Node case: read both key pointers, then write them back exchanged. */
4135 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4136 sizeof(struct btrfs_key_ptr));
4137 read_extent_buffer(buf, &ptr2,
4138 btrfs_node_key_ptr_offset(slot + 1),
4139 sizeof(struct btrfs_key_ptr));
4140 write_extent_buffer(buf, &ptr1,
4141 btrfs_node_key_ptr_offset(slot + 1),
4142 sizeof(struct btrfs_key_ptr));
4143 write_extent_buffer(buf, &ptr2,
4144 btrfs_node_key_ptr_offset(slot),
4145 sizeof(struct btrfs_key_ptr));
/* Presumably only reached when slot == 0 (guard elided in this chunk):
 * the node's first key changed, so propagate it to the parents. */
4147 struct btrfs_disk_key key;
4148 btrfs_node_key(buf, &key, 0);
4149 btrfs_fixup_low_keys(root, path, &key,
4150 btrfs_header_level(buf) + 1);
/* Leaf case: swap the two items' keys, offsets, sizes and payloads. */
4153 struct btrfs_item *item1, *item2;
4154 struct btrfs_key k1, k2;
4155 char *item1_data, *item2_data;
4156 u32 item1_offset, item2_offset, item1_size, item2_size;
4158 item1 = btrfs_item_nr(slot);
4159 item2 = btrfs_item_nr(slot + 1);
4160 btrfs_item_key_to_cpu(buf, &k1, slot);
4161 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4162 item1_offset = btrfs_item_offset(buf, item1);
4163 item2_offset = btrfs_item_offset(buf, item2);
4164 item1_size = btrfs_item_size(buf, item1);
4165 item2_size = btrfs_item_size(buf, item2);
/* Allocation-failure handling for these two mallocs is elided here. */
4167 item1_data = malloc(item1_size);
4170 item2_data = malloc(item2_size);
4176 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4177 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
/* Write each payload into the other item's data area. */
4179 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4180 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Exchange the header bookkeeping so each key still maps to its data. */
4184 btrfs_set_item_offset(buf, item1, item2_offset);
4185 btrfs_set_item_offset(buf, item2, item1_offset);
4186 btrfs_set_item_size(buf, item1, item2_size);
4187 btrfs_set_item_size(buf, item2, item1_size);
/* btrfs_set_item_key_unsafe() operates on path->slots[0]; point it at
 * each slot in turn to install the swapped keys. */
4189 path->slots[0] = slot;
4190 btrfs_set_item_key_unsafe(root, path, &k2);
4191 path->slots[0] = slot + 1;
4192 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Repair a block whose keys are out of order: scan adjacent key pairs in
 * the buffer at path->lowest_level and swap any pair found in the wrong
 * order via swap_values(), then mark the buffer dirty.
 * NOTE(review): the loop's continue/early-exit lines and the error check
 * after swap_values() are elided in this chunk.
 */
4197 static int fix_key_order(struct btrfs_trans_handle *trans,
4198 struct btrfs_root *root,
4199 struct btrfs_path *path)
4201 struct extent_buffer *buf;
4202 struct btrfs_key k1, k2;
4204 int level = path->lowest_level;
4207 buf = path->nodes[level];
4208 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Nodes store key pointers, leaves store item keys — read accordingly. */
4210 btrfs_node_key_to_cpu(buf, &k1, i);
4211 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4213 btrfs_item_key_to_cpu(buf, &k1, i);
4214 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* Correct order (k1 < k2): nothing to do for this pair. */
4216 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4218 ret = swap_values(root, path, buf, i);
4221 btrfs_mark_buffer_dirty(buf);
/*
 * Remove an item whose offset/size bookkeeping is corrupt.  Only item
 * types whose loss fsck can tolerate (and later rebuild) are deleted;
 * anything else is refused.  The item header array is compacted in place
 * and nritems decremented; the item's data bytes are left behind as
 * garbage, which is acceptable since offsets no longer reference them.
 */
4227 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4228 struct btrfs_root *root,
4229 struct btrfs_path *path,
4230 struct extent_buffer *buf, int slot)
4232 struct btrfs_key key;
4233 int nritems = btrfs_header_nritems(buf);
4235 btrfs_item_key_to_cpu(buf, &key, slot);
4237 /* These are all the keys we can deal with missing. */
4238 if (key.type != BTRFS_DIR_INDEX_KEY &&
4239 key.type != BTRFS_EXTENT_ITEM_KEY &&
4240 key.type != BTRFS_METADATA_ITEM_KEY &&
4241 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4242 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4245 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4246 (unsigned long long)key.objectid, key.type,
4247 (unsigned long long)key.offset, slot, buf->start);
/* Shift the following item headers down by one to overwrite the slot. */
4248 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4249 btrfs_item_nr_offset(slot + 1),
4250 sizeof(struct btrfs_item) *
4251 (nritems - slot - 1));
4252 btrfs_set_header_nritems(buf, nritems - 1);
/* Presumably only when slot == 0 (guard elided): the leaf's first key
 * changed, so fix the low keys up the path. */
4254 struct btrfs_disk_key disk_key;
4256 btrfs_item_key(buf, &disk_key, 0);
4257 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4259 btrfs_mark_buffer_dirty(buf);
/*
 * Repair a leaf with invalid item offsets.  Walks the items front to
 * back; item 0's data must end at BTRFS_LEAF_DATA_SIZE and each later
 * item must end where its predecessor's data starts.  Items that overlap
 * or run off the end are deleted (when possible) via delete_bogus_item();
 * items that merely leave a gap are shifted up by the gap size.
 */
4263 static int fix_item_offset(struct btrfs_trans_handle *trans,
4264 struct btrfs_root *root,
4265 struct btrfs_path *path)
4267 struct extent_buffer *buf;
4269 /* We should only get this for leaves */
4272 BUG_ON(path->lowest_level);
4273 buf = path->nodes[0];
4275 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4276 unsigned int shift = 0, offset;
/* First item: its data must butt up against the end of the leaf. */
4278 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4279 BTRFS_LEAF_DATA_SIZE(root)) {
4280 if (btrfs_item_end_nr(buf, i) >
4281 BTRFS_LEAF_DATA_SIZE(root)) {
4282 ret = delete_bogus_item(trans, root, path,
4286 fprintf(stderr, "item is off the end of the "
4287 "leaf, can't fix\n");
4291 shift = BTRFS_LEAF_DATA_SIZE(root) -
4292 btrfs_item_end_nr(buf, i);
/* Later items: data must end exactly at the previous item's offset. */
4293 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4294 btrfs_item_offset_nr(buf, i - 1)) {
4295 if (btrfs_item_end_nr(buf, i) >
4296 btrfs_item_offset_nr(buf, i - 1)) {
4297 ret = delete_bogus_item(trans, root, path,
4301 fprintf(stderr, "items overlap, can't fix\n");
4305 shift = btrfs_item_offset_nr(buf, i - 1) -
4306 btrfs_item_end_nr(buf, i);
4311 printf("Shifting item nr %d by %u bytes in block %llu\n",
4312 i, shift, (unsigned long long)buf->start);
4313 offset = btrfs_item_offset_nr(buf, i);
/* Move the item data up by @shift and record the new offset. */
4314 memmove_extent_buffer(buf,
4315 btrfs_leaf_data(buf) + offset + shift,
4316 btrfs_leaf_data(buf) + offset,
4317 btrfs_item_size_nr(buf, i));
4318 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4320 btrfs_mark_buffer_dirty(buf);
4324 * We may have moved things, in which case we want to exit so we don't
4325 * write those changes out. Once we have proper abort functionality in
4326 * progs this can be changed to something nicer.
4333 * Attempt to fix basic block failures.  If we can't fix it for whatever reason
4334 * then just return -EIO.
/*
 * Only bad key order and invalid item offsets are repairable here.  For
 * every root that references the block (found via btrfs_find_all_roots),
 * a transaction is started, the block is COWed into place with
 * btrfs_search_slot(skip_check_block), and the appropriate fixer
 * (fix_key_order / fix_item_offset) is run before committing.
 */
4336 static int try_to_fix_bad_block(struct btrfs_root *root,
4337 struct extent_buffer *buf,
4338 enum btrfs_tree_block_status status)
4340 struct btrfs_trans_handle *trans;
4341 struct ulist *roots;
4342 struct ulist_node *node;
4343 struct btrfs_root *search_root;
4344 struct btrfs_path *path;
4345 struct ulist_iterator iter;
4346 struct btrfs_key root_key, key;
/* Anything other than these two statuses cannot be fixed here. */
4349 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4350 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4353 path = btrfs_alloc_path();
4357 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4360 btrfs_free_path(path);
/* Repair the block once per referencing root. */
4364 ULIST_ITER_INIT(&iter);
4365 while ((node = ulist_next(roots, &iter))) {
4366 root_key.objectid = node->val;
4367 root_key.type = BTRFS_ROOT_ITEM_KEY;
4368 root_key.offset = (u64)-1;
4370 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4377 trans = btrfs_start_transaction(search_root, 0);
4378 if (IS_ERR(trans)) {
4379 ret = PTR_ERR(trans);
/* Search down to the bad block itself; skip_check_block keeps the
 * known-bad buffer from being rejected while we COW it. */
4383 path->lowest_level = btrfs_header_level(buf);
4384 path->skip_check_block = 1;
4385 if (path->lowest_level)
4386 btrfs_node_key_to_cpu(buf, &key, 0);
4388 btrfs_item_key_to_cpu(buf, &key, 0);
4389 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4392 btrfs_commit_transaction(trans, search_root);
4395 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4396 ret = fix_key_order(trans, search_root, path);
4397 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4398 ret = fix_item_offset(trans, search_root, path);
4400 btrfs_commit_transaction(trans, search_root);
4403 btrfs_release_path(path);
4404 btrfs_commit_transaction(trans, search_root);
4407 btrfs_free_path(path);
/*
 * Validate one tree block against its extent record: record its
 * generation/first key/level, run btrfs_check_leaf()/btrfs_check_node(),
 * and if the block is bad try try_to_fix_bad_block() (repair mode
 * presumed — the guard line is elided here).  Also settles the
 * owner_ref_checked flag, either directly for FULL_BACKREF blocks or via
 * check_owner_ref().
 */
4411 static int check_block(struct btrfs_root *root,
4412 struct cache_tree *extent_cache,
4413 struct extent_buffer *buf, u64 flags)
4415 struct extent_record *rec;
4416 struct cache_extent *cache;
4417 struct btrfs_key key;
4418 enum btrfs_tree_block_status status;
4422 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4425 rec = container_of(cache, struct extent_record, cache);
4426 rec->generation = btrfs_header_generation(buf);
4428 level = btrfs_header_level(buf);
4429 if (btrfs_header_nritems(buf) > 0) {
/* Stash the block's first key and level for later reporting. */
4432 btrfs_item_key_to_cpu(buf, &key, 0);
4434 btrfs_node_key_to_cpu(buf, &key, 0);
4436 rec->info_objectid = key.objectid;
4438 rec->info_level = level;
4440 if (btrfs_is_leaf(buf))
4441 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4443 status = btrfs_check_node(root, &rec->parent_key, buf);
4445 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4447 status = try_to_fix_bad_block(root, buf, status);
4448 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4450 fprintf(stderr, "bad block %llu\n",
4451 (unsigned long long)buf->start);
4454 * Signal to callers we need to start the scan over
4455 * again since we'll have cowed blocks.
4460 rec->content_checked = 1;
/* Full-backref blocks don't name an owning root, so there is no
 * owner reference to verify. */
4461 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4462 rec->owner_ref_checked = 1;
4464 ret = check_owner_ref(root, rec, buf);
4466 rec->owner_ref_checked = 1;
/* Record may now be fully settled — drop it if nothing is pending. */
4470 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look up an existing tree backref on @rec matching either @parent (full
 * backref) or @root (keyed backref), using the record's rb-tree of
 * backrefs.  Returns NULL when no matching backref exists.
 */
4475 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4476 u64 parent, u64 root)
4478 struct rb_node *node;
4479 struct tree_backref *back = NULL;
4480 struct tree_backref match = {
/* A nonzero parent selects full-backref matching; otherwise match by
 * root (the root-assignment lines are elided in this chunk). */
4487 match.parent = parent;
4488 match.node.full_backref = 1;
4493 node = rb_search(&rec->backref_tree, &match.node.node,
4494 (rb_compare_keys)compare_extent_backref, NULL);
4496 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Allocate and attach a new tree backref to @rec.  The backref is keyed
 * by @parent when nonzero (full backref) or by @root otherwise, and is
 * inserted into both the record's list and its rb-tree.
 * NOTE(review): the malloc-failure check is elided in this chunk.
 */
4501 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4502 u64 parent, u64 root)
4504 struct tree_backref *ref = malloc(sizeof(*ref));
4508 memset(&ref->node, 0, sizeof(ref->node));
4510 ref->parent = parent;
4511 ref->node.full_backref = 1;
4514 ref->node.full_backref = 0;
4516 list_add_tail(&ref->node.list, &rec->backrefs);
4517 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
/*
 * Look up an existing data backref on @rec.  The match key includes
 * parent/root/owner/offset plus found_ref and disk_bytenr, so refs seen
 * from the extent tree and refs seen from file extents are kept as
 * distinct entries.  Returns NULL when no match exists.
 */
4522 static struct data_backref *find_data_backref(struct extent_record *rec,
4523 u64 parent, u64 root,
4524 u64 owner, u64 offset,
4526 u64 disk_bytenr, u64 bytes)
4528 struct rb_node *node;
4529 struct data_backref *back = NULL;
4530 struct data_backref match = {
4537 .found_ref = found_ref,
4538 .disk_bytenr = disk_bytenr,
/* Nonzero parent means full-backref matching (shared data ref). */
4542 match.parent = parent;
4543 match.node.full_backref = 1;
4548 node = rb_search(&rec->backref_tree, &match.node.node,
4549 (rb_compare_keys)compare_extent_backref, NULL);
4551 back = to_data_backref(rb_node_to_extent_backref(node));
/*
 * Allocate and attach a new data backref to @rec, keyed by @parent when
 * nonzero (shared/full backref) or by root/owner/offset otherwise.  Also
 * bumps the record's max_size when @max_size exceeds it.
 * NOTE(review): the malloc-failure check and some field assignments are
 * elided in this chunk.
 */
4556 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4557 u64 parent, u64 root,
4558 u64 owner, u64 offset,
4561 struct data_backref *ref = malloc(sizeof(*ref));
4565 memset(&ref->node, 0, sizeof(ref->node));
4566 ref->node.is_data = 1;
4569 ref->parent = parent;
4572 ref->node.full_backref = 1;
4576 ref->offset = offset;
4577 ref->node.full_backref = 0;
4579 ref->bytes = max_size;
4582 list_add_tail(&ref->node.list, &rec->backrefs);
4583 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4584 if (max_size > rec->max_size)
4585 rec->max_size = max_size;
4589 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent's kind (data vs metadata,
 * and for metadata SYSTEM vs METADATA as inferred from its first tree
 * backref) disagrees with the flags of the block group containing it.
 */
4590 static void check_extent_type(struct extent_record *rec)
4592 struct btrfs_block_group_cache *bg_cache;
4594 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4598 /* data extent, check chunk directly*/
4599 if (!rec->metadata) {
4600 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4601 rec->wrong_chunk_type = 1;
4605 /* metadata extent, check the obvious case first */
4606 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4607 BTRFS_BLOCK_GROUP_METADATA))) {
4608 rec->wrong_chunk_type = 1;
4613 * Check SYSTEM extent, as it's also marked as metadata, we can only
4614 * make sure it's a SYSTEM extent by its backref
4616 if (!list_empty(&rec->backrefs)) {
4617 struct extent_backref *node;
4618 struct tree_backref *tback;
4621 node = to_extent_backref(rec->backrefs.next);
4622 if (node->is_data) {
4623 /* tree block shouldn't have data backref */
4624 rec->wrong_chunk_type = 1;
4627 tback = container_of(node, struct tree_backref, node);
/* A chunk-tree backref implies a SYSTEM block group. */
4629 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4630 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4632 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4633 if (!(bg_cache->flags & bg_type))
4634 rec->wrong_chunk_type = 1;
4639 * Allocate a new extent record, fill default values from @tmpl and insert int
4640 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4641 * the cache, otherwise it fails.
/*
 * Copies the caller-supplied fields from @tmpl, zeroes the check-state
 * flags, then computes the derived state (crossing_stripes, chunk-type
 * sanity) and accounts the bytes in the global bytes_used counter.
 * NOTE(review): the malloc-failure check and final return are elided.
 */
4643 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4644 struct extent_record *tmpl)
4646 struct extent_record *rec;
4649 rec = malloc(sizeof(*rec));
4652 rec->start = tmpl->start;
4653 rec->max_size = tmpl->max_size;
/* nr may arrive smaller than max_size (e.g. from a backref); keep the
 * larger of the two as the record's size. */
4654 rec->nr = max(tmpl->nr, tmpl->max_size);
4655 rec->found_rec = tmpl->found_rec;
4656 rec->content_checked = tmpl->content_checked;
4657 rec->owner_ref_checked = tmpl->owner_ref_checked;
4658 rec->num_duplicates = 0;
4659 rec->metadata = tmpl->metadata;
4660 rec->flag_block_full_backref = FLAG_UNSET;
4661 rec->bad_full_backref = 0;
4662 rec->crossing_stripes = 0;
4663 rec->wrong_chunk_type = 0;
4664 rec->is_root = tmpl->is_root;
4665 rec->refs = tmpl->refs;
4666 rec->extent_item_refs = tmpl->extent_item_refs;
4667 rec->parent_generation = tmpl->parent_generation;
4668 INIT_LIST_HEAD(&rec->backrefs);
4669 INIT_LIST_HEAD(&rec->dups);
4670 INIT_LIST_HEAD(&rec->list);
4671 rec->backref_tree = RB_ROOT;
4672 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4673 rec->cache.start = tmpl->start;
4674 rec->cache.size = tmpl->nr;
4675 ret = insert_cache_extent(extent_cache, &rec->cache);
4677 bytes_used += rec->nr;
/* Metadata extents must not straddle a stripe boundary; flag it for
 * the repair pass (the metadata guard line is elided here). */
4680 rec->crossing_stripes = check_crossing_stripes(rec->start,
4681 global_info->tree_root->nodesize);
4682 check_extent_type(rec);
4687 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4689 * - refs - if found, increase refs
4690 * - is_root - if found, set
4691 * - content_checked - if found, set
4692 * - owner_ref_checked - if found, set
4694 * If not found, create a new one, initialize and insert.
/*
 * Two extent items claiming overlapping ranges (or the same range twice)
 * are recorded as duplicates on the existing record for later repair.
 */
4696 static int add_extent_rec(struct cache_tree *extent_cache,
4697 struct extent_record *tmpl)
4699 struct extent_record *rec;
4700 struct cache_extent *cache;
4704 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4706 rec = container_of(cache, struct extent_record, cache);
4710 rec->nr = max(tmpl->nr, tmpl->max_size);
4713 * We need to make sure to reset nr to whatever the extent
4714 * record says was the real size, this way we can compare it to
4717 if (tmpl->found_rec) {
/* Either a differently-keyed extent inside our range, or the same
 * extent item seen twice: both are duplicate conditions. */
4718 if (tmpl->start != rec->start || rec->found_rec) {
4719 struct extent_record *tmp;
4722 if (list_empty(&rec->list))
4723 list_add_tail(&rec->list,
4724 &duplicate_extents);
4727 * We have to do this song and dance in case we
4728 * find an extent record that falls inside of
4729 * our current extent record but does not have
4730 * the same objectid.
4732 tmp = malloc(sizeof(*tmp));
4735 tmp->start = tmpl->start;
4736 tmp->max_size = tmpl->max_size;
4739 tmp->metadata = tmpl->metadata;
4740 tmp->extent_item_refs = tmpl->extent_item_refs;
4741 INIT_LIST_HEAD(&tmp->list);
4742 list_add_tail(&tmp->list, &rec->dups);
4743 rec->num_duplicates++;
/* Conflicting ref counts from two extent items are reported but the
 * newer value wins (dup flag handling lines are elided here). */
4750 if (tmpl->extent_item_refs && !dup) {
4751 if (rec->extent_item_refs) {
4752 fprintf(stderr, "block %llu rec "
4753 "extent_item_refs %llu, passed %llu\n",
4754 (unsigned long long)tmpl->start,
4755 (unsigned long long)
4756 rec->extent_item_refs,
4757 (unsigned long long)tmpl->extent_item_refs);
4759 rec->extent_item_refs = tmpl->extent_item_refs;
4763 if (tmpl->content_checked)
4764 rec->content_checked = 1;
4765 if (tmpl->owner_ref_checked)
4766 rec->owner_ref_checked = 1;
4767 memcpy(&rec->parent_key, &tmpl->parent_key,
4768 sizeof(tmpl->parent_key));
4769 if (tmpl->parent_generation)
4770 rec->parent_generation = tmpl->parent_generation;
4771 if (rec->max_size < tmpl->max_size)
4772 rec->max_size = tmpl->max_size;
4775 * A metadata extent can't cross stripe_len boundary, otherwise
4776 * kernel scrub won't be able to handle it.
4777 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4781 rec->crossing_stripes = check_crossing_stripes(
4782 rec->start, global_info->tree_root->nodesize);
4783 check_extent_type(rec);
4784 maybe_free_extent_rec(extent_cache, rec);
/* Not in the cache yet: create a fresh record from the template. */
4788 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree-block backref for @bytenr.  Creates a stub extent record
 * first if none covers the bytenr, then finds-or-allocates the backref
 * and marks it found_ref (seen from a tree block) or found_extent_tree
 * (seen from the extent tree) — the found_ref guard line is elided here.
 * Duplicate sightings of the same backref are reported, not fatal.
 */
4793 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4794 u64 parent, u64 root, int found_ref)
4796 struct extent_record *rec;
4797 struct tree_backref *back;
4798 struct cache_extent *cache;
4800 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4802 struct extent_record tmpl;
/* No record yet: insert a minimal placeholder, then re-look it up. */
4804 memset(&tmpl, 0, sizeof(tmpl));
4805 tmpl.start = bytenr;
4809 add_extent_rec_nolookup(extent_cache, &tmpl);
4811 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4816 rec = container_of(cache, struct extent_record, cache);
/* A record that merely overlaps but doesn't start at bytenr can't take
 * this backref (the abort path is elided here). */
4817 if (rec->start != bytenr) {
4821 back = find_tree_backref(rec, parent, root);
4823 back = alloc_tree_backref(rec, parent, root);
4828 if (back->node.found_ref) {
4829 fprintf(stderr, "Extent back ref already exists "
4830 "for %llu parent %llu root %llu \n",
4831 (unsigned long long)bytenr,
4832 (unsigned long long)parent,
4833 (unsigned long long)root);
4835 back->node.found_ref = 1;
4837 if (back->node.found_extent_tree) {
4838 fprintf(stderr, "Extent back ref already exists "
4839 "for %llu parent %llu root %llu \n",
4840 (unsigned long long)bytenr,
4841 (unsigned long long)parent,
4842 (unsigned long long)root);
4844 back->node.found_extent_tree = 1;
4846 check_extent_type(rec);
4847 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data-extent backref for @bytenr.  Creates a stub extent
 * record if needed, then finds-or-allocates a matching data backref.
 * @found_ref distinguishes refs seen from file-extent items (where
 * @max_size is the real extent size) from refs seen in the extent tree
 * (where @num_refs is authoritative).
 */
4851 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4852 u64 parent, u64 root, u64 owner, u64 offset,
4853 u32 num_refs, int found_ref, u64 max_size)
4855 struct extent_record *rec;
4856 struct data_backref *back;
4857 struct cache_extent *cache;
4859 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4861 struct extent_record tmpl;
/* No record yet: insert a minimal placeholder, then re-look it up. */
4863 memset(&tmpl, 0, sizeof(tmpl));
4864 tmpl.start = bytenr;
4866 tmpl.max_size = max_size;
4868 add_extent_rec_nolookup(extent_cache, &tmpl);
4870 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4875 rec = container_of(cache, struct extent_record, cache);
4876 if (rec->max_size < max_size)
4877 rec->max_size = max_size;
4880 * If found_ref is set then max_size is the real size and must match the
4881 * existing refs. So if we have already found a ref then we need to
4882 * make sure that this ref matches the existing one, otherwise we need
4883 * to add a new backref so we can notice that the backrefs don't match
4884 * and we need to figure out who is telling the truth. This is to
4885 * account for that awful fsync bug I introduced where we'd end up with
4886 * a btrfs_file_extent_item that would have its length include multiple
4887 * prealloc extents or point inside of a prealloc extent.
4889 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4892 back = alloc_data_backref(rec, parent, root, owner, offset,
/* found_ref path: each file-extent sighting contributes exactly one
 * ref, and its size must agree with a previously-seen ref. */
4898 BUG_ON(num_refs != 1);
4899 if (back->node.found_ref)
4900 BUG_ON(back->bytes != max_size);
4901 back->node.found_ref = 1;
4902 back->found_ref += 1;
4903 back->bytes = max_size;
4904 back->disk_bytenr = bytenr;
/* Data extents carry no checkable content/owner info of their own. */
4906 rec->content_checked = 1;
4907 rec->owner_ref_checked = 1;
4909 if (back->node.found_extent_tree) {
4910 fprintf(stderr, "Extent back ref already exists "
4911 "for %llu parent %llu root %llu "
4912 "owner %llu offset %llu num_refs %lu\n",
4913 (unsigned long long)bytenr,
4914 (unsigned long long)parent,
4915 (unsigned long long)root,
4916 (unsigned long long)owner,
4917 (unsigned long long)offset,
4918 (unsigned long)num_refs);
4920 back->num_refs = num_refs;
4921 back->node.found_extent_tree = 1;
4923 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, size) for scanning: add it to @seen first (which fails
 * if it was already seen, presumably causing an early return on the
 * elided line) and only then to @pending.
 */
4927 static int add_pending(struct cache_tree *pending,
4928 struct cache_tree *seen, u64 bytenr, u32 size)
4931 ret = add_cache_extent(seen, bytenr, size);
4934 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits with up to @bits_nr block ranges to process next.  Order of
 * preference: an outstanding readahead extent (returned alone), then
 * node extents near @last (starting up to 32K before it to batch
 * neighbours), then plain pending extents.  When room remains, pending
 * extents close behind the batch (< 32K gap) are appended so nearby
 * blocks are read together.  Returns the number of entries filled
 * (return statements are elided in this chunk).
 */
4938 static int pick_next_pending(struct cache_tree *pending,
4939 struct cache_tree *reada,
4940 struct cache_tree *nodes,
4941 u64 last, struct block_info *bits, int bits_nr,
4944 unsigned long node_start = last;
4945 struct cache_extent *cache;
4948 cache = search_cache_extent(reada, 0);
4950 bits[0].start = cache->start;
4951 bits[0].size = cache->size;
/* Back up a little so neighbouring nodes get batched together. */
4956 if (node_start > 32768)
4957 node_start -= 32768;
4959 cache = search_cache_extent(nodes, node_start);
4961 cache = search_cache_extent(nodes, 0);
4964 cache = search_cache_extent(pending, 0);
4969 bits[ret].start = cache->start;
4970 bits[ret].size = cache->size;
4971 cache = next_cache_extent(cache);
4973 } while (cache && ret < bits_nr);
4979 bits[ret].start = cache->start;
4980 bits[ret].size = cache->size;
4981 cache = next_cache_extent(cache);
4983 } while (cache && ret < bits_nr);
/* Plenty of room left: pull in pending extents that closely follow
 * the batch so they are serviced in the same pass. */
4985 if (bits_nr - ret > 8) {
4986 u64 lookup = bits[0].start + bits[0].size;
4987 struct cache_extent *next;
4988 next = search_cache_extent(pending, lookup);
4990 if (next->start - lookup > 32768)
4992 bits[ret].start = next->start;
4993 bits[ret].size = next->size;
4994 lookup = next->start + next->size;
4998 next = next_cache_extent(next);
/* Release one chunk record: unlink it from its lists and free it
 * (the free() line is elided in this chunk). */
5006 static void free_chunk_record(struct cache_extent *cache)
5008 struct chunk_record *rec;
5010 rec = container_of(cache, struct chunk_record, cache);
5011 list_del_init(&rec->list);
5012 list_del_init(&rec->dextents);
/* Free every chunk record held in @chunk_cache. */
5016 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5018 cache_tree_free_extents(chunk_cache, free_chunk_record);
/* Release one device record (rb-tree destructor callback; the free()
 * line is elided in this chunk). */
5021 static void free_device_record(struct rb_node *node)
5023 struct device_record *rec;
5025 rec = container_of(node, struct device_record, node);
5029 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into @tree's extent cache and, on success, append it to
 * the tree's block_groups list (the error return between the two steps
 * is elided in this chunk).
 */
5031 int insert_block_group_record(struct block_group_tree *tree,
5032 struct block_group_record *bg_rec)
5036 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5040 list_add_tail(&bg_rec->list, &tree->block_groups);
/* Release one block-group record: unlink and free (the free() line is
 * elided in this chunk). */
5044 static void free_block_group_record(struct cache_extent *cache)
5046 struct block_group_record *rec;
5048 rec = container_of(cache, struct block_group_record, cache);
5049 list_del_init(&rec->list);
/* Free every block-group record held in @tree. */
5053 void free_block_group_tree(struct block_group_tree *tree)
5055 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device-extent tree.  On success it starts out
 * on both orphan lists (no chunk / no device) until matched up later.
 */
5058 int insert_device_extent_record(struct device_extent_tree *tree,
5059 struct device_extent_record *de_rec)
5064 * Device extent is a bit different from the other extents, because
5065 * the extents which belong to the different devices may have the
5066 * same start and size, so we need use the special extent cache
5067 * search/insert functions.
5069 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5073 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5074 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/* Release one device-extent record: drop it from whichever lists it is
 * still on, then free (the free() line is elided in this chunk). */
5078 static void free_device_extent_record(struct cache_extent *cache)
5080 struct device_extent_record *rec;
5082 rec = container_of(cache, struct device_extent_record, cache);
5083 if (!list_empty(&rec->chunk_list))
5084 list_del_init(&rec->chunk_list);
5085 if (!list_empty(&rec->device_list))
5086 list_del_init(&rec->device_list);
/* Free every device-extent record held in @tree. */
5090 void free_device_extent_tree(struct device_extent_tree *tree)
5092 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5095 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Translate a v0 extent ref item into the internal backref records.
 * Objectids below BTRFS_FIRST_FREE_OBJECTID belong to internal trees,
 * so the ref is treated as a tree backref; otherwise as a data backref.
 * key.offset holds the parent block for these v0 items.
 */
5096 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5097 struct extent_buffer *leaf, int slot)
5099 struct btrfs_extent_ref_v0 *ref0;
5100 struct btrfs_key key;
5102 btrfs_item_key_to_cpu(leaf, &key, slot);
5103 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5104 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5105 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5107 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5108 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the chunk item at @slot, copying
 * geometry (length, stripe_len, type, io widths, num/sub stripes) and
 * each stripe's devid/offset/uuid.  The record is sized for
 * @num_stripes via btrfs_chunk_record_size(); exits the program on
 * allocation failure.
 */
5114 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5115 struct btrfs_key *key,
5118 struct btrfs_chunk *ptr;
5119 struct chunk_record *rec;
5122 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5123 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
/* Flexible-array allocation: record header plus num_stripes stripes. */
5125 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5127 fprintf(stderr, "memory allocation failed\n");
5131 INIT_LIST_HEAD(&rec->list);
5132 INIT_LIST_HEAD(&rec->dextents);
/* Chunks are keyed by logical start (key->offset). */
5135 rec->cache.start = key->offset;
5136 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5138 rec->generation = btrfs_header_generation(leaf);
5140 rec->objectid = key->objectid;
5141 rec->type = key->type;
5142 rec->offset = key->offset;
5144 rec->length = rec->cache.size;
5145 rec->owner = btrfs_chunk_owner(leaf, ptr);
5146 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5147 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5148 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5149 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5150 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5151 rec->num_stripes = num_stripes;
5152 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5154 for (i = 0; i < rec->num_stripes; ++i) {
5155 rec->stripes[i].devid =
5156 btrfs_stripe_devid_nr(leaf, ptr, i);
5157 rec->stripes[i].offset =
5158 btrfs_stripe_offset_nr(leaf, ptr, i);
5159 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5160 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Parse the chunk item at @slot into a chunk_record and insert it into
 * @chunk_cache; a collision means the same chunk appeared twice and is
 * reported (the record is freed on the elided error path).
 */
5167 static int process_chunk_item(struct cache_tree *chunk_cache,
5168 struct btrfs_key *key, struct extent_buffer *eb,
5171 struct chunk_record *rec;
5174 rec = btrfs_new_chunk_record(eb, key, slot);
5175 ret = insert_cache_extent(chunk_cache, &rec->cache);
5177 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5178 rec->offset, rec->length);
/*
 * Parse the dev item at @slot into a device_record and insert it into
 * @dev_cache (keyed by devid); a collision means a duplicate device item
 * and is reported.  Exits the program on allocation failure.
 */
5185 static int process_device_item(struct rb_root *dev_cache,
5186 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5188 struct btrfs_dev_item *ptr;
5189 struct device_record *rec;
5192 ptr = btrfs_item_ptr(eb,
5193 slot, struct btrfs_dev_item);
5195 rec = malloc(sizeof(*rec));
5197 fprintf(stderr, "memory allocation failed\n");
/* Initial devid from the key; overwritten below by the item's own
 * device_id field. */
5201 rec->devid = key->offset;
5202 rec->generation = btrfs_header_generation(eb);
5204 rec->objectid = key->objectid;
5205 rec->type = key->type;
5206 rec->offset = key->offset;
5208 rec->devid = btrfs_device_id(eb, ptr);
5209 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5210 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5212 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5214 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the block-group item at
 * @slot; the cache range is [key->objectid, key->objectid + key->offset)
 * and the group's flags are copied from the item.  Exits the program on
 * allocation failure.
 */
5221 struct block_group_record *
5222 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5225 struct btrfs_block_group_item *ptr;
5226 struct block_group_record *rec;
5228 rec = calloc(1, sizeof(*rec));
5230 fprintf(stderr, "memory allocation failed\n");
5234 rec->cache.start = key->objectid;
5235 rec->cache.size = key->offset;
5237 rec->generation = btrfs_header_generation(leaf);
5239 rec->objectid = key->objectid;
5240 rec->type = key->type;
5241 rec->offset = key->offset;
5243 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5244 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5246 INIT_LIST_HEAD(&rec->list);
/*
 * Parse the block-group item at @slot into a record and insert it into
 * @block_group_cache; a collision means the same block group appeared
 * twice and is reported.
 */
5251 static int process_block_group_item(struct block_group_tree *block_group_cache,
5252 struct btrfs_key *key,
5253 struct extent_buffer *eb, int slot)
5255 struct block_group_record *rec;
5258 rec = btrfs_new_block_group_record(eb, key, slot);
5259 ret = insert_block_group_record(block_group_cache, rec);
5261 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5262 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the dev-extent item at
 * @slot.  The cache key is (devid = key->objectid, start = key->offset);
 * the chunk back-pointer and length come from the item.  Exits the
 * program on allocation failure.
 */
5269 struct device_extent_record *
5270 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5271 struct btrfs_key *key, int slot)
5273 struct device_extent_record *rec;
5274 struct btrfs_dev_extent *ptr;
5276 rec = calloc(1, sizeof(*rec));
5278 fprintf(stderr, "memory allocation failed\n");
5282 rec->cache.objectid = key->objectid;
5283 rec->cache.start = key->offset;
5285 rec->generation = btrfs_header_generation(leaf);
5287 rec->objectid = key->objectid;
5288 rec->type = key->type;
5289 rec->offset = key->offset;
5291 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5292 rec->chunk_objecteid =
5293 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5295 btrfs_dev_extent_chunk_offset(leaf, ptr);
5296 rec->length = btrfs_dev_extent_length(leaf, ptr);
5297 rec->cache.size = rec->length;
5299 INIT_LIST_HEAD(&rec->chunk_list);
5300 INIT_LIST_HEAD(&rec->device_list);
/*
 * Parse the dev-extent item at @slot into a record and insert it into
 * @dev_extent_cache; a collision means the same device extent appeared
 * twice and is reported.
 */
5306 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5307 struct btrfs_key *key, struct extent_buffer *eb,
5310 struct device_extent_record *rec;
5313 rec = btrfs_new_device_extent_record(eb, key, slot);
5314 ret = insert_device_extent_record(dev_extent_cache, rec);
5317 "Device extent[%llu, %llu, %llu] existed.\n",
5318 rec->objectid, rec->offset, rec->length);
/*
 * Parse an EXTENT_ITEM / METADATA_ITEM into an extent record plus one
 * backref record per inline ref.  METADATA_ITEMs have length nodesize
 * (key.offset holds the level); EXTENT_ITEMs carry the length in
 * key.offset.  Handles v0 extent items when compiled with
 * BTRFS_COMPAT_EXTENT_TREE_V0.
 */
5325 static int process_extent_item(struct btrfs_root *root,
5326 struct cache_tree *extent_cache,
5327 struct extent_buffer *eb, int slot)
5329 struct btrfs_extent_item *ei;
5330 struct btrfs_extent_inline_ref *iref;
5331 struct btrfs_extent_data_ref *dref;
5332 struct btrfs_shared_data_ref *sref;
5333 struct btrfs_key key;
5334 struct extent_record tmpl;
5338 u32 item_size = btrfs_item_size_nr(eb, slot);
5344 btrfs_item_key_to_cpu(eb, &key, slot);
5346 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5348 num_bytes = root->nodesize;
5350 num_bytes = key.offset;
/* Item smaller than a modern extent item: must be the v0 layout. */
5353 if (item_size < sizeof(*ei)) {
5354 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5355 struct btrfs_extent_item_v0 *ei0;
5356 BUG_ON(item_size != sizeof(*ei0));
5357 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5358 refs = btrfs_extent_refs_v0(eb, ei0);
5362 memset(&tmpl, 0, sizeof(tmpl));
5363 tmpl.start = key.objectid;
5364 tmpl.nr = num_bytes;
5365 tmpl.extent_item_refs = refs;
5366 tmpl.metadata = metadata;
5368 tmpl.max_size = num_bytes;
5370 return add_extent_rec(extent_cache, &tmpl);
5373 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5374 refs = btrfs_extent_refs(eb, ei);
5375 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5380 memset(&tmpl, 0, sizeof(tmpl));
5381 tmpl.start = key.objectid;
5382 tmpl.nr = num_bytes;
5383 tmpl.extent_item_refs = refs;
5384 tmpl.metadata = metadata;
5386 tmpl.max_size = num_bytes;
5387 add_extent_rec(extent_cache, &tmpl);
/* Walk the inline refs that follow the extent item.  EXTENT_ITEM
 * tree blocks carry a tree_block_info before the first ref;
 * METADATA_ITEMs do not. */
5389 ptr = (unsigned long)(ei + 1);
5390 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5391 key.type == BTRFS_EXTENT_ITEM_KEY)
5392 ptr += sizeof(struct btrfs_tree_block_info);
5394 end = (unsigned long)ei + item_size;
5396 iref = (struct btrfs_extent_inline_ref *)ptr;
5397 type = btrfs_extent_inline_ref_type(eb, iref);
5398 offset = btrfs_extent_inline_ref_offset(eb, iref);
5400 case BTRFS_TREE_BLOCK_REF_KEY:
5401 add_tree_backref(extent_cache, key.objectid,
5404 case BTRFS_SHARED_BLOCK_REF_KEY:
5405 add_tree_backref(extent_cache, key.objectid,
5408 case BTRFS_EXTENT_DATA_REF_KEY:
5409 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5410 add_data_backref(extent_cache, key.objectid, 0,
5411 btrfs_extent_data_ref_root(eb, dref),
5412 btrfs_extent_data_ref_objectid(eb,
5414 btrfs_extent_data_ref_offset(eb, dref),
5415 btrfs_extent_data_ref_count(eb, dref),
5418 case BTRFS_SHARED_DATA_REF_KEY:
5419 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5420 add_data_backref(extent_cache, key.objectid, offset,
5422 btrfs_shared_data_ref_count(eb, sref),
5426 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5427 key.objectid, key.type, num_bytes);
5430 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that the free-space cache for @cache holds exactly one entry
 * covering [offset, offset + bytes).  Superblock mirror locations are
 * carved out first — btrfs_rmap_block() maps each mirror into the block
 * group, and the range is trimmed/split (recursing for the left side)
 * around any stripe a super lands on.  The matching free-space entry is
 * then unlinked so leftovers can be detected by the caller.
 */
5437 static int check_cache_range(struct btrfs_root *root,
5438 struct btrfs_block_group_cache *cache,
5439 u64 offset, u64 bytes)
5441 struct btrfs_free_space *entry;
5447 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5448 bytenr = btrfs_sb_offset(i);
5449 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5450 cache->key.objectid, bytenr, 0,
5451 &logical, &nr, &stripe_len);
/* No overlap between the super stripe and our range: skip it. */
5456 if (logical[nr] + stripe_len <= offset)
5458 if (offset + bytes <= logical[nr])
5460 if (logical[nr] == offset) {
5461 if (stripe_len >= bytes) {
/* Super stripe at the start: trim it off the front. */
5465 bytes -= stripe_len;
5466 offset += stripe_len;
5467 } else if (logical[nr] < offset) {
5468 if (logical[nr] + stripe_len >=
/* Super stripe overlaps the front from below: advance past it. */
5473 bytes = (offset + bytes) -
5474 (logical[nr] + stripe_len);
5475 offset = logical[nr] + stripe_len;
5478 * Could be tricky, the super may land in the
5479 * middle of the area we're checking. First
5480 * check the easiest case, it's at the end.
5482 if (logical[nr] + stripe_len >=
5484 bytes = logical[nr] - offset;
5488 /* Check the left side */
5489 ret = check_cache_range(root, cache,
5491 logical[nr] - offset);
5497 /* Now we continue with the right side */
5498 bytes = (offset + bytes) -
5499 (logical[nr] + stripe_len);
5500 offset = logical[nr] + stripe_len;
/* After carving out supers, the cache must contain this exact range. */
5507 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5509 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5510 offset, offset+bytes);
5514 if (entry->offset != offset) {
5515 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5520 if (entry->bytes != bytes) {
5521 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5522 bytes, entry->bytes, offset);
/* Consume the entry so any unexpected leftovers stand out later. */
5526 unlink_free_space(cache->free_space_ctl, entry);
/*
 * verify_space_cache - cross-check a block group's free space cache against
 * the extent tree.
 *
 * Walks all EXTENT_ITEM/METADATA_ITEM records inside the block group and
 * calls check_cache_range() on every gap between allocated extents (and on
 * the tail after the last extent).  Anything still left in the free space
 * ctl afterwards means the cache disagrees with the extent tree.
 *
 * NOTE(review): elided excerpt; some declarations, loop structure and error
 * handling between the visible lines are not shown.
 */
5531 static int verify_space_cache(struct btrfs_root *root,
5532 struct btrfs_block_group_cache *cache)
5534 struct btrfs_path *path;
5535 struct extent_buffer *leaf;
5536 struct btrfs_key key;
5540 path = btrfs_alloc_path();
5544 root = root->fs_info->extent_root;
/* Never look below the first superblock; it is not in the cache. */
5546 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5548 key.objectid = last;
5550 key.type = BTRFS_EXTENT_ITEM_KEY;
5552 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5557 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5558 ret = btrfs_next_leaf(root, path);
5566 leaf = path->nodes[0];
5567 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop once we walk past the end of this block group. */
5568 if (key.objectid >= cache->key.offset + cache->key.objectid)
5570 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5571 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Adjacent extent: advance `last` past it (METADATA_ITEM length is nodesize). */
5576 if (last == key.objectid) {
5577 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5578 last = key.objectid + key.offset;
5580 last = key.objectid + root->nodesize;
/* Gap between `last` and this extent must be free space. */
5585 ret = check_cache_range(root, cache, last,
5586 key.objectid - last);
5589 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5590 last = key.objectid + key.offset;
5592 last = key.objectid + root->nodesize;
/* Tail of the block group after the final extent. */
5596 if (last < cache->key.objectid + cache->key.offset)
5597 ret = check_cache_range(root, cache, last,
5598 cache->key.objectid +
5599 cache->key.offset - last);
5602 btrfs_free_path(path);
/* Any entry not consumed by check_cache_range() is an inconsistency. */
5605 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5606 fprintf(stderr, "There are still entries left in the space "
/*
 * check_space_cache - validate free space info for every block group.
 *
 * If the free space tree (compat_ro feature) is present it is loaded and
 * verified; otherwise the v1 space cache is loaded.  Each block group's
 * cache is then checked against the extent tree via verify_space_cache().
 * Returns -EINVAL if any block group failed verification, 0 otherwise.
 *
 * NOTE(review): elided excerpt; loop framing and some error paths between
 * the visible lines are not shown.
 */
5614 static int check_space_cache(struct btrfs_root *root)
5616 struct btrfs_block_group_cache *cache;
5617 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/* A stale cache generation means the kernel will rebuild the cache anyway. */
5621 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5622 btrfs_super_generation(root->fs_info->super_copy) !=
5623 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5624 printf("cache and super generation don't match, space cache "
5625 "will be invalidated\n");
5629 if (ctx.progress_enabled) {
5630 ctx.tp = TASK_FREE_SPACE;
5631 task_start(ctx.info);
/* Iterate block groups in address order starting past the first super. */
5635 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5639 start = cache->key.objectid + cache->key.offset;
5640 if (!cache->free_space_ctl) {
5641 if (btrfs_init_free_space_ctl(cache,
5642 root->sectorsize)) {
5647 btrfs_remove_free_space_cache(cache);
/* Free space tree path: super stripes are excluded before loading. */
5650 if (btrfs_fs_compat_ro(root->fs_info,
5651 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5652 ret = exclude_super_stripes(root, cache);
5654 fprintf(stderr, "could not exclude super stripes: %s\n",
5659 ret = load_free_space_tree(root->fs_info, cache);
5660 free_excluded_extents(root, cache);
5662 fprintf(stderr, "could not load free space tree: %s\n",
/* v1 space cache path. */
5669 ret = load_free_space_cache(root->fs_info, cache);
5674 ret = verify_space_cache(root, cache);
5676 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5677 cache->key.objectid);
5682 task_stop(ctx.info);
5684 return error ? -EINVAL : 0;
/*
 * check_extent_csums - re-read a data extent and verify its checksums.
 *
 * @bytenr/@num_bytes: the data extent to read back from disk.
 * @leaf_offset/@eb:   where the on-disk csum items for this extent live.
 *
 * Reads the extent in large chunks, computes a csum per sectorsize block and
 * compares it with the stored value; on mismatch it retries the remaining
 * mirrors before giving up.
 *
 * NOTE(review): elided excerpt; mirror-retry control flow, allocation error
 * handling and the final free/return are not shown.
 */
5687 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5688 u64 num_bytes, unsigned long leaf_offset,
5689 struct extent_buffer *eb) {
5692 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5694 unsigned long csum_offset;
5698 u64 data_checked = 0;
/* Data extents are always sector aligned; anything else is corrupt. */
5704 if (num_bytes % root->sectorsize)
5707 data = malloc(num_bytes);
5711 while (offset < num_bytes) {
5714 read_len = num_bytes - offset;
5715 /* read as much space once a time */
5716 ret = read_extent_data(root, data + offset,
5717 bytenr + offset, &read_len, mirror);
5721 /* verify every 4k data's checksum */
5722 while (data_checked < read_len) {
5724 tmp = offset + data_checked;
5726 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5727 csum, root->sectorsize);
5728 btrfs_csum_final(csum, (char *)&csum);
/* Locate the stored csum for this sector inside the csum item. */
5730 csum_offset = leaf_offset +
5731 tmp / root->sectorsize * csum_size;
5732 read_extent_buffer(eb, (char *)&csum_expected,
5733 csum_offset, csum_size);
5734 /* try another mirror */
5735 if (csum != csum_expected) {
5736 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5737 mirror, bytenr + tmp,
5738 csum, csum_expected);
5739 num_copies = btrfs_num_copies(
5740 &root->fs_info->mapping_tree,
5742 if (mirror < num_copies - 1) {
5747 data_checked += root->sectorsize;
/*
 * check_extent_exists - ensure [bytenr, bytenr+num_bytes) is fully covered by
 * EXTENT_ITEMs in the extent tree.
 *
 * Used to detect csum records that have no backing extent.  Walks the extent
 * tree forward, shrinking the queried range as covering extents are found;
 * when an extent sits in the middle of the range it recurses on the right
 * half and continues with the left.  Complains (and errors) if any part of
 * the range remains uncovered.
 *
 * NOTE(review): elided excerpt; loop framing and several early-exit paths
 * between the visible lines are not shown.
 */
5756 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5759 struct btrfs_path *path;
5760 struct extent_buffer *leaf;
5761 struct btrfs_key key;
5764 path = btrfs_alloc_path();
5766 fprintf(stderr, "Error allocating path\n");
/* offset = -1: land just past bytenr, then walk back to the extent. */
5770 key.objectid = bytenr;
5771 key.type = BTRFS_EXTENT_ITEM_KEY;
5772 key.offset = (u64)-1;
5775 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5778 fprintf(stderr, "Error looking up extent record %d\n", ret);
5779 btrfs_free_path(path);
5782 if (path->slots[0] > 0) {
5785 ret = btrfs_prev_leaf(root, path);
5788 } else if (ret > 0) {
5795 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5798 * Block group items come before extent items if they have the same
5799 * bytenr, so walk back one more just in case. Dear future traveller,
5800 * first congrats on mastering time travel. Now if it's not too much
5801 * trouble could you go back to 2006 and tell Chris to make the
5802 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5803 * EXTENT_ITEM_KEY please?
5805 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5806 if (path->slots[0] > 0) {
5809 ret = btrfs_prev_leaf(root, path);
5812 } else if (ret > 0) {
5817 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5821 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5822 ret = btrfs_next_leaf(root, path);
5824 fprintf(stderr, "Error going to next leaf "
5826 btrfs_free_path(path);
5832 leaf = path->nodes[0];
5833 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5834 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent entirely before our range: keep scanning forward. */
5838 if (key.objectid + key.offset < bytenr) {
5842 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at our range: trim the covered front. */
5845 if (key.objectid == bytenr) {
5846 if (key.offset >= num_bytes) {
5850 num_bytes -= key.offset;
5851 bytenr += key.offset;
/* Extent starts before our range: trim whatever it covers. */
5852 } else if (key.objectid < bytenr) {
5853 if (key.objectid + key.offset >= bytenr + num_bytes) {
5857 num_bytes = (bytenr + num_bytes) -
5858 (key.objectid + key.offset);
5859 bytenr = key.objectid + key.offset;
/* Extent in the middle: check the right half recursively. */
5861 if (key.objectid + key.offset < bytenr + num_bytes) {
5862 u64 new_start = key.objectid + key.offset;
5863 u64 new_bytes = bytenr + num_bytes - new_start;
5866 * Weird case, the extent is in the middle of
5867 * our range, we'll have to search one side
5868 * and then the other. Not sure if this happens
5869 * in real life, but no harm in coding it up
5870 * anyway just in case.
5872 btrfs_release_path(path);
5873 ret = check_extent_exists(root, new_start,
5876 fprintf(stderr, "Right section didn't "
5880 num_bytes = key.objectid - bytenr;
5883 num_bytes = key.objectid - bytenr;
/* Anything left over was never matched by an extent record. */
5890 if (num_bytes && !ret) {
5891 fprintf(stderr, "There are no extents for csum range "
5892 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5896 btrfs_free_path(path);
/*
 * check_csums - walk the whole csum tree and sanity-check its contents.
 *
 * Accumulates contiguous csum coverage into [offset, offset+num_bytes); on a
 * discontinuity it verifies that the previous run is backed by real extents
 * (check_extent_exists).  With -check-data-csum it also re-reads the data and
 * verifies each stored checksum (check_extent_csums).
 *
 * NOTE(review): elided excerpt; loop framing, slot advancing and the final
 * return are not shown.
 */
5900 static int check_csums(struct btrfs_root *root)
5902 struct btrfs_path *path;
5903 struct extent_buffer *leaf;
5904 struct btrfs_key key;
5905 u64 offset = 0, num_bytes = 0;
5906 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5910 unsigned long leaf_offset;
5912 root = root->fs_info->csum_root;
5913 if (!extent_buffer_uptodate(root->node)) {
5914 fprintf(stderr, "No valid csum tree found\n");
5918 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5919 key.type = BTRFS_EXTENT_CSUM_KEY;
5922 path = btrfs_alloc_path();
5926 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5928 fprintf(stderr, "Error searching csum tree %d\n", ret);
5929 btrfs_free_path(path);
5933 if (ret > 0 && path->slots[0])
5938 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5939 ret = btrfs_next_leaf(root, path);
5941 fprintf(stderr, "Error going to next leaf "
5948 leaf = path->nodes[0];
5950 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5951 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Bytes of data covered by this csum item: one csum per sector. */
5956 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5957 csum_size) * root->sectorsize;
5958 if (!check_data_csum)
5959 goto skip_csum_check;
5960 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5961 ret = check_extent_csums(root, key.offset, data_len,
5967 offset = key.offset;
/* Discontinuity: verify the finished run has backing extents. */
5968 } else if (key.offset != offset + num_bytes) {
5969 ret = check_extent_exists(root, offset, num_bytes);
5971 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5972 "there is no extent record\n",
5973 offset, offset+num_bytes);
5976 offset = key.offset;
5979 num_bytes += data_len;
5983 btrfs_free_path(path);
/*
 * is_dropped_key - lexicographic (objectid, type, offset) comparison used to
 * tell whether @key falls below @drop_key, i.e. inside the portion of a
 * partially-dropped snapshot that has already been removed.
 * NOTE(review): the return statements between the visible lines are elided.
 */
5987 static int is_dropped_key(struct btrfs_key *key,
5988 struct btrfs_key *drop_key) {
5989 if (key->objectid < drop_key->objectid)
5991 else if (key->objectid == drop_key->objectid) {
5992 if (key->type < drop_key->type)
5994 else if (key->type == drop_key->type) {
5995 if (key->offset < drop_key->offset)
6003 * Here are the rules for FULL_BACKREF.
6005 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6006 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6008 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6009 * if it happened after the relocation occurred since we'll have dropped the
6010 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6011 * have no real way to know for sure.
6013 * We process the blocks one root at a time, and we start from the lowest root
6014 * objectid and go to the highest. So we can just lookup the owner backref for
6015 * the record and if we don't find it then we know it doesn't exist and we have
6018 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6019 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6020 * be set or not and then we can check later once we've gathered all the refs.
/*
 * calc_extent_flag - decide whether BTRFS_BLOCK_FLAG_FULL_BACKREF should be
 * set for tree block @buf and record disagreement with what we saw before
 * (rec->bad_full_backref).  See the FULL_BACKREF rules documented above.
 *
 * NOTE(review): elided excerpt; several early returns and the "normal
 * backref" path between the visible lines are not shown.
 */
6022 static int calc_extent_flag(struct btrfs_root *root,
6023 struct cache_tree *extent_cache,
6024 struct extent_buffer *buf,
6025 struct root_item_record *ri,
6028 struct extent_record *rec;
6029 struct cache_extent *cache;
6030 struct tree_backref *tback;
6033 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6034 /* we have added this extent before */
6036 rec = container_of(cache, struct extent_record, cache);
6039 * Except file/reloc tree, we can not have
6042 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* Root node of the tree being walked: never FULL_BACKREF. */
6047 if (buf->start == ri->bytenr)
6050 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
/* Owner still points at us: no FULL_BACKREF expected. */
6053 owner = btrfs_header_owner(buf);
6054 if (owner == ri->objectid)
6057 tback = find_tree_backref(rec, 0, owner);
/* Previously recorded as FULL_BACKREF but shouldn't be: flag it. */
6062 if (rec->flag_block_full_backref != FLAG_UNSET &&
6063 rec->flag_block_full_backref != 0)
6064 rec->bad_full_backref = 1;
6067 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
/* Previously recorded as non-FULL_BACKREF but should be: flag it. */
6068 if (rec->flag_block_full_backref != FLAG_UNSET &&
6069 rec->flag_block_full_backref != 1)
6070 rec->bad_full_backref = 1;
/*
 * run_next_block - process one pending tree block during the extent tree scan.
 *
 * Picks the next block from the pending/nodes caches (issuing readahead for a
 * batch first), reads it, works out its FULL_BACKREF expectation, then:
 *  - for leaves: records every item of interest (extent items, csum bytes,
 *    chunk/dev/block-group/dev-extent items, tree and data backrefs, orphan
 *    items queued for deletion, and file extent data backrefs);
 *  - for nodes: records each child pointer as an extent and queues it for
 *    later processing, skipping keys already dropped by a snapshot delete.
 * Global accounting (btree_space_waste, total_*_bytes, found_old_backref) is
 * updated as a side effect.
 *
 * NOTE(review): heavily elided excerpt; error handling, some declarations and
 * several branches between the visible lines are not shown.
 */
6074 static int run_next_block(struct btrfs_root *root,
6075 struct block_info *bits,
6078 struct cache_tree *pending,
6079 struct cache_tree *seen,
6080 struct cache_tree *reada,
6081 struct cache_tree *nodes,
6082 struct cache_tree *extent_cache,
6083 struct cache_tree *chunk_cache,
6084 struct rb_root *dev_cache,
6085 struct block_group_tree *block_group_cache,
6086 struct device_extent_tree *dev_extent_cache,
6087 struct root_item_record *ri)
6089 struct extent_buffer *buf;
6090 struct extent_record *rec = NULL;
6101 struct btrfs_key key;
6102 struct cache_extent *cache;
/* Batch up the next candidates and kick off readahead for them. */
6105 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6106 bits_nr, &reada_bits);
6111 for(i = 0; i < nritems; i++) {
6112 ret = add_cache_extent(reada, bits[i].start,
6117 /* fixme, get the parent transid */
6118 readahead_tree_block(root, bits[i].start,
6122 *last = bits[0].start;
6123 bytenr = bits[0].start;
6124 size = bits[0].size;
/* Drop this block from all tracking caches before processing it. */
6126 cache = lookup_cache_extent(pending, bytenr, size);
6128 remove_cache_extent(pending, cache);
6131 cache = lookup_cache_extent(reada, bytenr, size);
6133 remove_cache_extent(reada, cache);
6136 cache = lookup_cache_extent(nodes, bytenr, size);
6138 remove_cache_extent(nodes, cache);
6141 cache = lookup_cache_extent(extent_cache, bytenr, size);
6143 rec = container_of(cache, struct extent_record, cache);
6144 gen = rec->parent_generation;
6147 /* fixme, get the real parent transid */
6148 buf = read_tree_block(root, bytenr, size, gen);
6149 if (!extent_buffer_uptodate(buf)) {
6150 record_bad_block_io(root->fs_info,
6151 extent_cache, bytenr, size);
6155 nritems = btrfs_header_nritems(buf);
/* Determine FULL_BACKREF: ask the extent tree, or compute it ourselves. */
6158 if (!init_extent_tree) {
6159 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6160 btrfs_header_level(buf), 1, NULL,
6163 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6165 fprintf(stderr, "Couldn't calc extent flags\n");
6166 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6171 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6173 fprintf(stderr, "Couldn't calc extent flags\n");
6174 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6178 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6180 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6181 ri->objectid == btrfs_header_owner(buf)) {
6183 * Ok we got to this block from it's original owner and
6184 * we have FULL_BACKREF set. Relocation can leave
6185 * converted blocks over so this is altogether possible,
6186 * however it's not possible if the generation > the
6187 * last snapshot, so check for this case.
6189 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6190 btrfs_header_generation(buf) > ri->last_snapshot) {
6191 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6192 rec->bad_full_backref = 1;
6197 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6198 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6199 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6200 rec->bad_full_backref = 1;
6204 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6205 rec->flag_block_full_backref = 1;
6209 rec->flag_block_full_backref = 0;
6211 owner = btrfs_header_owner(buf);
6214 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: record every item type we track. */
6218 if (btrfs_is_leaf(buf)) {
6219 btree_space_waste += btrfs_leaf_free_space(root, buf);
6220 for (i = 0; i < nritems; i++) {
6221 struct btrfs_file_extent_item *fi;
6222 btrfs_item_key_to_cpu(buf, &key, i);
6223 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6224 process_extent_item(root, extent_cache, buf,
6228 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6229 process_extent_item(root, extent_cache, buf,
6233 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6235 btrfs_item_size_nr(buf, i);
6238 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6239 process_chunk_item(chunk_cache, &key, buf, i);
6242 if (key.type == BTRFS_DEV_ITEM_KEY) {
6243 process_device_item(dev_cache, &key, buf, i);
6246 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6247 process_block_group_item(block_group_cache,
6251 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6252 process_device_extent_item(dev_extent_cache,
6257 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6258 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6259 process_extent_ref_v0(extent_cache, buf, i);
6266 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6267 add_tree_backref(extent_cache, key.objectid, 0,
6271 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6272 add_tree_backref(extent_cache, key.objectid,
6276 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6277 struct btrfs_extent_data_ref *ref;
6278 ref = btrfs_item_ptr(buf, i,
6279 struct btrfs_extent_data_ref);
6280 add_data_backref(extent_cache,
6282 btrfs_extent_data_ref_root(buf, ref),
6283 btrfs_extent_data_ref_objectid(buf,
6285 btrfs_extent_data_ref_offset(buf, ref),
6286 btrfs_extent_data_ref_count(buf, ref),
6287 0, root->sectorsize);
6290 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6291 struct btrfs_shared_data_ref *ref;
6292 ref = btrfs_item_ptr(buf, i,
6293 struct btrfs_shared_data_ref);
6294 add_data_backref(extent_cache,
6295 key.objectid, key.offset, 0, 0, 0,
6296 btrfs_shared_data_ref_count(buf, ref),
6297 0, root->sectorsize);
/* Orphan items are queued on delete_items for later cleanup. */
6300 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6301 struct bad_item *bad;
6303 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6307 bad = malloc(sizeof(struct bad_item));
6310 INIT_LIST_HEAD(&bad->list);
6311 memcpy(&bad->key, &key,
6312 sizeof(struct btrfs_key));
6313 bad->root_id = owner;
6314 list_add_tail(&bad->list, &delete_items);
6317 if (key.type != BTRFS_EXTENT_DATA_KEY)
6319 fi = btrfs_item_ptr(buf, i,
6320 struct btrfs_file_extent_item);
/* Inline and hole extents carry no backref. */
6321 if (btrfs_file_extent_type(buf, fi) ==
6322 BTRFS_FILE_EXTENT_INLINE)
6324 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6327 data_bytes_allocated +=
6328 btrfs_file_extent_disk_num_bytes(buf, fi);
6329 if (data_bytes_allocated < root->sectorsize) {
6332 data_bytes_referenced +=
6333 btrfs_file_extent_num_bytes(buf, fi);
6334 add_data_backref(extent_cache,
6335 btrfs_file_extent_disk_bytenr(buf, fi),
6336 parent, owner, key.objectid, key.offset -
6337 btrfs_file_extent_offset(buf, fi), 1, 1,
6338 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: queue each child pointer for a later run_next_block() pass. */
6342 struct btrfs_key first_key;
6344 first_key.objectid = 0;
6347 btrfs_item_key_to_cpu(buf, &first_key, 0);
6348 level = btrfs_header_level(buf);
6349 for (i = 0; i < nritems; i++) {
6350 struct extent_record tmpl;
6352 ptr = btrfs_node_blockptr(buf, i);
6353 size = root->nodesize;
6354 btrfs_node_key_to_cpu(buf, &key, i);
/* Skip children already removed by a partial snapshot drop. */
6356 if ((level == ri->drop_level)
6357 && is_dropped_key(&key, &ri->drop_key)) {
6362 memset(&tmpl, 0, sizeof(tmpl));
6363 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6364 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6369 tmpl.max_size = size;
6370 ret = add_extent_rec(extent_cache, &tmpl);
6373 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6376 add_pending(nodes, seen, ptr, size);
6378 add_pending(pending, seen, ptr, size);
6381 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6382 nritems) * sizeof(struct btrfs_key_ptr);
6384 total_btree_bytes += buf->len;
6385 if (fs_root_objectid(btrfs_header_owner(buf)))
6386 total_fs_tree_bytes += buf->len;
6387 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6388 total_extent_tree_bytes += buf->len;
6389 if (!found_old_backref &&
6390 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6391 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6392 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6393 found_old_backref = 1;
6395 free_extent_buffer(buf);
/*
 * add_root_to_pending - seed the block scan with a tree root.
 *
 * Queues @buf on the nodes cache (or pending, for a leaf root), records it
 * as an extent, and adds the appropriate tree backref: a shared (parent)
 * backref for reloc trees / old-style backref filesystems, otherwise a
 * normal root-objectid backref.
 * NOTE(review): elided excerpt; some tmpl field assignments are not shown.
 */
6399 static int add_root_to_pending(struct extent_buffer *buf,
6400 struct cache_tree *extent_cache,
6401 struct cache_tree *pending,
6402 struct cache_tree *seen,
6403 struct cache_tree *nodes,
6406 struct extent_record tmpl;
6408 if (btrfs_header_level(buf) > 0)
6409 add_pending(nodes, seen, buf->start, buf->len);
6411 add_pending(pending, seen, buf->start, buf->len);
6413 memset(&tmpl, 0, sizeof(tmpl));
6414 tmpl.start = buf->start;
6419 tmpl.max_size = buf->len;
6420 add_extent_rec(extent_cache, &tmpl);
6422 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6423 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6424 add_tree_backref(extent_cache, buf->start, buf->start,
6427 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6431 /* as we fix the tree, we might be deleting blocks that
6432 * we're tracking for repair. This hook makes sure we
6433 * remove any backrefs for blocks as we are fixing them.
6435 static int free_extent_hook(struct btrfs_trans_handle *trans,
6436 struct btrfs_root *root,
6437 u64 bytenr, u64 num_bytes, u64 parent,
6438 u64 root_objectid, u64 owner, u64 offset,
6441 struct extent_record *rec;
6442 struct cache_extent *cache;
6444 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* owner >= FIRST_FREE_OBJECTID means this is a file data ref, not a tree ref. */
6446 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6447 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6451 rec = container_of(cache, struct extent_record, cache);
/* Data path: decrement both the found-ref and extent-tree counts. */
6453 struct data_backref *back;
6454 back = find_data_backref(rec, parent, root_objectid, owner,
6455 offset, 1, bytenr, num_bytes);
6458 if (back->node.found_ref) {
6459 back->found_ref -= refs_to_drop;
6461 rec->refs -= refs_to_drop;
6463 if (back->node.found_extent_tree) {
6464 back->num_refs -= refs_to_drop;
6465 if (rec->extent_item_refs)
6466 rec->extent_item_refs -= refs_to_drop;
6468 if (back->found_ref == 0)
6469 back->node.found_ref = 0;
6470 if (back->num_refs == 0)
6471 back->node.found_extent_tree = 0;
6473 if (!back->node.found_extent_tree && back->node.found_ref) {
6474 list_del(&back->node.list);
/* Tree block path: a tree backref is a single reference. */
6478 struct tree_backref *back;
6479 back = find_tree_backref(rec, parent, root_objectid);
6482 if (back->node.found_ref) {
6485 back->node.found_ref = 0;
6487 if (back->node.found_extent_tree) {
6488 if (rec->extent_item_refs)
6489 rec->extent_item_refs--;
6490 back->node.found_extent_tree = 0;
6492 if (!back->node.found_extent_tree && back->node.found_ref) {
6493 list_del(&back->node.list);
/* Drop the record entirely once nothing references it any more. */
6497 maybe_free_extent_rec(extent_cache, rec);
/*
 * delete_extent_records - remove every extent-tree item for @bytenr.
 *
 * Repeatedly searches backward from (bytenr, -1) and deletes any
 * extent/metadata item or backref item keyed at @bytenr, stepping the search
 * key down past each deleted item.  When an EXTENT_ITEM or METADATA_ITEM is
 * removed, the block group accounting is updated to match.
 * NOTE(review): elided excerpt; loop framing and error paths are not shown.
 */
6502 static int delete_extent_records(struct btrfs_trans_handle *trans,
6503 struct btrfs_root *root,
6504 struct btrfs_path *path,
6505 u64 bytenr, u64 new_len)
6507 struct btrfs_key key;
6508 struct btrfs_key found_key;
6509 struct extent_buffer *leaf;
6514 key.objectid = bytenr;
6516 key.offset = (u64)-1;
6519 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6526 if (path->slots[0] == 0)
6532 leaf = path->nodes[0];
6533 slot = path->slots[0];
6535 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6536 if (found_key.objectid != bytenr)
/* Only extent items and backref items are deleted; step past others. */
6539 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6540 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6541 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6542 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6543 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6544 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6545 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6546 btrfs_release_path(path);
6547 if (found_key.type == 0) {
6548 if (found_key.offset == 0)
6550 key.offset = found_key.offset - 1;
6551 key.type = found_key.type;
6553 key.type = found_key.type - 1;
6554 key.offset = (u64)-1;
6558 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6559 found_key.objectid, found_key.type, found_key.offset);
6561 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6564 btrfs_release_path(path);
/* Keep block group used-bytes in sync with the deleted extent item. */
6566 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6567 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6568 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6569 found_key.offset : root->nodesize;
6571 ret = btrfs_update_block_group(trans, root, bytenr,
6578 btrfs_release_path(path);
6583 * for a single backref, this will allocate a new extent
6584 * and add the backref to it.
/*
 * record_extent - re-create a missing extent item (when !allocated) and then
 * re-add the given backref to the extent tree via btrfs_inc_extent_ref().
 * For tree blocks a btrfs_tree_block_info is embedded after the extent item.
 * The interface (trans, info, path, rec, back, allocated, flags) is unchanged.
 *
 * NOTE(review): elided excerpt; error handling between the visible lines is
 * not shown.  Fixes applied relative to the original: removed a stray second
 * semicolon after the copy_key declaration, and repaired the mojibake
 * "(c)_key" corruption back to "&copy_key" in four calls.
 */
6586 static int record_extent(struct btrfs_trans_handle *trans,
6587 struct btrfs_fs_info *info,
6588 struct btrfs_path *path,
6589 struct extent_record *rec,
6590 struct extent_backref *back,
6591 int allocated, u64 flags)
6594 struct btrfs_root *extent_root = info->extent_root;
6595 struct extent_buffer *leaf;
6596 struct btrfs_key ins_key;
6597 struct btrfs_extent_item *ei;
6598 struct tree_backref *tback;
6599 struct data_backref *dback;
6600 struct btrfs_tree_block_info *bi;
/* Metadata extents are at least one node in size. */
6603 rec->max_size = max_t(u64, rec->max_size,
6604 info->extent_root->nodesize);
6607 u32 item_size = sizeof(*ei);
6610 item_size += sizeof(*bi);
/* Insert the missing EXTENT_ITEM itself. */
6612 ins_key.objectid = rec->start;
6613 ins_key.offset = rec->max_size;
6614 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6616 ret = btrfs_insert_empty_item(trans, extent_root, path,
6617 &ins_key, item_size);
6621 leaf = path->nodes[0];
6622 ei = btrfs_item_ptr(leaf, path->slots[0],
6623 struct btrfs_extent_item);
6625 btrfs_set_extent_refs(leaf, ei, 0);
6626 btrfs_set_extent_generation(leaf, ei, rec->generation);
6628 if (back->is_data) {
6629 btrfs_set_extent_flags(leaf, ei,
6630 BTRFS_EXTENT_FLAG_DATA);
6632 struct btrfs_disk_key copy_key;
6634 tback = to_tree_backref(back);
6635 bi = (struct btrfs_tree_block_info *)(ei + 1);
6636 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* Tree block info carries the first key and level of the block. */
6639 btrfs_set_disk_key_objectid(&copy_key,
6640 rec->info_objectid);
6641 btrfs_set_disk_key_type(&copy_key, 0);
6642 btrfs_set_disk_key_offset(&copy_key, 0);
6644 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6645 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6647 btrfs_set_extent_flags(leaf, ei,
6648 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6651 btrfs_mark_buffer_dirty(leaf);
6652 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6653 rec->max_size, 1, 0);
6656 btrfs_release_path(path);
/* Now add the backref: once per found reference for data extents. */
6659 if (back->is_data) {
6663 dback = to_data_backref(back);
6664 if (back->full_backref)
6665 parent = dback->parent;
6669 for (i = 0; i < dback->found_ref; i++) {
6670 /* if parent != 0, we're doing a full backref
6671 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6672 * just makes the backref allocator create a data
6675 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6676 rec->start, rec->max_size,
6680 BTRFS_FIRST_FREE_OBJECTID :
6686 fprintf(stderr, "adding new data backref"
6687 " on %llu %s %llu owner %llu"
6688 " offset %llu found %d\n",
6689 (unsigned long long)rec->start,
6690 back->full_backref ?
6692 back->full_backref ?
6693 (unsigned long long)parent :
6694 (unsigned long long)dback->root,
6695 (unsigned long long)dback->owner,
6696 (unsigned long long)dback->offset,
6701 tback = to_tree_backref(back);
6702 if (back->full_backref)
6703 parent = tback->parent;
6707 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6708 rec->start, rec->max_size,
6709 parent, tback->root, 0, 0);
6710 fprintf(stderr, "adding new tree backref on "
6711 "start %llu len %llu parent %llu root %llu\n",
6712 rec->start, rec->max_size, parent, tback->root);
6715 btrfs_release_path(path);
/*
 * find_entry - linear search of @entries for an extent_entry matching both
 * @bytenr and @bytes; the return statements are elided in this excerpt.
 */
6719 static struct extent_entry *find_entry(struct list_head *entries,
6720 u64 bytenr, u64 bytes)
6722 struct extent_entry *entry = NULL;
6724 list_for_each_entry(entry, entries, list) {
6725 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry - pick the extent_entry with the highest reference
 * count, i.e. the version of the extent most backrefs agree on.  Entries
 * whose refs are all broken are distrusted, and a tie between two candidates
 * means no winner (NULL is presumably returned; the return is elided here).
 */
6732 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6734 struct extent_entry *entry, *best = NULL, *prev = NULL;
6736 list_for_each_entry(entry, entries, list) {
6743 * If there are as many broken entries as entries then we know
6744 * not to trust this particular entry.
6746 if (entry->broken == entry->count)
6750 * If our current entry == best then we can't be sure our best
6751 * is really the best, so we need to keep searching.
6753 if (best && best->count == entry->count) {
6759 /* Prev == entry, not good enough, have to keep searching */
6760 if (!prev->broken && prev->count == entry->count)
6764 best = (prev->count > entry->count) ? prev : entry;
6765 else if (best->count < entry->count)
/*
 * repair_ref - rewrite a file extent item so its disk_bytenr/disk_num_bytes
 * agree with the winning extent_entry chosen by verify_backrefs().
 *
 * Finds the owning fs root and the file extent the backref points at, then
 * in a transaction adjusts disk_bytenr, file extent offset, disk_num_bytes
 * and (when not compressed) ram_bytes so the ref matches @entry.  Cases
 * where the ref lies outside the entry, or is compressed with a mismatched
 * start, are reported for manual investigation instead of being changed.
 *
 * NOTE(review): elided excerpt; the search-forward loop framing and several
 * error paths between the visible lines are not shown.
 */
6773 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6774 struct data_backref *dback, struct extent_entry *entry)
6776 struct btrfs_trans_handle *trans;
6777 struct btrfs_root *root;
6778 struct btrfs_file_extent_item *fi;
6779 struct extent_buffer *leaf;
6780 struct btrfs_key key;
6784 key.objectid = dback->root;
6785 key.type = BTRFS_ROOT_ITEM_KEY;
6786 key.offset = (u64)-1;
6787 root = btrfs_read_fs_root(info, &key);
6789 fprintf(stderr, "Couldn't find root for our ref\n");
6794 * The backref points to the original offset of the extent if it was
6795 * split, so we need to search down to the offset we have and then walk
6796 * forward until we find the backref we're looking for.
6798 key.objectid = dback->owner;
6799 key.type = BTRFS_EXTENT_DATA_KEY;
6800 key.offset = dback->offset;
6801 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6803 fprintf(stderr, "Error looking up ref %d\n", ret);
6808 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6809 ret = btrfs_next_leaf(root, path);
6811 fprintf(stderr, "Couldn't find our ref, next\n");
6815 leaf = path->nodes[0];
6816 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6817 if (key.objectid != dback->owner ||
6818 key.type != BTRFS_EXTENT_DATA_KEY) {
6819 fprintf(stderr, "Couldn't find our ref, search\n");
6822 fi = btrfs_item_ptr(leaf, path->slots[0],
6823 struct btrfs_file_extent_item);
6824 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6825 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* Found the file extent this backref describes. */
6827 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6832 btrfs_release_path(path);
6834 trans = btrfs_start_transaction(root, 1);
6836 return PTR_ERR(trans);
6839 * Ok we have the key of the file extent we want to fix, now we can cow
6840 * down to the thing and fix it.
6842 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6844 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6845 key.objectid, key.type, key.offset, ret);
6849 fprintf(stderr, "Well that's odd, we just found this key "
6850 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6855 leaf = path->nodes[0];
6856 fi = btrfs_item_ptr(leaf, path->slots[0],
6857 struct btrfs_file_extent_item);
/* Compressed extents can't be re-based without more information. */
6859 if (btrfs_file_extent_compression(leaf, fi) &&
6860 dback->disk_bytenr != entry->bytenr) {
6861 fprintf(stderr, "Ref doesn't match the record start and is "
6862 "compressed, please take a btrfs-image of this file "
6863 "system and send it to a btrfs developer so they can "
6864 "complete this functionality for bytenr %Lu\n",
6865 dback->disk_bytenr);
/* Broken ref: just point it at the agreed-upon bytenr. */
6870 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6871 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
/* Ref starts after the entry: shift the file offset forward to compensate. */
6872 } else if (dback->disk_bytenr > entry->bytenr) {
6873 u64 off_diff, offset;
6875 off_diff = dback->disk_bytenr - entry->bytenr;
6876 offset = btrfs_file_extent_offset(leaf, fi);
6877 if (dback->disk_bytenr + offset +
6878 btrfs_file_extent_num_bytes(leaf, fi) >
6879 entry->bytenr + entry->bytes) {
6880 fprintf(stderr, "Ref is past the entry end, please "
6881 "take a btrfs-image of this file system and "
6882 "send it to a btrfs developer, ref %Lu\n",
6883 dback->disk_bytenr);
6888 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6889 btrfs_set_file_extent_offset(leaf, fi, offset);
/* Ref starts before the entry: shift the file offset back. */
6890 } else if (dback->disk_bytenr < entry->bytenr) {
6893 offset = btrfs_file_extent_offset(leaf, fi);
6894 if (dback->disk_bytenr + offset < entry->bytenr) {
6895 fprintf(stderr, "Ref is before the entry start, please"
6896 " take a btrfs-image of this file system and "
6897 "send it to a btrfs developer, ref %Lu\n",
6898 dback->disk_bytenr);
6903 offset += dback->disk_bytenr;
6904 offset -= entry->bytenr;
6905 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6906 btrfs_set_file_extent_offset(leaf, fi, offset);
6909 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6912 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6913 * only do this if we aren't using compression, otherwise it's a
6916 if (!btrfs_file_extent_compression(leaf, fi))
6917 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6919 printf("ram bytes may be wrong?\n");
6920 btrfs_mark_buffer_dirty(leaf);
6922 err = btrfs_commit_transaction(trans, root);
6923 btrfs_release_path(path);
6924 return ret ? ret : err;
6927 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6928 struct extent_record *rec)
6930 struct extent_backref *back;
6931 struct data_backref *dback;
6932 struct extent_entry *entry, *best = NULL;
6935 int broken_entries = 0;
6940 * Metadata is easy and the backrefs should always agree on bytenr and
6941 * size, if not we've got bigger issues.
6946 list_for_each_entry(back, &rec->backrefs, list) {
6947 if (back->full_backref || !back->is_data)
6950 dback = to_data_backref(back);
6953 * We only pay attention to backrefs that we found a real
6956 if (dback->found_ref == 0)
6960 * For now we only catch when the bytes don't match, not the
6961 * bytenr. We can easily do this at the same time, but I want
6962 * to have a fs image to test on before we just add repair
6963 * functionality willy-nilly so we know we won't screw up the
6967 entry = find_entry(&entries, dback->disk_bytenr,
6970 entry = malloc(sizeof(struct extent_entry));
6975 memset(entry, 0, sizeof(*entry));
6976 entry->bytenr = dback->disk_bytenr;
6977 entry->bytes = dback->bytes;
6978 list_add_tail(&entry->list, &entries);
6983 * If we only have on entry we may think the entries agree when
6984 * in reality they don't so we have to do some extra checking.
6986 if (dback->disk_bytenr != rec->start ||
6987 dback->bytes != rec->nr || back->broken)
6998 /* Yay all the backrefs agree, carry on good sir */
6999 if (nr_entries <= 1 && !mismatch)
7002 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7003 "%Lu\n", rec->start);
7006 * First we want to see if the backrefs can agree amongst themselves who
7007 * is right, so figure out which one of the entries has the highest
7010 best = find_most_right_entry(&entries);
7013 * Ok so we may have an even split between what the backrefs think, so
7014 * this is where we use the extent ref to see what it thinks.
7017 entry = find_entry(&entries, rec->start, rec->nr);
7018 if (!entry && (!broken_entries || !rec->found_rec)) {
7019 fprintf(stderr, "Backrefs don't agree with each other "
7020 "and extent record doesn't agree with anybody,"
7021 " so we can't fix bytenr %Lu bytes %Lu\n",
7022 rec->start, rec->nr);
7025 } else if (!entry) {
7027 * Ok our backrefs were broken, we'll assume this is the
7028 * correct value and add an entry for this range.
7030 entry = malloc(sizeof(struct extent_entry));
7035 memset(entry, 0, sizeof(*entry));
7036 entry->bytenr = rec->start;
7037 entry->bytes = rec->nr;
7038 list_add_tail(&entry->list, &entries);
7042 best = find_most_right_entry(&entries);
7044 fprintf(stderr, "Backrefs and extent record evenly "
7045 "split on who is right, this is going to "
7046 "require user input to fix bytenr %Lu bytes "
7047 "%Lu\n", rec->start, rec->nr);
7054 * I don't think this can happen currently as we'll abort() if we catch
7055 * this case higher up, but in case somebody removes that we still can't
7056 * deal with it properly here yet, so just bail out of that's the case.
7058 if (best->bytenr != rec->start) {
7059 fprintf(stderr, "Extent start and backref starts don't match, "
7060 "please use btrfs-image on this file system and send "
7061 "it to a btrfs developer so they can make fsck fix "
7062 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7063 rec->start, rec->nr);
7069 * Ok great we all agreed on an extent record, let's go find the real
7070 * references and fix up the ones that don't match.
7072 list_for_each_entry(back, &rec->backrefs, list) {
7073 if (back->full_backref || !back->is_data)
7076 dback = to_data_backref(back);
7079 * Still ignoring backrefs that don't have a real ref attached
7082 if (dback->found_ref == 0)
7085 if (dback->bytes == best->bytes &&
7086 dback->disk_bytenr == best->bytenr)
7089 ret = repair_ref(info, path, dback, best);
7095 * Ok we messed with the actual refs, which means we need to drop our
7096 * entire cache and go back and rescan. I know this is a huge pain and
7097 * adds a lot of extra work, but it's the only way to be safe. Once all
7098 * the backrefs agree we may not need to do anything to the extent
7103 while (!list_empty(&entries)) {
7104 entry = list_entry(entries.next, struct extent_entry, list);
7105 list_del_init(&entry->list);
7111 static int process_duplicates(struct btrfs_root *root,
7112 struct cache_tree *extent_cache,
7113 struct extent_record *rec)
7115 struct extent_record *good, *tmp;
7116 struct cache_extent *cache;
7120 * If we found a extent record for this extent then return, or if we
7121 * have more than one duplicate we are likely going to need to delete
7124 if (rec->found_rec || rec->num_duplicates > 1)
7127 /* Shouldn't happen but just in case */
7128 BUG_ON(!rec->num_duplicates);
7131 * So this happens if we end up with a backref that doesn't match the
7132 * actual extent entry. So either the backref is bad or the extent
7133 * entry is bad. Either way we want to have the extent_record actually
7134 * reflect what we found in the extent_tree, so we need to take the
7135 * duplicate out and use that as the extent_record since the only way we
7136 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7138 remove_cache_extent(extent_cache, &rec->cache);
7140 good = to_extent_record(rec->dups.next);
7141 list_del_init(&good->list);
7142 INIT_LIST_HEAD(&good->backrefs);
7143 INIT_LIST_HEAD(&good->dups);
7144 good->cache.start = good->start;
7145 good->cache.size = good->nr;
7146 good->content_checked = 0;
7147 good->owner_ref_checked = 0;
7148 good->num_duplicates = 0;
7149 good->refs = rec->refs;
7150 list_splice_init(&rec->backrefs, &good->backrefs);
7152 cache = lookup_cache_extent(extent_cache, good->start,
7156 tmp = container_of(cache, struct extent_record, cache);
7159 * If we find another overlapping extent and it's found_rec is
7160 * set then it's a duplicate and we need to try and delete
7163 if (tmp->found_rec || tmp->num_duplicates > 0) {
7164 if (list_empty(&good->list))
7165 list_add_tail(&good->list,
7166 &duplicate_extents);
7167 good->num_duplicates += tmp->num_duplicates + 1;
7168 list_splice_init(&tmp->dups, &good->dups);
7169 list_del_init(&tmp->list);
7170 list_add_tail(&tmp->list, &good->dups);
7171 remove_cache_extent(extent_cache, &tmp->cache);
7176 * Ok we have another non extent item backed extent rec, so lets
7177 * just add it to this extent and carry on like we did above.
7179 good->refs += tmp->refs;
7180 list_splice_init(&tmp->backrefs, &good->backrefs);
7181 remove_cache_extent(extent_cache, &tmp->cache);
7184 ret = insert_cache_extent(extent_cache, &good->cache);
7187 return good->num_duplicates ? 0 : 1;
7190 static int delete_duplicate_records(struct btrfs_root *root,
7191 struct extent_record *rec)
7193 struct btrfs_trans_handle *trans;
7194 LIST_HEAD(delete_list);
7195 struct btrfs_path *path;
7196 struct extent_record *tmp, *good, *n;
7199 struct btrfs_key key;
7201 path = btrfs_alloc_path();
7208 /* Find the record that covers all of the duplicates. */
7209 list_for_each_entry(tmp, &rec->dups, list) {
7210 if (good->start < tmp->start)
7212 if (good->nr > tmp->nr)
7215 if (tmp->start + tmp->nr < good->start + good->nr) {
7216 fprintf(stderr, "Ok we have overlapping extents that "
7217 "aren't completely covered by each other, this "
7218 "is going to require more careful thought. "
7219 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7220 tmp->start, tmp->nr, good->start, good->nr);
7227 list_add_tail(&rec->list, &delete_list);
7229 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7232 list_move_tail(&tmp->list, &delete_list);
7235 root = root->fs_info->extent_root;
7236 trans = btrfs_start_transaction(root, 1);
7237 if (IS_ERR(trans)) {
7238 ret = PTR_ERR(trans);
7242 list_for_each_entry(tmp, &delete_list, list) {
7243 if (tmp->found_rec == 0)
7245 key.objectid = tmp->start;
7246 key.type = BTRFS_EXTENT_ITEM_KEY;
7247 key.offset = tmp->nr;
7249 /* Shouldn't happen but just in case */
7250 if (tmp->metadata) {
7251 fprintf(stderr, "Well this shouldn't happen, extent "
7252 "record overlaps but is metadata? "
7253 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7257 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7263 ret = btrfs_del_item(trans, root, path);
7266 btrfs_release_path(path);
7269 err = btrfs_commit_transaction(trans, root);
7273 while (!list_empty(&delete_list)) {
7274 tmp = to_extent_record(delete_list.next);
7275 list_del_init(&tmp->list);
7281 while (!list_empty(&rec->dups)) {
7282 tmp = to_extent_record(rec->dups.next);
7283 list_del_init(&tmp->list);
7287 btrfs_free_path(path);
7289 if (!ret && !nr_del)
7290 rec->num_duplicates = 0;
7292 return ret ? ret : nr_del;
7295 static int find_possible_backrefs(struct btrfs_fs_info *info,
7296 struct btrfs_path *path,
7297 struct cache_tree *extent_cache,
7298 struct extent_record *rec)
7300 struct btrfs_root *root;
7301 struct extent_backref *back;
7302 struct data_backref *dback;
7303 struct cache_extent *cache;
7304 struct btrfs_file_extent_item *fi;
7305 struct btrfs_key key;
7309 list_for_each_entry(back, &rec->backrefs, list) {
7310 /* Don't care about full backrefs (poor unloved backrefs) */
7311 if (back->full_backref || !back->is_data)
7314 dback = to_data_backref(back);
7316 /* We found this one, we don't need to do a lookup */
7317 if (dback->found_ref)
7320 key.objectid = dback->root;
7321 key.type = BTRFS_ROOT_ITEM_KEY;
7322 key.offset = (u64)-1;
7324 root = btrfs_read_fs_root(info, &key);
7326 /* No root, definitely a bad ref, skip */
7327 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7329 /* Other err, exit */
7331 return PTR_ERR(root);
7333 key.objectid = dback->owner;
7334 key.type = BTRFS_EXTENT_DATA_KEY;
7335 key.offset = dback->offset;
7336 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7338 btrfs_release_path(path);
7341 /* Didn't find it, we can carry on */
7346 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7347 struct btrfs_file_extent_item);
7348 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7349 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7350 btrfs_release_path(path);
7351 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7353 struct extent_record *tmp;
7354 tmp = container_of(cache, struct extent_record, cache);
7357 * If we found an extent record for the bytenr for this
7358 * particular backref then we can't add it to our
7359 * current extent record. We only want to add backrefs
7360 * that don't have a corresponding extent item in the
7361 * extent tree since they likely belong to this record
7362 * and we need to fix it if it doesn't match bytenrs.
7368 dback->found_ref += 1;
7369 dback->disk_bytenr = bytenr;
7370 dback->bytes = bytes;
7373 * Set this so the verify backref code knows not to trust the
7374 * values in this backref.
7383 * Record orphan data ref into corresponding root.
7385 * Return 0 if the extent item contains data ref and recorded.
7386 * Return 1 if the extent item contains no useful data ref
7387 * On that case, it may contains only shared_dataref or metadata backref
7388 * or the file extent exists(this should be handled by the extent bytenr
7390 * Return <0 if something goes wrong.
7392 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7393 struct extent_record *rec)
7395 struct btrfs_key key;
7396 struct btrfs_root *dest_root;
7397 struct extent_backref *back;
7398 struct data_backref *dback;
7399 struct orphan_data_extent *orphan;
7400 struct btrfs_path *path;
7401 int recorded_data_ref = 0;
7406 path = btrfs_alloc_path();
7409 list_for_each_entry(back, &rec->backrefs, list) {
7410 if (back->full_backref || !back->is_data ||
7411 !back->found_extent_tree)
7413 dback = to_data_backref(back);
7414 if (dback->found_ref)
7416 key.objectid = dback->root;
7417 key.type = BTRFS_ROOT_ITEM_KEY;
7418 key.offset = (u64)-1;
7420 dest_root = btrfs_read_fs_root(fs_info, &key);
7422 /* For non-exist root we just skip it */
7423 if (IS_ERR(dest_root) || !dest_root)
7426 key.objectid = dback->owner;
7427 key.type = BTRFS_EXTENT_DATA_KEY;
7428 key.offset = dback->offset;
7430 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7432 * For ret < 0, it's OK since the fs-tree may be corrupted,
7433 * we need to record it for inode/file extent rebuild.
7434 * For ret > 0, we record it only for file extent rebuild.
7435 * For ret == 0, the file extent exists but only bytenr
7436 * mismatch, let the original bytenr fix routine to handle,
7442 orphan = malloc(sizeof(*orphan));
7447 INIT_LIST_HEAD(&orphan->list);
7448 orphan->root = dback->root;
7449 orphan->objectid = dback->owner;
7450 orphan->offset = dback->offset;
7451 orphan->disk_bytenr = rec->cache.start;
7452 orphan->disk_len = rec->cache.size;
7453 list_add(&dest_root->orphan_data_extents, &orphan->list);
7454 recorded_data_ref = 1;
7457 btrfs_free_path(path);
7459 return !recorded_data_ref;
7465 * when an incorrect extent item is found, this will delete
7466 * all of the existing entries for it and recreate them
7467 * based on what the tree scan found.
7469 static int fixup_extent_refs(struct btrfs_fs_info *info,
7470 struct cache_tree *extent_cache,
7471 struct extent_record *rec)
7473 struct btrfs_trans_handle *trans = NULL;
7475 struct btrfs_path *path;
7476 struct list_head *cur = rec->backrefs.next;
7477 struct cache_extent *cache;
7478 struct extent_backref *back;
7482 if (rec->flag_block_full_backref)
7483 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7485 path = btrfs_alloc_path();
7489 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7491 * Sometimes the backrefs themselves are so broken they don't
7492 * get attached to any meaningful rec, so first go back and
7493 * check any of our backrefs that we couldn't find and throw
7494 * them into the list if we find the backref so that
7495 * verify_backrefs can figure out what to do.
7497 ret = find_possible_backrefs(info, path, extent_cache, rec);
7502 /* step one, make sure all of the backrefs agree */
7503 ret = verify_backrefs(info, path, rec);
7507 trans = btrfs_start_transaction(info->extent_root, 1);
7508 if (IS_ERR(trans)) {
7509 ret = PTR_ERR(trans);
7513 /* step two, delete all the existing records */
7514 ret = delete_extent_records(trans, info->extent_root, path,
7515 rec->start, rec->max_size);
7520 /* was this block corrupt? If so, don't add references to it */
7521 cache = lookup_cache_extent(info->corrupt_blocks,
7522 rec->start, rec->max_size);
7528 /* step three, recreate all the refs we did find */
7529 while(cur != &rec->backrefs) {
7530 back = to_extent_backref(cur);
7534 * if we didn't find any references, don't create a
7537 if (!back->found_ref)
7540 rec->bad_full_backref = 0;
7541 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7549 int err = btrfs_commit_transaction(trans, info->extent_root);
7554 btrfs_free_path(path);
7558 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7559 struct extent_record *rec)
7561 struct btrfs_trans_handle *trans;
7562 struct btrfs_root *root = fs_info->extent_root;
7563 struct btrfs_path *path;
7564 struct btrfs_extent_item *ei;
7565 struct btrfs_key key;
7569 key.objectid = rec->start;
7570 if (rec->metadata) {
7571 key.type = BTRFS_METADATA_ITEM_KEY;
7572 key.offset = rec->info_level;
7574 key.type = BTRFS_EXTENT_ITEM_KEY;
7575 key.offset = rec->max_size;
7578 path = btrfs_alloc_path();
7582 trans = btrfs_start_transaction(root, 0);
7583 if (IS_ERR(trans)) {
7584 btrfs_free_path(path);
7585 return PTR_ERR(trans);
7588 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7590 btrfs_free_path(path);
7591 btrfs_commit_transaction(trans, root);
7594 fprintf(stderr, "Didn't find extent for %llu\n",
7595 (unsigned long long)rec->start);
7596 btrfs_free_path(path);
7597 btrfs_commit_transaction(trans, root);
7601 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7602 struct btrfs_extent_item);
7603 flags = btrfs_extent_flags(path->nodes[0], ei);
7604 if (rec->flag_block_full_backref) {
7605 fprintf(stderr, "setting full backref on %llu\n",
7606 (unsigned long long)key.objectid);
7607 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7609 fprintf(stderr, "clearing full backref on %llu\n",
7610 (unsigned long long)key.objectid);
7611 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7613 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7614 btrfs_mark_buffer_dirty(path->nodes[0]);
7615 btrfs_free_path(path);
7616 return btrfs_commit_transaction(trans, root);
7619 /* right now we only prune from the extent allocation tree */
7620 static int prune_one_block(struct btrfs_trans_handle *trans,
7621 struct btrfs_fs_info *info,
7622 struct btrfs_corrupt_block *corrupt)
7625 struct btrfs_path path;
7626 struct extent_buffer *eb;
7630 int level = corrupt->level + 1;
7632 btrfs_init_path(&path);
7634 /* we want to stop at the parent to our busted block */
7635 path.lowest_level = level;
7637 ret = btrfs_search_slot(trans, info->extent_root,
7638 &corrupt->key, &path, -1, 1);
7643 eb = path.nodes[level];
7650 * hopefully the search gave us the block we want to prune,
7651 * lets try that first
7653 slot = path.slots[level];
7654 found = btrfs_node_blockptr(eb, slot);
7655 if (found == corrupt->cache.start)
7658 nritems = btrfs_header_nritems(eb);
7660 /* the search failed, lets scan this node and hope we find it */
7661 for (slot = 0; slot < nritems; slot++) {
7662 found = btrfs_node_blockptr(eb, slot);
7663 if (found == corrupt->cache.start)
7667 * we couldn't find the bad block. TODO, search all the nodes for pointers
7670 if (eb == info->extent_root->node) {
7675 btrfs_release_path(&path);
7680 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7681 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7684 btrfs_release_path(&path);
7688 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7690 struct btrfs_trans_handle *trans = NULL;
7691 struct cache_extent *cache;
7692 struct btrfs_corrupt_block *corrupt;
7695 cache = search_cache_extent(info->corrupt_blocks, 0);
7699 trans = btrfs_start_transaction(info->extent_root, 1);
7701 return PTR_ERR(trans);
7703 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7704 prune_one_block(trans, info, corrupt);
7705 remove_cache_extent(info->corrupt_blocks, cache);
7708 return btrfs_commit_transaction(trans, info->extent_root);
7712 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7714 struct btrfs_block_group_cache *cache;
7719 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7720 &start, &end, EXTENT_DIRTY);
7723 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7729 cache = btrfs_lookup_first_block_group(fs_info, start);
7734 start = cache->key.objectid + cache->key.offset;
7738 static int check_extent_refs(struct btrfs_root *root,
7739 struct cache_tree *extent_cache)
7741 struct extent_record *rec;
7742 struct cache_extent *cache;
7751 * if we're doing a repair, we have to make sure
7752 * we don't allocate from the problem extents.
7753 * In the worst case, this will be all the
7756 cache = search_cache_extent(extent_cache, 0);
7758 rec = container_of(cache, struct extent_record, cache);
7759 set_extent_dirty(root->fs_info->excluded_extents,
7761 rec->start + rec->max_size - 1,
7763 cache = next_cache_extent(cache);
7766 /* pin down all the corrupted blocks too */
7767 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7769 set_extent_dirty(root->fs_info->excluded_extents,
7771 cache->start + cache->size - 1,
7773 cache = next_cache_extent(cache);
7775 prune_corrupt_blocks(root->fs_info);
7776 reset_cached_block_groups(root->fs_info);
7779 reset_cached_block_groups(root->fs_info);
7782 * We need to delete any duplicate entries we find first otherwise we
7783 * could mess up the extent tree when we have backrefs that actually
7784 * belong to a different extent item and not the weird duplicate one.
7786 while (repair && !list_empty(&duplicate_extents)) {
7787 rec = to_extent_record(duplicate_extents.next);
7788 list_del_init(&rec->list);
7790 /* Sometimes we can find a backref before we find an actual
7791 * extent, so we need to process it a little bit to see if there
7792 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7793 * if this is a backref screwup. If we need to delete stuff
7794 * process_duplicates() will return 0, otherwise it will return
7797 if (process_duplicates(root, extent_cache, rec))
7799 ret = delete_duplicate_records(root, rec);
7803 * delete_duplicate_records will return the number of entries
7804 * deleted, so if it's greater than 0 then we know we actually
7805 * did something and we need to remove.
7819 cache = search_cache_extent(extent_cache, 0);
7822 rec = container_of(cache, struct extent_record, cache);
7823 if (rec->num_duplicates) {
7824 fprintf(stderr, "extent item %llu has multiple extent "
7825 "items\n", (unsigned long long)rec->start);
7830 if (rec->refs != rec->extent_item_refs) {
7831 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7832 (unsigned long long)rec->start,
7833 (unsigned long long)rec->nr);
7834 fprintf(stderr, "extent item %llu, found %llu\n",
7835 (unsigned long long)rec->extent_item_refs,
7836 (unsigned long long)rec->refs);
7837 ret = record_orphan_data_extents(root->fs_info, rec);
7844 * we can't use the extent to repair file
7845 * extent, let the fallback method handle it.
7847 if (!fixed && repair) {
7848 ret = fixup_extent_refs(
7859 if (all_backpointers_checked(rec, 1)) {
7860 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7861 (unsigned long long)rec->start,
7862 (unsigned long long)rec->nr);
7864 if (!fixed && !recorded && repair) {
7865 ret = fixup_extent_refs(root->fs_info,
7874 if (!rec->owner_ref_checked) {
7875 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7876 (unsigned long long)rec->start,
7877 (unsigned long long)rec->nr);
7878 if (!fixed && !recorded && repair) {
7879 ret = fixup_extent_refs(root->fs_info,
7888 if (rec->bad_full_backref) {
7889 fprintf(stderr, "bad full backref, on [%llu]\n",
7890 (unsigned long long)rec->start);
7892 ret = fixup_extent_flags(root->fs_info, rec);
7901 * Although it's not a extent ref's problem, we reuse this
7902 * routine for error reporting.
7903 * No repair function yet.
7905 if (rec->crossing_stripes) {
7907 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7908 rec->start, rec->start + rec->max_size);
7913 if (rec->wrong_chunk_type) {
7915 "bad extent [%llu, %llu), type mismatch with chunk\n",
7916 rec->start, rec->start + rec->max_size);
7921 remove_cache_extent(extent_cache, cache);
7922 free_all_extent_backrefs(rec);
7923 if (!init_extent_tree && repair && (!cur_err || fixed))
7924 clear_extent_dirty(root->fs_info->excluded_extents,
7926 rec->start + rec->max_size - 1,
7932 if (ret && ret != -EAGAIN) {
7933 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7936 struct btrfs_trans_handle *trans;
7938 root = root->fs_info->extent_root;
7939 trans = btrfs_start_transaction(root, 1);
7940 if (IS_ERR(trans)) {
7941 ret = PTR_ERR(trans);
7945 btrfs_fix_block_accounting(trans, root);
7946 ret = btrfs_commit_transaction(trans, root);
7951 fprintf(stderr, "repaired damaged extent references\n");
7957 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7961 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7962 stripe_size = length;
7963 stripe_size /= num_stripes;
7964 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7965 stripe_size = length * 2;
7966 stripe_size /= num_stripes;
7967 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7968 stripe_size = length;
7969 stripe_size /= (num_stripes - 1);
7970 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7971 stripe_size = length;
7972 stripe_size /= (num_stripes - 2);
7974 stripe_size = length;
7980 * Check the chunk with its block group/dev list ref:
7981 * Return 0 if all refs seems valid.
7982 * Return 1 if part of refs seems valid, need later check for rebuild ref
7983 * like missing block group and needs to search extent tree to rebuild them.
7984 * Return -1 if essential refs are missing and unable to rebuild.
7986 static int check_chunk_refs(struct chunk_record *chunk_rec,
7987 struct block_group_tree *block_group_cache,
7988 struct device_extent_tree *dev_extent_cache,
7991 struct cache_extent *block_group_item;
7992 struct block_group_record *block_group_rec;
7993 struct cache_extent *dev_extent_item;
7994 struct device_extent_record *dev_extent_rec;
7998 int metadump_v2 = 0;
8002 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8005 if (block_group_item) {
8006 block_group_rec = container_of(block_group_item,
8007 struct block_group_record,
8009 if (chunk_rec->length != block_group_rec->offset ||
8010 chunk_rec->offset != block_group_rec->objectid ||
8012 chunk_rec->type_flags != block_group_rec->flags)) {
8015 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8016 chunk_rec->objectid,
8021 chunk_rec->type_flags,
8022 block_group_rec->objectid,
8023 block_group_rec->type,
8024 block_group_rec->offset,
8025 block_group_rec->offset,
8026 block_group_rec->objectid,
8027 block_group_rec->flags);
8030 list_del_init(&block_group_rec->list);
8031 chunk_rec->bg_rec = block_group_rec;
8036 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8037 chunk_rec->objectid,
8042 chunk_rec->type_flags);
8049 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8050 chunk_rec->num_stripes);
8051 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8052 devid = chunk_rec->stripes[i].devid;
8053 offset = chunk_rec->stripes[i].offset;
8054 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8055 devid, offset, length);
8056 if (dev_extent_item) {
8057 dev_extent_rec = container_of(dev_extent_item,
8058 struct device_extent_record,
8060 if (dev_extent_rec->objectid != devid ||
8061 dev_extent_rec->offset != offset ||
8062 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8063 dev_extent_rec->length != length) {
8066 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8067 chunk_rec->objectid,
8070 chunk_rec->stripes[i].devid,
8071 chunk_rec->stripes[i].offset,
8072 dev_extent_rec->objectid,
8073 dev_extent_rec->offset,
8074 dev_extent_rec->length);
8077 list_move(&dev_extent_rec->chunk_list,
8078 &chunk_rec->dextents);
8083 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8084 chunk_rec->objectid,
8087 chunk_rec->stripes[i].devid,
8088 chunk_rec->stripes[i].offset);
8095 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8096 int check_chunks(struct cache_tree *chunk_cache,
8097 struct block_group_tree *block_group_cache,
8098 struct device_extent_tree *dev_extent_cache,
8099 struct list_head *good, struct list_head *bad,
8100 struct list_head *rebuild, int silent)
8102 struct cache_extent *chunk_item;
8103 struct chunk_record *chunk_rec;
8104 struct block_group_record *bg_rec;
8105 struct device_extent_record *dext_rec;
8109 chunk_item = first_cache_extent(chunk_cache);
8110 while (chunk_item) {
8111 chunk_rec = container_of(chunk_item, struct chunk_record,
8113 err = check_chunk_refs(chunk_rec, block_group_cache,
8114 dev_extent_cache, silent);
8117 if (err == 0 && good)
8118 list_add_tail(&chunk_rec->list, good);
8119 if (err > 0 && rebuild)
8120 list_add_tail(&chunk_rec->list, rebuild);
8122 list_add_tail(&chunk_rec->list, bad);
8123 chunk_item = next_cache_extent(chunk_item);
8126 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8129 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8137 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8141 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8152 static int check_device_used(struct device_record *dev_rec,
8153 struct device_extent_tree *dext_cache)
8155 struct cache_extent *cache;
8156 struct device_extent_record *dev_extent_rec;
8159 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8161 dev_extent_rec = container_of(cache,
8162 struct device_extent_record,
8164 if (dev_extent_rec->objectid != dev_rec->devid)
8167 list_del_init(&dev_extent_rec->device_list);
8168 total_byte += dev_extent_rec->length;
8169 cache = next_cache_extent(cache);
8172 if (total_byte != dev_rec->byte_used) {
8174 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8175 total_byte, dev_rec->byte_used, dev_rec->objectid,
8176 dev_rec->type, dev_rec->offset);
8183 /* check btrfs_dev_item -> btrfs_dev_extent */
8184 static int check_devices(struct rb_root *dev_cache,
8185 struct device_extent_tree *dev_extent_cache)
8187 struct rb_node *dev_node;
8188 struct device_record *dev_rec;
8189 struct device_extent_record *dext_rec;
8193 dev_node = rb_first(dev_cache);
8195 dev_rec = container_of(dev_node, struct device_record, node);
8196 err = check_device_used(dev_rec, dev_extent_cache);
8200 dev_node = rb_next(dev_node);
8202 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8205 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8206 dext_rec->objectid, dext_rec->offset, dext_rec->length);
8213 static int add_root_item_to_list(struct list_head *head,
8214 u64 objectid, u64 bytenr, u64 last_snapshot,
8215 u8 level, u8 drop_level,
8216 int level_size, struct btrfs_key *drop_key)
8219 struct root_item_record *ri_rec;
8220 ri_rec = malloc(sizeof(*ri_rec));
8223 ri_rec->bytenr = bytenr;
8224 ri_rec->objectid = objectid;
8225 ri_rec->level = level;
8226 ri_rec->level_size = level_size;
8227 ri_rec->drop_level = drop_level;
8228 ri_rec->last_snapshot = last_snapshot;
8230 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8231 list_add_tail(&ri_rec->list, head);
8236 static void free_root_item_list(struct list_head *list)
8238 struct root_item_record *ri_rec;
8240 while (!list_empty(list)) {
8241 ri_rec = list_first_entry(list, struct root_item_record,
8243 list_del_init(&ri_rec->list);
8248 static int deal_root_from_list(struct list_head *list,
8249 struct btrfs_root *root,
8250 struct block_info *bits,
8252 struct cache_tree *pending,
8253 struct cache_tree *seen,
8254 struct cache_tree *reada,
8255 struct cache_tree *nodes,
8256 struct cache_tree *extent_cache,
8257 struct cache_tree *chunk_cache,
8258 struct rb_root *dev_cache,
8259 struct block_group_tree *block_group_cache,
8260 struct device_extent_tree *dev_extent_cache)
8265 while (!list_empty(list)) {
8266 struct root_item_record *rec;
8267 struct extent_buffer *buf;
8268 rec = list_entry(list->next,
8269 struct root_item_record, list);
8271 buf = read_tree_block(root->fs_info->tree_root,
8272 rec->bytenr, rec->level_size, 0);
8273 if (!extent_buffer_uptodate(buf)) {
8274 free_extent_buffer(buf);
8278 add_root_to_pending(buf, extent_cache, pending,
8279 seen, nodes, rec->objectid);
8281 * To rebuild extent tree, we need deal with snapshot
8282 * one by one, otherwise we deal with node firstly which
8283 * can maximize readahead.
8286 ret = run_next_block(root, bits, bits_nr, &last,
8287 pending, seen, reada, nodes,
8288 extent_cache, chunk_cache,
8289 dev_cache, block_group_cache,
8290 dev_extent_cache, rec);
8294 free_extent_buffer(buf);
8295 list_del(&rec->list);
8301 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8302 reada, nodes, extent_cache, chunk_cache,
8303 dev_cache, block_group_cache,
8304 dev_extent_cache, NULL);
/*
 * Top-level pass of fsck: walk every tree (tree root, chunk root and all
 * fs/subvol roots found in the root tree), accumulate extent, chunk,
 * device and block-group information into in-memory caches, then verify
 * chunks, extent references and devices against each other.
 * Returns 0 when everything is consistent, negative on error.
 */
8314 static int check_chunks_and_extents(struct btrfs_root *root)
8316 struct rb_root dev_cache;
8317 struct cache_tree chunk_cache;
8318 struct block_group_tree block_group_cache;
8319 struct device_extent_tree dev_extent_cache;
8320 struct cache_tree extent_cache;
8321 struct cache_tree seen;
8322 struct cache_tree pending;
8323 struct cache_tree reada;
8324 struct cache_tree nodes;
8325 struct extent_io_tree excluded_extents;
8326 struct cache_tree corrupt_blocks;
8327 struct btrfs_path path;
8328 struct btrfs_key key;
8329 struct btrfs_key found_key;
8331 struct block_info *bits;
8333 struct extent_buffer *leaf;
8335 struct btrfs_root_item ri;
8336 struct list_head dropping_trees;
8337 struct list_head normal_trees;
8338 struct btrfs_root *root1;
/* Initialize all the per-run caches and lists. */
8343 dev_cache = RB_ROOT;
8344 cache_tree_init(&chunk_cache);
8345 block_group_tree_init(&block_group_cache);
8346 device_extent_tree_init(&dev_extent_cache);
8348 cache_tree_init(&extent_cache);
8349 cache_tree_init(&seen);
8350 cache_tree_init(&pending);
8351 cache_tree_init(&nodes);
8352 cache_tree_init(&reada);
8353 cache_tree_init(&corrupt_blocks);
8354 extent_io_tree_init(&excluded_extents);
8355 INIT_LIST_HEAD(&dropping_trees);
8356 INIT_LIST_HEAD(&normal_trees);
/* Hook our caches into fs_info so lower layers can record findings. */
8359 root->fs_info->excluded_extents = &excluded_extents;
8360 root->fs_info->fsck_extent_cache = &extent_cache;
8361 root->fs_info->free_extent_hook = free_extent_hook;
8362 root->fs_info->corrupt_blocks = &corrupt_blocks;
8366 bits = malloc(bits_nr * sizeof(struct block_info));
8372 if (ctx.progress_enabled) {
8373 ctx.tp = TASK_EXTENTS;
8374 task_start(ctx.info);
/* Seed the walk with the tree root and the chunk root. */
8378 root1 = root->fs_info->tree_root;
8379 level = btrfs_header_level(root1->node);
8380 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8381 root1->node->start, 0, level, 0,
8382 root1->nodesize, NULL);
8385 root1 = root->fs_info->chunk_root;
8386 level = btrfs_header_level(root1->node);
8387 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8388 root1->node->start, 0, level, 0,
8389 root1->nodesize, NULL);
/* Enumerate every ROOT_ITEM in the root tree. */
8392 btrfs_init_path(&path);
8395 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8396 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8401 leaf = path.nodes[0];
8402 slot = path.slots[0];
8403 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8404 ret = btrfs_next_leaf(root, &path);
8407 leaf = path.nodes[0];
8408 slot = path.slots[0];
8410 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8411 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8412 unsigned long offset;
8415 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8416 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8417 last_snapshot = btrfs_root_last_snapshot(&ri);
/*
 * drop_progress == 0 means the root is not half-deleted:
 * treat it as a normal tree; otherwise it is a tree that
 * was being dropped and needs special handling.
 */
8418 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8419 level = btrfs_root_level(&ri);
8420 level_size = root->nodesize;
8421 ret = add_root_item_to_list(&normal_trees,
8423 btrfs_root_bytenr(&ri),
8424 last_snapshot, level,
8425 0, level_size, NULL);
8429 level = btrfs_root_level(&ri);
8430 level_size = root->nodesize;
8431 objectid = found_key.objectid;
8432 btrfs_disk_key_to_cpu(&found_key,
8434 ret = add_root_item_to_list(&dropping_trees,
8436 btrfs_root_bytenr(&ri),
8437 last_snapshot, level,
8439 level_size, &found_key);
8446 btrfs_release_path(&path);
8449 * check_block can return -EAGAIN if it fixes something, please keep
8450 * this in mind when dealing with return values from these functions, if
8451 * we get -EAGAIN we want to fall through and restart the loop.
8453 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8454 &seen, &reada, &nodes, &extent_cache,
8455 &chunk_cache, &dev_cache, &block_group_cache,
8462 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8463 &pending, &seen, &reada, &nodes,
8464 &extent_cache, &chunk_cache, &dev_cache,
8465 &block_group_cache, &dev_extent_cache);
/* Cross-check the accumulated caches against each other. */
8472 ret = check_chunks(&chunk_cache, &block_group_cache,
8473 &dev_extent_cache, NULL, NULL, NULL, 0);
8480 ret = check_extent_refs(root, &extent_cache);
8487 ret = check_devices(&dev_cache, &dev_extent_cache);
8492 task_stop(ctx.info);
/* Normal-path cleanup: unhook fsck state and free the caches. */
8494 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8495 extent_io_tree_cleanup(&excluded_extents);
8496 root->fs_info->fsck_extent_cache = NULL;
8497 root->fs_info->free_extent_hook = NULL;
8498 root->fs_info->corrupt_blocks = NULL;
8499 root->fs_info->excluded_extents = NULL;
8502 free_chunk_cache_tree(&chunk_cache);
8503 free_device_cache_tree(&dev_cache);
8504 free_block_group_tree(&block_group_cache);
8505 free_device_extent_tree(&dev_extent_cache);
8506 free_extent_cache_tree(&seen);
8507 free_extent_cache_tree(&pending);
8508 free_extent_cache_tree(&reada);
8509 free_extent_cache_tree(&nodes);
/* Error-path cleanup: same teardown plus the root item lists. */
8512 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8513 free_extent_cache_tree(&seen);
8514 free_extent_cache_tree(&pending);
8515 free_extent_cache_tree(&reada);
8516 free_extent_cache_tree(&nodes);
8517 free_chunk_cache_tree(&chunk_cache);
8518 free_block_group_tree(&block_group_cache);
8519 free_device_cache_tree(&dev_cache);
8520 free_device_extent_tree(&dev_extent_cache);
8521 free_extent_record_cache(root->fs_info, &extent_cache);
8522 free_root_item_list(&normal_trees);
8523 free_root_item_list(&dropping_trees);
8524 extent_io_tree_cleanup(&excluded_extents);
/*
 * Replace @root's node with a freshly allocated, empty tree node —
 * effectively re-initializing the tree to an empty state (used when
 * rebuilding the extent or csum tree).  With @overwrite the current
 * node is reused in place instead of allocating a new block.
 */
8528 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8529 struct btrfs_root *root, int overwrite)
8531 struct extent_buffer *c;
8532 struct extent_buffer *old = root->node;
8535 struct btrfs_disk_key disk_key = {0,0,0};
8541 extent_buffer_get(c);
8544 c = btrfs_alloc_free_block(trans, root,
8546 root->root_key.objectid,
8547 &disk_key, level, 0, 0);
8550 extent_buffer_get(c);
/* Write a pristine header: empty node owned by this root. */
8554 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8555 btrfs_set_header_level(c, level);
8556 btrfs_set_header_bytenr(c, c->start);
8557 btrfs_set_header_generation(c, trans->transid);
8558 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8559 btrfs_set_header_owner(c, root->root_key.objectid);
8561 write_extent_buffer(c, root->fs_info->fsid,
8562 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8564 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8565 btrfs_header_chunk_tree_uuid(c),
8568 btrfs_mark_buffer_dirty(c);
8570 * this case can happen in the following case:
8572 * 1.overwrite previous root.
8574 * 2.reinit reloc data root, this is because we skip pin
8575 * down reloc data tree before which means we can allocate
8576 * same block bytenr here.
8578 if (old->start == c->start) {
8579 btrfs_set_root_generation(&root->root_item,
8581 root->root_item.level = btrfs_header_level(root->node);
8582 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8583 &root->root_key, &root->root_item);
8585 free_extent_buffer(c);
8589 free_extent_buffer(old);
/* Make sure the new root item is written at commit time. */
8591 add_root_to_dirty_list(root);
/*
 * Recursively pin every metadata block reachable from @eb so the block
 * allocator will not hand those bytes out while trees are rebuilt.
 * @tree_root selects the tree-root-specific behavior: leaves of the
 * tree root contain ROOT_ITEMs whose referenced roots are also walked.
 * Uses the pinned_extents tree both as the pin record and as a
 * visited-set to terminate on loops in a broken filesystem.
 */
8595 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8596 struct extent_buffer *eb, int tree_root)
8598 struct extent_buffer *tmp;
8599 struct btrfs_root_item *ri;
8600 struct btrfs_key key;
8603 int level = btrfs_header_level(eb);
8609 * If we have pinned this block before, don't pin it again.
8610 * This can not only avoid forever loop with broken filesystem
8611 * but also give us some speedups.
8613 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8614 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8617 btrfs_pin_extent(fs_info, eb->start, eb->len);
8619 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8620 nritems = btrfs_header_nritems(eb);
8621 for (i = 0; i < nritems; i++) {
/* Leaf of the tree root: recurse into each referenced root. */
8623 btrfs_item_key_to_cpu(eb, &key, i);
8624 if (key.type != BTRFS_ROOT_ITEM_KEY)
8626 /* Skip the extent root and reloc roots */
8627 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8628 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8629 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8631 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8632 bytenr = btrfs_disk_root_bytenr(eb, ri);
8635 * If at any point we start needing the real root we
8636 * will have to build a stump root for the root we are
8637 * in, but for now this doesn't actually use the root so
8638 * just pass in extent_root.
8640 tmp = read_tree_block(fs_info->extent_root, bytenr,
8642 if (!extent_buffer_uptodate(tmp)) {
8643 fprintf(stderr, "Error reading root block\n");
8646 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8647 free_extent_buffer(tmp);
/* Interior node: recurse into (or just pin) each child block. */
8651 bytenr = btrfs_node_blockptr(eb, i);
8653 /* If we aren't the tree root don't read the block */
8654 if (level == 1 && !tree_root) {
8655 btrfs_pin_extent(fs_info, bytenr, nodesize);
8659 tmp = read_tree_block(fs_info->extent_root, bytenr,
8661 if (!extent_buffer_uptodate(tmp)) {
8662 fprintf(stderr, "Error reading tree block\n");
8665 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8666 free_extent_buffer(tmp);
/*
 * Pin all in-use metadata: first everything reachable from the chunk
 * root, then everything reachable from the tree root (tree_root=1 so
 * referenced subvolume roots are walked too).
 */
8675 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8679 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8683 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Recreate the in-memory block-group state from the chunk tree alone:
 * iterate every CHUNK_ITEM, add a matching block group and mark its
 * range free, then mark each resulting cached block group dirty so it
 * is rewritten.  Used when re-initializing the extent tree.
 */
8686 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8688 struct btrfs_block_group_cache *cache;
8689 struct btrfs_path *path;
8690 struct extent_buffer *leaf;
8691 struct btrfs_chunk *chunk;
8692 struct btrfs_key key;
8696 path = btrfs_alloc_path();
8701 key.type = BTRFS_CHUNK_ITEM_KEY;
8704 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8706 btrfs_free_path(path);
8711 * We do this in case the block groups were screwed up and had alloc
8712 * bits that aren't actually set on the chunks. This happens with
8713 * restored images every time and could happen in real life I guess.
8715 fs_info->avail_data_alloc_bits = 0;
8716 fs_info->avail_metadata_alloc_bits = 0;
8717 fs_info->avail_system_alloc_bits = 0;
8719 /* First we need to create the in-memory block groups */
8721 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8722 ret = btrfs_next_leaf(fs_info->chunk_root, path);
8724 btrfs_free_path(path);
8732 leaf = path->nodes[0];
8733 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8734 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8739 chunk = btrfs_item_ptr(leaf, path->slots[0],
8740 struct btrfs_chunk);
/* One block group per chunk, covering the chunk's full length. */
8741 btrfs_add_block_group(fs_info, 0,
8742 btrfs_chunk_type(leaf, chunk),
8743 key.objectid, key.offset,
8744 btrfs_chunk_length(leaf, chunk));
8745 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8746 key.offset + btrfs_chunk_length(leaf, chunk),
8752 cache = btrfs_lookup_first_block_group(fs_info, start);
8756 start = cache->key.objectid + cache->key.offset;
8759 btrfs_free_path(path);
/*
 * Remove any pending-balance state from the tree root: delete the
 * balance item, delete all TREE_RELOC root items, then re-initialize
 * the data-reloc tree so it is empty but valid.  Part of extent-tree
 * re-initialization.
 */
8763 static int reset_balance(struct btrfs_trans_handle *trans,
8764 struct btrfs_fs_info *fs_info)
8766 struct btrfs_root *root = fs_info->tree_root;
8767 struct btrfs_path *path;
8768 struct extent_buffer *leaf;
8769 struct btrfs_key key;
8770 int del_slot, del_nr = 0;
8774 path = btrfs_alloc_path();
/* Step 1: drop the balance item itself, if present. */
8778 key.objectid = BTRFS_BALANCE_OBJECTID;
8779 key.type = BTRFS_BALANCE_ITEM_KEY;
8782 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8787 goto reinit_data_reloc;
8792 ret = btrfs_del_item(trans, root, path);
8795 btrfs_release_path(path);
/* Step 2: delete every TREE_RELOC root item, batching deletions. */
8797 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8798 key.type = BTRFS_ROOT_ITEM_KEY;
8801 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8805 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8810 ret = btrfs_del_items(trans, root, path,
8817 btrfs_release_path(path);
8820 ret = btrfs_search_slot(trans, root, &key, path,
8827 leaf = path->nodes[0];
8828 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8829 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
8831 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
8836 del_slot = path->slots[0];
8845 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
8849 btrfs_release_path(path);
/* Step 3: reset the data reloc tree to an empty root. */
8852 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
8853 key.type = BTRFS_ROOT_ITEM_KEY;
8854 key.offset = (u64)-1;
8855 root = btrfs_read_fs_root(fs_info, &key);
8857 fprintf(stderr, "Error reading data reloc tree\n");
8858 ret = PTR_ERR(root);
8861 record_root_in_trans(trans, root);
8862 ret = btrfs_fsck_reinit_root(trans, root, 0);
8865 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
8867 btrfs_free_path(path);
/*
 * Rebuild the extent tree from scratch: pin all metadata currently in
 * use, reset the in-memory block groups from the chunk tree, wipe the
 * extent root to an empty node, re-insert block group items, and clear
 * any pending balance.  Refuses to run on mixed block-group
 * filesystems, where data extents would also need pinning.
 */
8871 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
8872 struct btrfs_fs_info *fs_info)
8878 * The only reason we don't do this is because right now we're just
8879 * walking the trees we find and pinning down their bytes, we don't look
8880 * at any of the leaves. In order to do mixed groups we'd have to check
8881 * the leaves of any fs roots and pin down the bytes for any file
8882 * extents we find. Not hard but why do it if we don't have to?
8884 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
8885 fprintf(stderr, "We don't support re-initing the extent tree "
8886 "for mixed block groups yet, please notify a btrfs "
8887 "developer you want to do this so they can add this "
8888 "functionality.\n");
8893 * first we need to walk all of the trees except the extent tree and pin
8894 * down the bytes that are in use so we don't overwrite any existing
8897 ret = pin_metadata_blocks(fs_info);
8899 fprintf(stderr, "error pinning down used bytes\n");
8904 * Need to drop all the block groups since we're going to recreate all
8907 btrfs_free_block_groups(fs_info);
8908 ret = reset_block_groups(fs_info);
8910 fprintf(stderr, "error resetting the block groups\n");
8914 /* Ok we can allocate now, reinit the extent root */
8915 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
8917 fprintf(stderr, "extent root initialization failed\n");
8919 * When the transaction code is updated we should end the
8920 * transaction, but for now progs only knows about commit so
8921 * just return an error.
8927 * Now we have all the in-memory block groups setup so we can make
8928 * allocations properly, and the metadata we care about is safe since we
8929 * pinned all of it above.
8932 struct btrfs_block_group_cache *cache;
8934 cache = btrfs_lookup_first_block_group(fs_info, start);
8937 start = cache->key.objectid + cache->key.offset;
/* Re-insert a BLOCK_GROUP_ITEM for each cached group. */
8938 ret = btrfs_insert_item(trans, fs_info->extent_root,
8939 &cache->key, &cache->item,
8940 sizeof(cache->item));
8942 fprintf(stderr, "Error adding block group\n");
8945 btrfs_extent_post_op(trans, fs_info->extent_root);
8948 ret = reset_balance(trans, fs_info);
8950 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a copy-on-write of metadata block @eb: look up its owner root
 * and re-search to the block's level with cow=1, which rewrites the
 * block at a new location.  Used to repair blocks flagged as needing a
 * rewrite.
 */
8955 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
8957 struct btrfs_path *path;
8958 struct btrfs_trans_handle *trans;
8959 struct btrfs_key key;
8962 printf("Recowing metadata block %llu\n", eb->start);
8963 key.objectid = btrfs_header_owner(eb);
8964 key.type = BTRFS_ROOT_ITEM_KEY;
8965 key.offset = (u64)-1;
8967 root = btrfs_read_fs_root(root->fs_info, &key);
8969 fprintf(stderr, "Couldn't find owner root %llu\n",
8971 return PTR_ERR(root);
8974 path = btrfs_alloc_path();
8978 trans = btrfs_start_transaction(root, 1);
8979 if (IS_ERR(trans)) {
8980 btrfs_free_path(path);
8981 return PTR_ERR(trans);
/* Search down to eb's own level; key comes from eb's first entry. */
8984 path->lowest_level = btrfs_header_level(eb);
8985 if (path->lowest_level)
8986 btrfs_node_key_to_cpu(eb, &key, 0);
8988 btrfs_item_key_to_cpu(eb, &key, 0);
8990 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
8991 btrfs_commit_transaction(trans, root);
8992 btrfs_free_path(path);
/*
 * Delete a single known-bad item (@bad carries the owning root id and
 * the item key): open the owner root, start a transaction, search for
 * the exact key and remove it.
 */
8996 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
8998 struct btrfs_path *path;
8999 struct btrfs_trans_handle *trans;
9000 struct btrfs_key key;
9003 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
9004 bad->key.type, bad->key.offset);
9005 key.objectid = bad->root_id;
9006 key.type = BTRFS_ROOT_ITEM_KEY;
9007 key.offset = (u64)-1;
9009 root = btrfs_read_fs_root(root->fs_info, &key);
9011 fprintf(stderr, "Couldn't find owner root %llu\n",
9013 return PTR_ERR(root);
9016 path = btrfs_alloc_path();
9020 trans = btrfs_start_transaction(root, 1);
9021 if (IS_ERR(trans)) {
9022 btrfs_free_path(path);
9023 return PTR_ERR(trans);
/* ins_len=-1, cow=1: position on the item for deletion. */
9026 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
9032 ret = btrfs_del_item(trans, root, path);
9034 btrfs_commit_transaction(trans, root);
9035 btrfs_free_path(path);
/*
 * Discard the log tree by zeroing the superblock's log root pointer and
 * level, then committing, so the kernel will not try to replay it.
 */
9039 static int zero_log_tree(struct btrfs_root *root)
9041 struct btrfs_trans_handle *trans;
9044 trans = btrfs_start_transaction(root, 1);
9045 if (IS_ERR(trans)) {
9046 ret = PTR_ERR(trans);
9049 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
9050 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
9051 ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data extent at [@start,
 * @start+len): read each sector through @buf and feed it to
 * btrfs_csum_file_block().  The third argument (start + len) is the
 * end of the extent being csummed.
 */
9055 static int populate_csum(struct btrfs_trans_handle *trans,
9056 struct btrfs_root *csum_root, char *buf, u64 start,
9063 while (offset < len) {
9064 sectorsize = csum_root->sectorsize;
9065 ret = read_extent_data(csum_root, buf, start + offset,
9069 ret = btrfs_csum_file_block(trans, csum_root, start + len,
9070 start + offset, buf, sectorsize);
9073 offset += sectorsize;
/*
 * Rebuild csums for one fs/subvol tree: iterate its EXTENT_DATA items,
 * and for every regular (non-inline, non-prealloc) file extent compute
 * checksums over the on-disk byte range via populate_csum().
 */
9078 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
9079 struct btrfs_root *csum_root,
9080 struct btrfs_root *cur_root)
9082 struct btrfs_path *path;
9083 struct btrfs_key key;
9084 struct extent_buffer *node;
9085 struct btrfs_file_extent_item *fi;
9092 path = btrfs_alloc_path();
/* One sector-sized scratch buffer, reused for every extent. */
9095 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
9105 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
9108 /* Iterate all regular file extents and fill its csum */
9110 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
9112 if (key.type != BTRFS_EXTENT_DATA_KEY)
9114 node = path->nodes[0];
9115 slot = path->slots[0];
9116 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
9117 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
9119 start = btrfs_file_extent_disk_bytenr(node, fi);
9120 len = btrfs_file_extent_disk_num_bytes(node, fi);
9122 ret = populate_csum(trans, csum_root, buf, start, len);
9129 * TODO: if next leaf is corrupted, jump to nearest next valid
9132 ret = btrfs_next_item(cur_root, path);
9142 btrfs_free_path(path);
/*
 * Rebuild the whole csum tree by walking the root tree for every
 * fs/subvol ROOT_ITEM (is_fstree()) and delegating each one to
 * fill_csum_tree_from_one_fs_root().  Used when the extent tree cannot
 * be trusted (e.g. after --init-extent-tree).
 */
9147 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9148 struct btrfs_root *csum_root)
9150 struct btrfs_fs_info *fs_info = csum_root->fs_info;
9151 struct btrfs_path *path;
9152 struct btrfs_root *tree_root = fs_info->tree_root;
9153 struct btrfs_root *cur_root;
9154 struct extent_buffer *node;
9155 struct btrfs_key key;
9159 path = btrfs_alloc_path();
9163 key.objectid = BTRFS_FS_TREE_OBJECTID;
9165 key.type = BTRFS_ROOT_ITEM_KEY;
9167 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9176 node = path->nodes[0];
9177 slot = path->slots[0];
9178 btrfs_item_key_to_cpu(node, &key, slot);
9179 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9181 if (key.type != BTRFS_ROOT_ITEM_KEY)
/* Only fs/subvolume trees carry file extents worth csumming. */
9183 if (!is_fstree(key.objectid))
9185 key.offset = (u64)-1;
9187 cur_root = btrfs_read_fs_root(fs_info, &key);
9188 if (IS_ERR(cur_root) || !cur_root) {
9189 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9193 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9198 ret = btrfs_next_item(tree_root, path);
9208 btrfs_free_path(path);
/*
 * Rebuild the csum tree by walking the extent tree: every EXTENT_ITEM
 * flagged BTRFS_EXTENT_FLAG_DATA gets checksums recomputed via
 * populate_csum().  Faster than the fs-tree walk but requires a
 * trustworthy extent tree.
 */
9212 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9213 struct btrfs_root *csum_root)
9215 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9216 struct btrfs_path *path;
9217 struct btrfs_extent_item *ei;
9218 struct extent_buffer *leaf;
9220 struct btrfs_key key;
9223 path = btrfs_alloc_path();
9228 key.type = BTRFS_EXTENT_ITEM_KEY;
9231 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9233 btrfs_free_path(path);
9237 buf = malloc(csum_root->sectorsize);
9239 btrfs_free_path(path);
9244 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9245 ret = btrfs_next_leaf(extent_root, path);
9253 leaf = path->nodes[0];
9255 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9256 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
9261 ei = btrfs_item_ptr(leaf, path->slots[0],
9262 struct btrfs_extent_item);
/* Metadata extents carry no file csums; skip non-DATA extents. */
9263 if (!(btrfs_extent_flags(leaf, ei) &
9264 BTRFS_EXTENT_FLAG_DATA)) {
9269 ret = populate_csum(trans, csum_root, buf, key.objectid,
9276 btrfs_free_path(path);
9282 * Recalculate the csum and put it into the csum tree.
9284 * Extent tree init will wipe out all the extent info, so in that case, we
9285 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
9286 * will use fs/subvol trees to init the csum tree.
9288 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9289 struct btrfs_root *csum_root,
/* Dispatch: fs-tree walk when the extent tree is untrusted, else extent walk. */
9293 return fill_csum_tree_from_fs(trans, csum_root);
9295 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache: free every root_item_info
 * entry, then the cache tree itself, and reset the pointer so the
 * cache can be rebuilt later.
 */
9298 static void free_roots_info_cache(void)
9300 if (!roots_info_cache)
9303 while (!cache_tree_empty(roots_info_cache)) {
9304 struct cache_extent *entry;
9305 struct root_item_info *rii;
9307 entry = first_cache_extent(roots_info_cache);
9310 remove_cache_extent(roots_info_cache, entry);
9311 rii = container_of(entry, struct root_item_info, cache_extent);
9315 free(roots_info_cache);
9316 roots_info_cache = NULL;
/*
 * Scan the extent tree for tree-block extents whose only inline ref is
 * a TREE_BLOCK_REF and record, per referenced root id, the highest-
 * level block seen (its bytenr, generation and a count of blocks at
 * that level).  This cache is later used by maybe_repair_root_item()
 * to detect root items pointing at stale root nodes.
 */
9319 static int build_roots_info_cache(struct btrfs_fs_info *info)
9322 struct btrfs_key key;
9323 struct extent_buffer *leaf;
9324 struct btrfs_path *path;
9326 if (!roots_info_cache) {
9327 roots_info_cache = malloc(sizeof(*roots_info_cache));
9328 if (!roots_info_cache)
9330 cache_tree_init(roots_info_cache);
9333 path = btrfs_alloc_path();
9338 key.type = BTRFS_EXTENT_ITEM_KEY;
9341 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9344 leaf = path->nodes[0];
9347 struct btrfs_key found_key;
9348 struct btrfs_extent_item *ei;
9349 struct btrfs_extent_inline_ref *iref;
9350 int slot = path->slots[0];
9355 struct cache_extent *entry;
9356 struct root_item_info *rii;
9358 if (slot >= btrfs_header_nritems(leaf)) {
9359 ret = btrfs_next_leaf(info->extent_root, path);
9366 leaf = path->nodes[0];
9367 slot = path->slots[0];
9370 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9372 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9373 found_key.type != BTRFS_METADATA_ITEM_KEY)
9376 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9377 flags = btrfs_extent_flags(leaf, ei);
/* Only tree blocks matter for root items; skip pure data extents. */
9379 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9380 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/*
 * METADATA_ITEM (skinny) keys encode the level in key.offset;
 * classic EXTENT_ITEMs carry a btrfs_tree_block_info first.
 */
9383 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9384 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9385 level = found_key.offset;
9387 struct btrfs_tree_block_info *binfo;
9389 binfo = (struct btrfs_tree_block_info *)(ei + 1);
9390 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9391 level = btrfs_tree_block_level(leaf, binfo);
9395 * For a root extent, it must be of the following type and the
9396 * first (and only one) iref in the item.
9398 type = btrfs_extent_inline_ref_type(leaf, iref);
9399 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9402 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
9403 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9405 rii = malloc(sizeof(struct root_item_info));
9410 rii->cache_extent.start = root_id;
9411 rii->cache_extent.size = 1;
9412 rii->level = (u8)-1;
9413 entry = &rii->cache_extent;
9414 ret = insert_cache_extent(roots_info_cache, entry);
9417 rii = container_of(entry, struct root_item_info,
9421 ASSERT(rii->cache_extent.start == root_id);
9422 ASSERT(rii->cache_extent.size == 1);
/* Track only the highest level seen; count ties at that level. */
9424 if (level > rii->level || rii->level == (u8)-1) {
9426 rii->bytenr = found_key.objectid;
9427 rii->gen = btrfs_extent_generation(leaf, ei);
9428 rii->node_count = 1;
9429 } else if (level == rii->level) {
9437 btrfs_free_path(path);
/*
 * Compare one on-disk root item (at @path) against the information
 * collected by build_roots_info_cache().  If the item's bytenr, level
 * or generation disagree with the cached (highest-level, single-node)
 * root extent, either report the mismatch (@read_only_mode) or rewrite
 * the root item in place.  Guards against the 3.17 snapshot/orphan
 * regression described above repair_root_items().
 */
9442 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9443 struct btrfs_path *path,
9444 const struct btrfs_key *root_key,
9445 const int read_only_mode)
9447 const u64 root_id = root_key->objectid;
9448 struct cache_extent *entry;
9449 struct root_item_info *rii;
9450 struct btrfs_root_item ri;
9451 unsigned long offset;
9453 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9456 "Error: could not find extent items for root %llu\n",
9457 root_key->objectid);
9461 rii = container_of(entry, struct root_item_info, cache_extent);
9462 ASSERT(rii->cache_extent.start == root_id);
9463 ASSERT(rii->cache_extent.size == 1);
/* More than one block at the top level means no unique root node. */
9465 if (rii->node_count != 1) {
9467 "Error: could not find btree root extent for root %llu\n",
9472 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9473 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9475 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9476 btrfs_root_level(&ri) != rii->level ||
9477 btrfs_root_generation(&ri) != rii->gen) {
9480 * If we're in repair mode but our caller told us to not update
9481 * the root item, i.e. just check if it needs to be updated, don't
9482 * print this message, since the caller will call us again shortly
9483 * for the same root item without read only mode (the caller will
9484 * open a transaction first).
9486 if (!(read_only_mode && repair))
9488 "%sroot item for root %llu,"
9489 " current bytenr %llu, current gen %llu, current level %u,"
9490 " new bytenr %llu, new gen %llu, new level %u\n",
9491 (read_only_mode ? "" : "fixing "),
9493 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9494 btrfs_root_level(&ri),
9495 rii->bytenr, rii->gen, rii->level);
/* A newer on-disk gen than any found root node is unexpected. */
9497 if (btrfs_root_generation(&ri) > rii->gen) {
9499 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9500 root_id, btrfs_root_generation(&ri), rii->gen);
9504 if (!read_only_mode) {
9505 btrfs_set_root_bytenr(&ri, rii->bytenr);
9506 btrfs_set_root_level(&ri, rii->level);
9507 btrfs_set_root_generation(&ri, rii->gen);
9508 write_extent_buffer(path->nodes[0], &ri,
9509 offset, sizeof(ri));
9519 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9520 * caused read-only snapshots to be corrupted if they were created at a moment
9521 * when the source subvolume/snapshot had orphan items. The issue was that the
9522 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9523 * node instead of the post orphan cleanup root node.
9524 * So this function, and its callees, just detects and fixes those cases. Even
9525 * though the regression was for read-only snapshots, this function applies to
9526 * any snapshot/subvolume root.
9527 * This must be run before any other repair code - not doing it so, makes other
9528 * repair code delete or modify backrefs in the extent tree for example, which
9529 * will result in an inconsistent fs after repairing the root items.
9531 static int repair_root_items(struct btrfs_fs_info *info)
9533 struct btrfs_path *path = NULL;
9534 struct btrfs_key key;
9535 struct extent_buffer *leaf;
9536 struct btrfs_trans_handle *trans = NULL;
9541 ret = build_roots_info_cache(info);
9545 path = btrfs_alloc_path();
/* Start at the first possible subvolume root item. */
9551 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9552 key.type = BTRFS_ROOT_ITEM_KEY;
9557 * Avoid opening and committing transactions if a leaf doesn't have
9558 * any root items that need to be fixed, so that we avoid rotating
9559 * backup roots unnecessarily.
9562 trans = btrfs_start_transaction(info->tree_root, 1);
9563 if (IS_ERR(trans)) {
9564 ret = PTR_ERR(trans);
9569 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9573 leaf = path->nodes[0];
9576 struct btrfs_key found_key;
9578 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9579 int no_more_keys = find_next_key(path, &key);
9581 btrfs_release_path(path);
/* Commit per leaf so fixes land before moving on. */
9583 ret = btrfs_commit_transaction(trans,
9595 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9597 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9599 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9602 ret = maybe_repair_root_item(info, path, &found_key,
/* A fix is needed: restart with a transaction open (repair mode). */
9607 if (!trans && repair) {
9610 btrfs_release_path(path);
9620 free_roots_info_cache();
9621 btrfs_free_path(path);
9623 btrfs_commit_transaction(trans, info->tree_root);
/* Usage/help text for "btrfs check", printed by usage() on bad arguments. */
9630 const char * const cmd_check_usage[] = {
9631 "btrfs check [options] <device>",
9632 "Check structural integrity of a filesystem (unmounted).",
9633 "Check structural integrity of an unmounted filesystem. Verify internal",
9634 "trees' consistency and item connectivity. In the repair mode try to",
9635 "fix the problems found.",
9636 "WARNING: the repair mode is considered dangerous",
9638 "-s|--super <superblock> use this superblock copy",
9639 "-b|--backup use the first valid backup root copy",
9640 "--repair try to repair the filesystem",
9641 "--readonly run in read-only mode (default)",
9642 "--init-csum-tree create a new CRC tree",
9643 "--init-extent-tree create a new extent tree",
9644 "--check-data-csum verify checksums of data blocks",
9645 "-Q|--qgroup-report print a report on qgroup consistency",
9646 "-E|--subvol-extents <subvolid>",
9647 " print subvolume extents and sharing state",
9648 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
9649 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
9650 "-p|--progress indicate progress",
9654 int cmd_check(int argc, char **argv)
9656 struct cache_tree root_cache;
9657 struct btrfs_root *root;
9658 struct btrfs_fs_info *info;
9661 u64 tree_root_bytenr = 0;
9662 u64 chunk_root_bytenr = 0;
9663 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9666 int init_csum_tree = 0;
9668 int qgroup_report = 0;
9669 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9673 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9674 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9675 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9676 static const struct option long_options[] = {
9677 { "super", required_argument, NULL, 's' },
9678 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9679 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9680 { "init-csum-tree", no_argument, NULL,
9681 GETOPT_VAL_INIT_CSUM },
9682 { "init-extent-tree", no_argument, NULL,
9683 GETOPT_VAL_INIT_EXTENT },
9684 { "check-data-csum", no_argument, NULL,
9685 GETOPT_VAL_CHECK_CSUM },
9686 { "backup", no_argument, NULL, 'b' },
9687 { "subvol-extents", required_argument, NULL, 'E' },
9688 { "qgroup-report", no_argument, NULL, 'Q' },
9689 { "tree-root", required_argument, NULL, 'r' },
9690 { "chunk-root", required_argument, NULL,
9691 GETOPT_VAL_CHUNK_TREE },
9692 { "progress", no_argument, NULL, 'p' },
9696 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9700 case 'a': /* ignored */ break;
9702 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9705 num = arg_strtou64(optarg);
9706 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9708 "ERROR: super mirror should be less than: %d\n",
9709 BTRFS_SUPER_MIRROR_MAX);
9712 bytenr = btrfs_sb_offset(((int)num));
9713 printf("using SB copy %llu, bytenr %llu\n", num,
9714 (unsigned long long)bytenr);
9720 subvolid = arg_strtou64(optarg);
9723 tree_root_bytenr = arg_strtou64(optarg);
9725 case GETOPT_VAL_CHUNK_TREE:
9726 chunk_root_bytenr = arg_strtou64(optarg);
9729 ctx.progress_enabled = true;
9733 usage(cmd_check_usage);
9734 case GETOPT_VAL_REPAIR:
9735 printf("enabling repair mode\n");
9737 ctree_flags |= OPEN_CTREE_WRITES;
9739 case GETOPT_VAL_READONLY:
9742 case GETOPT_VAL_INIT_CSUM:
9743 printf("Creating a new CRC tree\n");
9746 ctree_flags |= OPEN_CTREE_WRITES;
9748 case GETOPT_VAL_INIT_EXTENT:
9749 init_extent_tree = 1;
9750 ctree_flags |= (OPEN_CTREE_WRITES |
9751 OPEN_CTREE_NO_BLOCK_GROUPS);
9754 case GETOPT_VAL_CHECK_CSUM:
9755 check_data_csum = 1;
9760 if (check_argc_exact(argc - optind, 1))
9761 usage(cmd_check_usage);
9763 if (ctx.progress_enabled) {
9764 ctx.tp = TASK_NOTHING;
9765 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9768 /* This check is the only reason for --readonly to exist */
9769 if (readonly && repair) {
9770 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9775 cache_tree_init(&root_cache);
9777 if((ret = check_mounted(argv[optind])) < 0) {
9778 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9781 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9786 /* only allow partial opening under repair mode */
9788 ctree_flags |= OPEN_CTREE_PARTIAL;
9790 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9791 chunk_root_bytenr, ctree_flags);
9793 fprintf(stderr, "Couldn't open file system\n");
9799 root = info->fs_root;
9802 * repair mode will force us to commit transaction which
9803 * will make us fail to load log tree when mounting.
9805 if (repair && btrfs_super_log_root(info->super_copy)) {
9806 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9811 ret = zero_log_tree(root);
9813 fprintf(stderr, "fail to zero log tree\n");
9818 uuid_unparse(info->super_copy->fsid, uuidbuf);
9819 if (qgroup_report) {
9820 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9822 ret = qgroup_verify_all(info);
9824 ret = report_qgroups(1);
9828 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9829 subvolid, argv[optind], uuidbuf);
9830 ret = print_extent_state(info, subvolid);
9833 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9835 if (!extent_buffer_uptodate(info->tree_root->node) ||
9836 !extent_buffer_uptodate(info->dev_root->node) ||
9837 !extent_buffer_uptodate(info->chunk_root->node)) {
9838 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9843 if (init_extent_tree || init_csum_tree) {
9844 struct btrfs_trans_handle *trans;
9846 trans = btrfs_start_transaction(info->extent_root, 0);
9847 if (IS_ERR(trans)) {
9848 fprintf(stderr, "Error starting transaction\n");
9849 ret = PTR_ERR(trans);
9853 if (init_extent_tree) {
9854 printf("Creating a new extent tree\n");
9855 ret = reinit_extent_tree(trans, info);
9860 if (init_csum_tree) {
9861 fprintf(stderr, "Reinit crc root\n");
9862 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9864 fprintf(stderr, "crc root initialization failed\n");
9869 ret = fill_csum_tree(trans, info->csum_root,
9872 fprintf(stderr, "crc refilling failed\n");
9877 * Ok now we commit and run the normal fsck, which will add
9878 * extent entries for all of the items it finds.
9880 ret = btrfs_commit_transaction(trans, info->extent_root);
9884 if (!extent_buffer_uptodate(info->extent_root->node)) {
9885 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9889 if (!extent_buffer_uptodate(info->csum_root->node)) {
9890 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9895 if (!ctx.progress_enabled)
9896 fprintf(stderr, "checking extents\n");
9897 ret = check_chunks_and_extents(root);
9899 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9901 ret = repair_root_items(info);
9905 fprintf(stderr, "Fixed %d roots.\n", ret);
9907 } else if (ret > 0) {
9909 "Found %d roots with an outdated root item.\n",
9912 "Please run a filesystem check with the option --repair to fix them.\n");
9917 if (!ctx.progress_enabled) {
9918 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
9919 fprintf(stderr, "checking free space tree\n");
9921 fprintf(stderr, "checking free space cache\n");
9923 ret = check_space_cache(root);
9928 * We used to have to have these hole extents in between our real
9929 * extents so if we don't have this flag set we need to make sure there
9930 * are no gaps in the file extents for inodes, otherwise we can just
9931 * ignore it when this happens.
9933 no_holes = btrfs_fs_incompat(root->fs_info,
9934 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9935 if (!ctx.progress_enabled)
9936 fprintf(stderr, "checking fs roots\n");
9937 ret = check_fs_roots(root, &root_cache);
9941 fprintf(stderr, "checking csums\n");
9942 ret = check_csums(root);
9946 fprintf(stderr, "checking root refs\n");
9947 ret = check_root_refs(root, &root_cache);
9951 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9952 struct extent_buffer *eb;
9954 eb = list_first_entry(&root->fs_info->recow_ebs,
9955 struct extent_buffer, recow);
9956 list_del_init(&eb->recow);
9957 ret = recow_extent_buffer(root, eb);
9962 while (!list_empty(&delete_items)) {
9963 struct bad_item *bad;
9965 bad = list_first_entry(&delete_items, struct bad_item, list);
9966 list_del_init(&bad->list);
9968 ret = delete_bad_item(root, bad);
9972 if (info->quota_enabled) {
9974 fprintf(stderr, "checking quota groups\n");
9975 err = qgroup_verify_all(info);
9980 if (!list_empty(&root->fs_info->recow_ebs)) {
9981 fprintf(stderr, "Transid errors in file system\n");
9985 /* Don't override original ret */
9989 ret = report_qgroups(0);
9990 if (found_old_backref) { /*
9991 * there was a disk format change when mixed
9992 * backref was in testing tree. The old format
9993 * existed about one week.
9995 printf("\n * Found old mixed backref format. "
9996 "The old format is not supported! *"
9997 "\n * Please mount the FS in readonly mode, "
9998 "backup data and re-format the FS. *\n\n");
10001 printf("found %llu bytes used err is %d\n",
10002 (unsigned long long)bytes_used, ret);
10003 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
10004 printf("total tree bytes: %llu\n",
10005 (unsigned long long)total_btree_bytes);
10006 printf("total fs tree bytes: %llu\n",
10007 (unsigned long long)total_fs_tree_bytes);
10008 printf("total extent tree bytes: %llu\n",
10009 (unsigned long long)total_extent_tree_bytes);
10010 printf("btree space waste bytes: %llu\n",
10011 (unsigned long long)btree_space_waste);
10012 printf("file data blocks allocated: %llu\n referenced %llu\n",
10013 (unsigned long long)data_bytes_allocated,
10014 (unsigned long long)data_bytes_referenced);
10016 free_qgroup_counts();
10017 free_root_recs_tree(&root_cache);
10021 if (ctx.progress_enabled)
10022 task_deinit(ctx.info);