2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* have to be the last element */
54 enum task_position tp;
56 struct task_info *info;
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
77 struct extent_backref {
79 unsigned int is_data:1;
80 unsigned int found_extent_tree:1;
81 unsigned int full_backref:1;
82 unsigned int found_ref:1;
83 unsigned int broken:1;
86 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
88 return rb_entry(node, struct extent_backref, node);
92 struct extent_backref node;
106 static inline struct data_backref* to_data_backref(struct extent_backref *back)
108 return container_of(back, struct data_backref, node);
111 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
113 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
114 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
115 struct data_backref *back1 = to_data_backref(ext1);
116 struct data_backref *back2 = to_data_backref(ext2);
118 WARN_ON(!ext1->is_data);
119 WARN_ON(!ext2->is_data);
121 /* parent and root are a union, so this covers both */
122 if (back1->parent > back2->parent)
124 if (back1->parent < back2->parent)
127 /* This is a full backref and the parents match. */
128 if (back1->node.full_backref)
131 if (back1->owner > back2->owner)
133 if (back1->owner < back2->owner)
136 if (back1->offset > back2->offset)
138 if (back1->offset < back2->offset)
141 if (back1->bytes > back2->bytes)
143 if (back1->bytes < back2->bytes)
146 if (back1->found_ref && back2->found_ref) {
147 if (back1->disk_bytenr > back2->disk_bytenr)
149 if (back1->disk_bytenr < back2->disk_bytenr)
152 if (back1->found_ref > back2->found_ref)
154 if (back1->found_ref < back2->found_ref)
162 * Much like data_backref, just removed the undetermined members
163 * and changed it to use list_head.
164 * During extent scan, it is stored in root->orphan_data_extent.
165 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
167 struct orphan_data_extent {
168 struct list_head list;
176 struct tree_backref {
177 struct extent_backref node;
184 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
186 return container_of(back, struct tree_backref, node);
189 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
191 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
192 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
193 struct tree_backref *back1 = to_tree_backref(ext1);
194 struct tree_backref *back2 = to_tree_backref(ext2);
196 WARN_ON(ext1->is_data);
197 WARN_ON(ext2->is_data);
199 /* parent and root are a union, so this covers both */
200 if (back1->parent > back2->parent)
202 if (back1->parent < back2->parent)
208 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
210 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
211 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
213 if (ext1->is_data > ext2->is_data)
216 if (ext1->is_data < ext2->is_data)
219 if (ext1->full_backref > ext2->full_backref)
221 if (ext1->full_backref < ext2->full_backref)
225 return compare_data_backref(node1, node2);
227 return compare_tree_backref(node1, node2);
230 /* Explicit initialization for extent_record::flag_block_full_backref */
231 enum { FLAG_UNSET = 2 };
233 struct extent_record {
234 struct list_head backrefs;
235 struct list_head dups;
236 struct rb_root backref_tree;
237 struct list_head list;
238 struct cache_extent cache;
239 struct btrfs_disk_key parent_key;
244 u64 extent_item_refs;
246 u64 parent_generation;
250 unsigned int flag_block_full_backref:2;
251 unsigned int found_rec:1;
252 unsigned int content_checked:1;
253 unsigned int owner_ref_checked:1;
254 unsigned int is_root:1;
255 unsigned int metadata:1;
256 unsigned int bad_full_backref:1;
257 unsigned int crossing_stripes:1;
258 unsigned int wrong_chunk_type:1;
261 static inline struct extent_record* to_extent_record(struct list_head *entry)
263 return container_of(entry, struct extent_record, list);
266 struct inode_backref {
267 struct list_head list;
268 unsigned int found_dir_item:1;
269 unsigned int found_dir_index:1;
270 unsigned int found_inode_ref:1;
271 unsigned int filetype:8;
273 unsigned int ref_type;
280 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
282 return list_entry(entry, struct inode_backref, list);
285 struct root_item_record {
286 struct list_head list;
293 struct btrfs_key drop_key;
296 #define REF_ERR_NO_DIR_ITEM (1 << 0)
297 #define REF_ERR_NO_DIR_INDEX (1 << 1)
298 #define REF_ERR_NO_INODE_REF (1 << 2)
299 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
300 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
301 #define REF_ERR_DUP_INODE_REF (1 << 5)
302 #define REF_ERR_INDEX_UNMATCH (1 << 6)
303 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
304 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
305 #define REF_ERR_NO_ROOT_REF (1 << 9)
306 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
307 #define REF_ERR_DUP_ROOT_REF (1 << 11)
308 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
310 struct file_extent_hole {
316 struct inode_record {
317 struct list_head backrefs;
318 unsigned int checked:1;
319 unsigned int merging:1;
320 unsigned int found_inode_item:1;
321 unsigned int found_dir_item:1;
322 unsigned int found_file_extent:1;
323 unsigned int found_csum_item:1;
324 unsigned int some_csum_missing:1;
325 unsigned int nodatasum:1;
338 struct rb_root holes;
339 struct list_head orphan_extents;
344 #define I_ERR_NO_INODE_ITEM (1 << 0)
345 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
346 #define I_ERR_DUP_INODE_ITEM (1 << 2)
347 #define I_ERR_DUP_DIR_INDEX (1 << 3)
348 #define I_ERR_ODD_DIR_ITEM (1 << 4)
349 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
350 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
351 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
352 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
353 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
354 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
355 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
356 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
357 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
358 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
360 struct root_backref {
361 struct list_head list;
362 unsigned int found_dir_item:1;
363 unsigned int found_dir_index:1;
364 unsigned int found_back_ref:1;
365 unsigned int found_forward_ref:1;
366 unsigned int reachable:1;
375 static inline struct root_backref* to_root_backref(struct list_head *entry)
377 return list_entry(entry, struct root_backref, list);
381 struct list_head backrefs;
382 struct cache_extent cache;
383 unsigned int found_root_item:1;
389 struct cache_extent cache;
394 struct cache_extent cache;
395 struct cache_tree root_cache;
396 struct cache_tree inode_cache;
397 struct inode_record *current;
406 struct walk_control {
407 struct cache_tree shared;
408 struct shared_node *nodes[BTRFS_MAX_LEVEL];
414 struct btrfs_key key;
416 struct list_head list;
419 struct extent_entry {
424 struct list_head list;
427 struct root_item_info {
428 /* level of the root */
430 /* number of nodes at this level, must be 1 for a root */
434 struct cache_extent cache_extent;
437 static void *print_status_check(void *p)
439 struct task_ctx *priv = p;
440 const char work_indicator[] = { '.', 'o', 'O', 'o' };
442 static char *task_position_string[] = {
444 "checking free space cache",
448 task_period_start(priv->info, 1000 /* 1s */);
450 if (priv->tp == TASK_NOTHING)
454 printf("%s [%c]\r", task_position_string[priv->tp],
455 work_indicator[count % 4]);
458 task_period_wait(priv->info);
463 static int print_status_return(void *p)
471 /* Compatible function to allow reuse of old codes */
472 static u64 first_extent_gap(struct rb_root *holes)
474 struct file_extent_hole *hole;
476 if (RB_EMPTY_ROOT(holes))
479 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
483 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
485 struct file_extent_hole *hole1;
486 struct file_extent_hole *hole2;
488 hole1 = rb_entry(node1, struct file_extent_hole, node);
489 hole2 = rb_entry(node2, struct file_extent_hole, node);
491 if (hole1->start > hole2->start)
493 if (hole1->start < hole2->start)
495 /* Now hole1->start == hole2->start */
496 if (hole1->len >= hole2->len)
498 * Hole 1 will be merge center
499 * Same hole will be merged later
502 /* Hole 2 will be merge center */
507 * Add a hole to the record
509 * This will do hole merge for copy_file_extent_holes(),
510 * which will ensure there won't be continuous holes.
512 static int add_file_extent_hole(struct rb_root *holes,
515 struct file_extent_hole *hole;
516 struct file_extent_hole *prev = NULL;
517 struct file_extent_hole *next = NULL;
519 hole = malloc(sizeof(*hole));
524 /* Since compare will not return 0, no -EEXIST will happen */
525 rb_insert(holes, &hole->node, compare_hole);
527 /* simple merge with previous hole */
528 if (rb_prev(&hole->node))
529 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
531 if (prev && prev->start + prev->len >= hole->start) {
532 hole->len = hole->start + hole->len - prev->start;
533 hole->start = prev->start;
534 rb_erase(&prev->node, holes);
539 /* iterate merge with next holes */
541 if (!rb_next(&hole->node))
543 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
545 if (hole->start + hole->len >= next->start) {
546 if (hole->start + hole->len <= next->start + next->len)
547 hole->len = next->start + next->len -
549 rb_erase(&next->node, holes);
558 static int compare_hole_range(struct rb_node *node, void *data)
560 struct file_extent_hole *hole;
563 hole = (struct file_extent_hole *)data;
566 hole = rb_entry(node, struct file_extent_hole, node);
567 if (start < hole->start)
569 if (start >= hole->start && start < hole->start + hole->len)
575 * Delete a hole in the record
577 * This will do the hole split and is much stricter than add.
579 static int del_file_extent_hole(struct rb_root *holes,
582 struct file_extent_hole *hole;
583 struct file_extent_hole tmp;
588 struct rb_node *node;
595 node = rb_search(holes, &tmp, compare_hole_range, NULL);
598 hole = rb_entry(node, struct file_extent_hole, node);
599 if (start + len > hole->start + hole->len)
603 * Now there will be no overlap, delete the hole and re-add the
604 * split(s) if they exists.
606 if (start > hole->start) {
607 prev_start = hole->start;
608 prev_len = start - hole->start;
611 if (hole->start + hole->len > start + len) {
612 next_start = start + len;
613 next_len = hole->start + hole->len - start - len;
616 rb_erase(node, holes);
619 ret = add_file_extent_hole(holes, prev_start, prev_len);
624 ret = add_file_extent_hole(holes, next_start, next_len);
631 static int copy_file_extent_holes(struct rb_root *dst,
634 struct file_extent_hole *hole;
635 struct rb_node *node;
638 node = rb_first(src);
640 hole = rb_entry(node, struct file_extent_hole, node);
641 ret = add_file_extent_hole(dst, hole->start, hole->len);
644 node = rb_next(node);
649 static void free_file_extent_holes(struct rb_root *holes)
651 struct rb_node *node;
652 struct file_extent_hole *hole;
654 node = rb_first(holes);
656 hole = rb_entry(node, struct file_extent_hole, node);
657 rb_erase(node, holes);
659 node = rb_first(holes);
663 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
665 static void record_root_in_trans(struct btrfs_trans_handle *trans,
666 struct btrfs_root *root)
668 if (root->last_trans != trans->transid) {
669 root->track_dirty = 1;
670 root->last_trans = trans->transid;
671 root->commit_root = root->node;
672 extent_buffer_get(root->node);
676 static u8 imode_to_type(u32 imode)
679 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
680 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
681 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
682 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
683 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
684 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
685 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
686 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
689 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
693 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
695 struct device_record *rec1;
696 struct device_record *rec2;
698 rec1 = rb_entry(node1, struct device_record, node);
699 rec2 = rb_entry(node2, struct device_record, node);
700 if (rec1->devid > rec2->devid)
702 else if (rec1->devid < rec2->devid)
708 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
710 struct inode_record *rec;
711 struct inode_backref *backref;
712 struct inode_backref *orig;
713 struct inode_backref *tmp;
714 struct orphan_data_extent *src_orphan;
715 struct orphan_data_extent *dst_orphan;
719 rec = malloc(sizeof(*rec));
721 return ERR_PTR(-ENOMEM);
722 memcpy(rec, orig_rec, sizeof(*rec));
724 INIT_LIST_HEAD(&rec->backrefs);
725 INIT_LIST_HEAD(&rec->orphan_extents);
726 rec->holes = RB_ROOT;
728 list_for_each_entry(orig, &orig_rec->backrefs, list) {
729 size = sizeof(*orig) + orig->namelen + 1;
730 backref = malloc(size);
735 memcpy(backref, orig, size);
736 list_add_tail(&backref->list, &rec->backrefs);
738 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
739 dst_orphan = malloc(sizeof(*dst_orphan));
744 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
745 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
747 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
753 if (!list_empty(&rec->backrefs))
754 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
755 list_del(&orig->list);
759 if (!list_empty(&rec->orphan_extents))
760 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
761 list_del(&orig->list);
770 static void print_orphan_data_extents(struct list_head *orphan_extents,
773 struct orphan_data_extent *orphan;
775 if (list_empty(orphan_extents))
777 printf("The following data extent is lost in tree %llu:\n",
779 list_for_each_entry(orphan, orphan_extents, list) {
780 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
781 orphan->objectid, orphan->offset, orphan->disk_bytenr,
786 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
788 u64 root_objectid = root->root_key.objectid;
789 int errors = rec->errors;
793 /* reloc root errors, we print its corresponding fs root objectid*/
794 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
795 root_objectid = root->root_key.offset;
796 fprintf(stderr, "reloc");
798 fprintf(stderr, "root %llu inode %llu errors %x",
799 (unsigned long long) root_objectid,
800 (unsigned long long) rec->ino, rec->errors);
802 if (errors & I_ERR_NO_INODE_ITEM)
803 fprintf(stderr, ", no inode item");
804 if (errors & I_ERR_NO_ORPHAN_ITEM)
805 fprintf(stderr, ", no orphan item");
806 if (errors & I_ERR_DUP_INODE_ITEM)
807 fprintf(stderr, ", dup inode item");
808 if (errors & I_ERR_DUP_DIR_INDEX)
809 fprintf(stderr, ", dup dir index");
810 if (errors & I_ERR_ODD_DIR_ITEM)
811 fprintf(stderr, ", odd dir item");
812 if (errors & I_ERR_ODD_FILE_EXTENT)
813 fprintf(stderr, ", odd file extent");
814 if (errors & I_ERR_BAD_FILE_EXTENT)
815 fprintf(stderr, ", bad file extent");
816 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
817 fprintf(stderr, ", file extent overlap");
818 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
819 fprintf(stderr, ", file extent discount");
820 if (errors & I_ERR_DIR_ISIZE_WRONG)
821 fprintf(stderr, ", dir isize wrong");
822 if (errors & I_ERR_FILE_NBYTES_WRONG)
823 fprintf(stderr, ", nbytes wrong");
824 if (errors & I_ERR_ODD_CSUM_ITEM)
825 fprintf(stderr, ", odd csum item");
826 if (errors & I_ERR_SOME_CSUM_MISSING)
827 fprintf(stderr, ", some csum missing");
828 if (errors & I_ERR_LINK_COUNT_WRONG)
829 fprintf(stderr, ", link count wrong");
830 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
831 fprintf(stderr, ", orphan file extent");
832 fprintf(stderr, "\n");
833 /* Print the orphan extents if needed */
834 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
835 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
837 /* Print the holes if needed */
838 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
839 struct file_extent_hole *hole;
840 struct rb_node *node;
843 node = rb_first(&rec->holes);
844 fprintf(stderr, "Found file extent holes:\n");
847 hole = rb_entry(node, struct file_extent_hole, node);
848 fprintf(stderr, "\tstart: %llu, len: %llu\n",
849 hole->start, hole->len);
850 node = rb_next(node);
853 fprintf(stderr, "\tstart: 0, len: %llu\n",
854 round_up(rec->isize, root->sectorsize));
858 static void print_ref_error(int errors)
860 if (errors & REF_ERR_NO_DIR_ITEM)
861 fprintf(stderr, ", no dir item");
862 if (errors & REF_ERR_NO_DIR_INDEX)
863 fprintf(stderr, ", no dir index");
864 if (errors & REF_ERR_NO_INODE_REF)
865 fprintf(stderr, ", no inode ref");
866 if (errors & REF_ERR_DUP_DIR_ITEM)
867 fprintf(stderr, ", dup dir item");
868 if (errors & REF_ERR_DUP_DIR_INDEX)
869 fprintf(stderr, ", dup dir index");
870 if (errors & REF_ERR_DUP_INODE_REF)
871 fprintf(stderr, ", dup inode ref");
872 if (errors & REF_ERR_INDEX_UNMATCH)
873 fprintf(stderr, ", index mismatch");
874 if (errors & REF_ERR_FILETYPE_UNMATCH)
875 fprintf(stderr, ", filetype mismatch");
876 if (errors & REF_ERR_NAME_TOO_LONG)
877 fprintf(stderr, ", name too long");
878 if (errors & REF_ERR_NO_ROOT_REF)
879 fprintf(stderr, ", no root ref");
880 if (errors & REF_ERR_NO_ROOT_BACKREF)
881 fprintf(stderr, ", no root backref");
882 if (errors & REF_ERR_DUP_ROOT_REF)
883 fprintf(stderr, ", dup root ref");
884 if (errors & REF_ERR_DUP_ROOT_BACKREF)
885 fprintf(stderr, ", dup root backref");
886 fprintf(stderr, "\n");
889 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
892 struct ptr_node *node;
893 struct cache_extent *cache;
894 struct inode_record *rec = NULL;
897 cache = lookup_cache_extent(inode_cache, ino, 1);
899 node = container_of(cache, struct ptr_node, cache);
901 if (mod && rec->refs > 1) {
902 node->data = clone_inode_rec(rec);
903 if (IS_ERR(node->data))
909 rec = calloc(1, sizeof(*rec));
911 return ERR_PTR(-ENOMEM);
913 rec->extent_start = (u64)-1;
915 INIT_LIST_HEAD(&rec->backrefs);
916 INIT_LIST_HEAD(&rec->orphan_extents);
917 rec->holes = RB_ROOT;
919 node = malloc(sizeof(*node));
922 return ERR_PTR(-ENOMEM);
924 node->cache.start = ino;
925 node->cache.size = 1;
928 if (ino == BTRFS_FREE_INO_OBJECTID)
931 ret = insert_cache_extent(inode_cache, &node->cache);
933 return ERR_PTR(-EEXIST);
938 static void free_orphan_data_extents(struct list_head *orphan_extents)
940 struct orphan_data_extent *orphan;
942 while (!list_empty(orphan_extents)) {
943 orphan = list_entry(orphan_extents->next,
944 struct orphan_data_extent, list);
945 list_del(&orphan->list);
950 static void free_inode_rec(struct inode_record *rec)
952 struct inode_backref *backref;
957 while (!list_empty(&rec->backrefs)) {
958 backref = to_inode_backref(rec->backrefs.next);
959 list_del(&backref->list);
962 free_orphan_data_extents(&rec->orphan_extents);
963 free_file_extent_holes(&rec->holes);
967 static int can_free_inode_rec(struct inode_record *rec)
969 if (!rec->errors && rec->checked && rec->found_inode_item &&
970 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
975 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
976 struct inode_record *rec)
978 struct cache_extent *cache;
979 struct inode_backref *tmp, *backref;
980 struct ptr_node *node;
981 unsigned char filetype;
983 if (!rec->found_inode_item)
986 filetype = imode_to_type(rec->imode);
987 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
988 if (backref->found_dir_item && backref->found_dir_index) {
989 if (backref->filetype != filetype)
990 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
991 if (!backref->errors && backref->found_inode_ref &&
992 rec->nlink == rec->found_link) {
993 list_del(&backref->list);
999 if (!rec->checked || rec->merging)
1002 if (S_ISDIR(rec->imode)) {
1003 if (rec->found_size != rec->isize)
1004 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1005 if (rec->found_file_extent)
1006 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1007 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1008 if (rec->found_dir_item)
1009 rec->errors |= I_ERR_ODD_DIR_ITEM;
1010 if (rec->found_size != rec->nbytes)
1011 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1012 if (rec->nlink > 0 && !no_holes &&
1013 (rec->extent_end < rec->isize ||
1014 first_extent_gap(&rec->holes) < rec->isize))
1015 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1018 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1019 if (rec->found_csum_item && rec->nodatasum)
1020 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1021 if (rec->some_csum_missing && !rec->nodatasum)
1022 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1025 BUG_ON(rec->refs != 1);
1026 if (can_free_inode_rec(rec)) {
1027 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1028 node = container_of(cache, struct ptr_node, cache);
1029 BUG_ON(node->data != rec);
1030 remove_cache_extent(inode_cache, &node->cache);
1032 free_inode_rec(rec);
1036 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1038 struct btrfs_path path;
1039 struct btrfs_key key;
1042 key.objectid = BTRFS_ORPHAN_OBJECTID;
1043 key.type = BTRFS_ORPHAN_ITEM_KEY;
1046 btrfs_init_path(&path);
1047 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1048 btrfs_release_path(&path);
1054 static int process_inode_item(struct extent_buffer *eb,
1055 int slot, struct btrfs_key *key,
1056 struct shared_node *active_node)
1058 struct inode_record *rec;
1059 struct btrfs_inode_item *item;
1061 rec = active_node->current;
1062 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1063 if (rec->found_inode_item) {
1064 rec->errors |= I_ERR_DUP_INODE_ITEM;
1067 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1068 rec->nlink = btrfs_inode_nlink(eb, item);
1069 rec->isize = btrfs_inode_size(eb, item);
1070 rec->nbytes = btrfs_inode_nbytes(eb, item);
1071 rec->imode = btrfs_inode_mode(eb, item);
1072 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1074 rec->found_inode_item = 1;
1075 if (rec->nlink == 0)
1076 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1077 maybe_free_inode_rec(&active_node->inode_cache, rec);
1081 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1083 int namelen, u64 dir)
1085 struct inode_backref *backref;
1087 list_for_each_entry(backref, &rec->backrefs, list) {
1088 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1090 if (backref->dir != dir || backref->namelen != namelen)
1092 if (memcmp(name, backref->name, namelen))
1097 backref = malloc(sizeof(*backref) + namelen + 1);
1100 memset(backref, 0, sizeof(*backref));
1102 backref->namelen = namelen;
1103 memcpy(backref->name, name, namelen);
1104 backref->name[namelen] = '\0';
1105 list_add_tail(&backref->list, &rec->backrefs);
1109 static int add_inode_backref(struct cache_tree *inode_cache,
1110 u64 ino, u64 dir, u64 index,
1111 const char *name, int namelen,
1112 int filetype, int itemtype, int errors)
1114 struct inode_record *rec;
1115 struct inode_backref *backref;
1117 rec = get_inode_rec(inode_cache, ino, 1);
1118 BUG_ON(IS_ERR(rec));
1119 backref = get_inode_backref(rec, name, namelen, dir);
1122 backref->errors |= errors;
1123 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1124 if (backref->found_dir_index)
1125 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1126 if (backref->found_inode_ref && backref->index != index)
1127 backref->errors |= REF_ERR_INDEX_UNMATCH;
1128 if (backref->found_dir_item && backref->filetype != filetype)
1129 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1131 backref->index = index;
1132 backref->filetype = filetype;
1133 backref->found_dir_index = 1;
1134 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1136 if (backref->found_dir_item)
1137 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1138 if (backref->found_dir_index && backref->filetype != filetype)
1139 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1141 backref->filetype = filetype;
1142 backref->found_dir_item = 1;
1143 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1144 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1145 if (backref->found_inode_ref)
1146 backref->errors |= REF_ERR_DUP_INODE_REF;
1147 if (backref->found_dir_index && backref->index != index)
1148 backref->errors |= REF_ERR_INDEX_UNMATCH;
1150 backref->index = index;
1152 backref->ref_type = itemtype;
1153 backref->found_inode_ref = 1;
1158 maybe_free_inode_rec(inode_cache, rec);
1162 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1163 struct cache_tree *dst_cache)
1165 struct inode_backref *backref;
1170 list_for_each_entry(backref, &src->backrefs, list) {
1171 if (backref->found_dir_index) {
1172 add_inode_backref(dst_cache, dst->ino, backref->dir,
1173 backref->index, backref->name,
1174 backref->namelen, backref->filetype,
1175 BTRFS_DIR_INDEX_KEY, backref->errors);
1177 if (backref->found_dir_item) {
1179 add_inode_backref(dst_cache, dst->ino,
1180 backref->dir, 0, backref->name,
1181 backref->namelen, backref->filetype,
1182 BTRFS_DIR_ITEM_KEY, backref->errors);
1184 if (backref->found_inode_ref) {
1185 add_inode_backref(dst_cache, dst->ino,
1186 backref->dir, backref->index,
1187 backref->name, backref->namelen, 0,
1188 backref->ref_type, backref->errors);
1192 if (src->found_dir_item)
1193 dst->found_dir_item = 1;
1194 if (src->found_file_extent)
1195 dst->found_file_extent = 1;
1196 if (src->found_csum_item)
1197 dst->found_csum_item = 1;
1198 if (src->some_csum_missing)
1199 dst->some_csum_missing = 1;
1200 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1201 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1206 BUG_ON(src->found_link < dir_count);
1207 dst->found_link += src->found_link - dir_count;
1208 dst->found_size += src->found_size;
1209 if (src->extent_start != (u64)-1) {
1210 if (dst->extent_start == (u64)-1) {
1211 dst->extent_start = src->extent_start;
1212 dst->extent_end = src->extent_end;
1214 if (dst->extent_end > src->extent_start)
1215 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1216 else if (dst->extent_end < src->extent_start) {
1217 ret = add_file_extent_hole(&dst->holes,
1219 src->extent_start - dst->extent_end);
1221 if (dst->extent_end < src->extent_end)
1222 dst->extent_end = src->extent_end;
1226 dst->errors |= src->errors;
1227 if (src->found_inode_item) {
1228 if (!dst->found_inode_item) {
1229 dst->nlink = src->nlink;
1230 dst->isize = src->isize;
1231 dst->nbytes = src->nbytes;
1232 dst->imode = src->imode;
1233 dst->nodatasum = src->nodatasum;
1234 dst->found_inode_item = 1;
1236 dst->errors |= I_ERR_DUP_INODE_ITEM;
1244 static int splice_shared_node(struct shared_node *src_node,
1245 struct shared_node *dst_node)
1247 struct cache_extent *cache;
1248 struct ptr_node *node, *ins;
1249 struct cache_tree *src, *dst;
1250 struct inode_record *rec, *conflict;
1251 u64 current_ino = 0;
1255 if (--src_node->refs == 0)
1257 if (src_node->current)
1258 current_ino = src_node->current->ino;
1260 src = &src_node->root_cache;
1261 dst = &dst_node->root_cache;
1263 cache = search_cache_extent(src, 0);
1265 node = container_of(cache, struct ptr_node, cache);
1267 cache = next_cache_extent(cache);
1270 remove_cache_extent(src, &node->cache);
1273 ins = malloc(sizeof(*ins));
1275 ins->cache.start = node->cache.start;
1276 ins->cache.size = node->cache.size;
1280 ret = insert_cache_extent(dst, &ins->cache);
1281 if (ret == -EEXIST) {
1282 conflict = get_inode_rec(dst, rec->ino, 1);
1283 BUG_ON(IS_ERR(conflict));
1284 merge_inode_recs(rec, conflict, dst);
1286 conflict->checked = 1;
1287 if (dst_node->current == conflict)
1288 dst_node->current = NULL;
1290 maybe_free_inode_rec(dst, conflict);
1291 free_inode_rec(rec);
1298 if (src == &src_node->root_cache) {
1299 src = &src_node->inode_cache;
1300 dst = &dst_node->inode_cache;
1304 if (current_ino > 0 && (!dst_node->current ||
1305 current_ino > dst_node->current->ino)) {
1306 if (dst_node->current) {
1307 dst_node->current->checked = 1;
1308 maybe_free_inode_rec(dst, dst_node->current);
1310 dst_node->current = get_inode_rec(dst, current_ino, 1);
1311 BUG_ON(IS_ERR(dst_node->current));
1316 static void free_inode_ptr(struct cache_extent *cache)
1318 struct ptr_node *node;
1319 struct inode_record *rec;
1321 node = container_of(cache, struct ptr_node, cache);
1323 free_inode_rec(rec);
1327 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1329 static struct shared_node *find_shared_node(struct cache_tree *shared,
1332 struct cache_extent *cache;
1333 struct shared_node *node;
1335 cache = lookup_cache_extent(shared, bytenr, 1);
1337 node = container_of(cache, struct shared_node, cache);
1343 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1346 struct shared_node *node;
1348 node = calloc(1, sizeof(*node));
1351 node->cache.start = bytenr;
1352 node->cache.size = 1;
1353 cache_tree_init(&node->root_cache);
1354 cache_tree_init(&node->inode_cache);
1357 ret = insert_cache_extent(shared, &node->cache);
1362 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1363 struct walk_control *wc, int level)
1365 struct shared_node *node;
1366 struct shared_node *dest;
1369 if (level == wc->active_node)
1372 BUG_ON(wc->active_node <= level);
1373 node = find_shared_node(&wc->shared, bytenr);
1375 ret = add_shared_node(&wc->shared, bytenr, refs);
1377 node = find_shared_node(&wc->shared, bytenr);
1378 wc->nodes[level] = node;
1379 wc->active_node = level;
1383 if (wc->root_level == wc->active_node &&
1384 btrfs_root_refs(&root->root_item) == 0) {
1385 if (--node->refs == 0) {
1386 free_inode_recs_tree(&node->root_cache);
1387 free_inode_recs_tree(&node->inode_cache);
1388 remove_cache_extent(&wc->shared, &node->cache);
1394 dest = wc->nodes[wc->active_node];
1395 splice_shared_node(node, dest);
1396 if (node->refs == 0) {
1397 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Leave the active shared node at @level on the way back up the tree:
 * find the next higher populated level, make it active, and splice the
 * departing node's records into it (unless we are at the root of a
 * still-referenced tree).
 */
1403 static int leave_shared_node(struct btrfs_root *root,
1404 struct walk_control *wc, int level)
1406 struct shared_node *node;
1407 struct shared_node *dest;
1410 if (level == wc->root_level)
/* scan upward for the nearest level that still has a shared node */
1413 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1417 BUG_ON(i >= BTRFS_MAX_LEVEL);
1419 node = wc->nodes[wc->active_node];
1420 wc->nodes[wc->active_node] = NULL;
1421 wc->active_node = i;
1423 dest = wc->nodes[wc->active_node];
1424 if (wc->active_node < wc->root_level ||
1425 btrfs_root_refs(&root->root_item) > 0) {
/* a shared node being left must still be referenced elsewhere */
1426 BUG_ON(node->refs <= 1);
1427 splice_shared_node(node, dest);
1429 BUG_ON(node->refs < 2);
1438 * 1 - if the root with id child_root_id is a child of root parent_root_id
1439 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1440 * has other root(s) as parent(s)
1441 * 2 - if the root child_root_id doesn't have any parent roots
/*
 * Decide the parent/child relationship of two subvolume roots by
 * searching the tree of tree roots. See the return-value contract in
 * the comment block just above this function (1: child of parent,
 * 0: has other parents, 2: no parents at all).
 */
1443 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1446 struct btrfs_path path;
1447 struct btrfs_key key;
1448 struct extent_buffer *leaf;
1452 btrfs_init_path(&path);
/* fast path: direct ROOT_REF from parent to child */
1454 key.objectid = parent_root_id;
1455 key.type = BTRFS_ROOT_REF_KEY;
1456 key.offset = child_root_id;
1457 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1461 btrfs_release_path(&path);
/* slow path: walk all ROOT_BACKREFs of the child */
1465 key.objectid = child_root_id;
1466 key.type = BTRFS_ROOT_BACKREF_KEY;
1468 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1474 leaf = path.nodes[0];
1475 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1476 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1479 leaf = path.nodes[0];
1482 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
/* stop once we run past this child's backref items */
1483 if (key.objectid != child_root_id ||
1484 key.type != BTRFS_ROOT_BACKREF_KEY)
1489 if (key.offset == parent_root_id) {
1490 btrfs_release_path(&path);
1497 btrfs_release_path(&path);
1500 return has_parent ? 0 : 2;
/*
 * Record every dir entry packed in one DIR_ITEM/DIR_INDEX leaf item as
 * inode backrefs on the currently tracked inode. Entries pointing at
 * INODE_ITEMs go to the inode cache, entries pointing at ROOT_ITEMs
 * (subvolume links) go to the root cache; anything else is flagged
 * with a BTRFS_MULTIPLE_OBJECTIDS placeholder.
 */
1503 static int process_dir_item(struct btrfs_root *root,
1504 struct extent_buffer *eb,
1505 int slot, struct btrfs_key *key,
1506 struct shared_node *active_node)
1516 struct btrfs_dir_item *di;
1517 struct inode_record *rec;
1518 struct cache_tree *root_cache;
1519 struct cache_tree *inode_cache;
1520 struct btrfs_key location;
1521 char namebuf[BTRFS_NAME_LEN];
1523 root_cache = &active_node->root_cache;
1524 inode_cache = &active_node->inode_cache;
1525 rec = active_node->current;
1526 rec->found_dir_item = 1;
1528 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1529 total = btrfs_item_size_nr(eb, slot);
/* one leaf item can hold several dir entries back to back */
1530 while (cur < total) {
1532 btrfs_dir_item_key_to_cpu(eb, di, &location);
1533 name_len = btrfs_dir_name_len(eb, di);
1534 data_len = btrfs_dir_data_len(eb, di);
1535 filetype = btrfs_dir_type(eb, di);
/* dir isize accounting is the sum of entry name lengths */
1537 rec->found_size += name_len;
1538 if (name_len <= BTRFS_NAME_LEN) {
/* clamp over-long names and remember the error for the backref */
1542 len = BTRFS_NAME_LEN;
1543 error = REF_ERR_NAME_TOO_LONG;
1545 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1547 if (location.type == BTRFS_INODE_ITEM_KEY) {
1548 add_inode_backref(inode_cache, location.objectid,
1549 key->objectid, key->offset, namebuf,
1550 len, filetype, key->type, error);
1551 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1552 add_inode_backref(root_cache, location.objectid,
1553 key->objectid, key->offset,
1554 namebuf, len, filetype,
1557 fprintf(stderr, "invalid location in dir item %u\n",
1559 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1560 key->objectid, key->offset, namebuf,
1561 len, filetype, key->type, error);
/* advance to the next packed entry (header + name + data) */
1564 len = sizeof(*di) + name_len + data_len;
1565 di = (struct btrfs_dir_item *)((char *)di + len);
/* a DIR_INDEX item must contain exactly one entry */
1568 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1569 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Record every INODE_REF packed in one leaf item as a backref on the
 * inode cache: key->objectid is the inode, key->offset its parent dir.
 */
1574 static int process_inode_ref(struct extent_buffer *eb,
1575 int slot, struct btrfs_key *key,
1576 struct shared_node *active_node)
1584 struct cache_tree *inode_cache;
1585 struct btrfs_inode_ref *ref;
1586 char namebuf[BTRFS_NAME_LEN];
1588 inode_cache = &active_node->inode_cache;
1590 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1591 total = btrfs_item_size_nr(eb, slot);
/* an item may pack several refs (multiple links from one dir) */
1592 while (cur < total) {
1593 name_len = btrfs_inode_ref_name_len(eb, ref);
1594 index = btrfs_inode_ref_index(eb, ref);
1595 if (name_len <= BTRFS_NAME_LEN) {
/* clamp over-long names, flag the error on the backref */
1599 len = BTRFS_NAME_LEN;
1600 error = REF_ERR_NAME_TOO_LONG;
1602 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1603 add_inode_backref(inode_cache, key->objectid, key->offset,
1604 index, namebuf, len, 0, key->type, error);
/* step over header + inline name to the next packed ref */
1606 len = sizeof(*ref) + name_len;
1607 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Same as process_inode_ref() but for extended refs (INODE_EXTREF),
 * where the parent directory id is stored inside the item itself
 * rather than in the key offset.
 */
1613 static int process_inode_extref(struct extent_buffer *eb,
1614 int slot, struct btrfs_key *key,
1615 struct shared_node *active_node)
1624 struct cache_tree *inode_cache;
1625 struct btrfs_inode_extref *extref;
1626 char namebuf[BTRFS_NAME_LEN];
1628 inode_cache = &active_node->inode_cache;
1630 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1631 total = btrfs_item_size_nr(eb, slot);
1632 while (cur < total) {
1633 name_len = btrfs_inode_extref_name_len(eb, extref);
1634 index = btrfs_inode_extref_index(eb, extref);
/* parent dir comes from the extref payload, not the key */
1635 parent = btrfs_inode_extref_parent(eb, extref);
1636 if (name_len <= BTRFS_NAME_LEN) {
1640 len = BTRFS_NAME_LEN;
1641 error = REF_ERR_NAME_TOO_LONG;
1643 read_extent_buffer(eb, namebuf,
1644 (unsigned long)(extref + 1), len);
1645 add_inode_backref(inode_cache, key->objectid, parent,
1646 index, namebuf, len, 0, key->type, error);
1648 len = sizeof(*extref) + name_len;
1649 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the logical range [@start, @start + @len)
 * are covered by checksum items in the csum tree; the covered byte
 * count is returned through @found.
 */
1656 static int count_csum_range(struct btrfs_root *root, u64 start,
1657 u64 len, u64 *found)
1659 struct btrfs_key key;
1660 struct btrfs_path path;
1661 struct extent_buffer *leaf;
1666 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1668 btrfs_init_path(&path);
1670 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1672 key.type = BTRFS_EXTENT_CSUM_KEY;
1674 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* the previous item may still cover @start — step back one slot */
1678 if (ret > 0 && path.slots[0] > 0) {
1679 leaf = path.nodes[0];
1680 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1681 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1682 key.type == BTRFS_EXTENT_CSUM_KEY)
1687 leaf = path.nodes[0];
1688 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1689 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1694 leaf = path.nodes[0];
1697 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1698 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1699 key.type != BTRFS_EXTENT_CSUM_KEY)
1702 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1703 if (key.offset >= start + len)
1706 if (key.offset > start)
/* one csum entry covers one sector; item size / csum_size sectors */
1709 size = btrfs_item_size_nr(leaf, path.slots[0]);
1710 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1711 if (csum_end > start) {
1712 size = min(csum_end - start, len);
1721 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item of the current inode: track the
 * contiguous file offset coverage (detect overlaps and holes),
 * sanity-check the extent fields, account found_size, and verify
 * that REG extents have csums while PREALLOC extents do not.
 */
1727 static int process_file_extent(struct btrfs_root *root,
1728 struct extent_buffer *eb,
1729 int slot, struct btrfs_key *key,
1730 struct shared_node *active_node)
1732 struct inode_record *rec;
1733 struct btrfs_file_extent_item *fi;
1735 u64 disk_bytenr = 0;
1736 u64 extent_offset = 0;
/* sector-alignment mask for sizes and offsets */
1737 u64 mask = root->sectorsize - 1;
1741 rec = active_node->current;
1742 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1743 rec->found_file_extent = 1;
1745 if (rec->extent_start == (u64)-1) {
1746 rec->extent_start = key->offset;
1747 rec->extent_end = key->offset;
/* previous extent ended past this one: overlap; ended before: hole */
1750 if (rec->extent_end > key->offset)
1751 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1752 else if (rec->extent_end < key->offset) {
1753 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1754 key->offset - rec->extent_end);
1759 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1760 extent_type = btrfs_file_extent_type(eb, fi);
1762 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1763 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1765 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1766 rec->found_size += num_bytes;
/* round inline length up to a sector for coverage tracking */
1767 num_bytes = (num_bytes + mask) & ~mask;
1768 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1769 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1770 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1771 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1772 extent_offset = btrfs_file_extent_offset(eb, fi);
1773 if (num_bytes == 0 || (num_bytes & mask))
1774 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1775 if (num_bytes + extent_offset >
1776 btrfs_file_extent_ram_bytes(eb, fi))
1777 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* prealloc extents may not be compressed/encrypted/encoded */
1778 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1779 (btrfs_file_extent_compression(eb, fi) ||
1780 btrfs_file_extent_encryption(eb, fi) ||
1781 btrfs_file_extent_other_encoding(eb, fi)))
1782 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 means a hole extent — no size contribution */
1783 if (disk_bytenr > 0)
1784 rec->found_size += num_bytes;
1786 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1788 rec->extent_end = key->offset + num_bytes;
1791 * The data reloc tree will copy full extents into its inode and then
1792 * copy the corresponding csums. Because the extent it copied could be
1793 * a preallocated extent that hasn't been written to yet there may be no
1794 * csums to copy, ergo we won't have csums for our file extent. This is
1795 * ok so just don't bother checking csums if the inode belongs to the
1798 if (disk_bytenr > 0 &&
1799 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* compressed extents are checksummed over their on-disk bytes */
1801 if (btrfs_file_extent_compression(eb, fi))
1802 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1804 disk_bytenr += extent_offset;
1806 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1809 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1811 rec->found_csum_item = 1;
1812 if (found < num_bytes)
1813 rec->some_csum_missing = 1;
/* prealloc extents must NOT have csums */
1814 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1816 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Walk all items of a fs-tree leaf and dispatch each to the matching
 * item processor, maintaining active_node->current as the inode record
 * the items belong to (items of one inode are contiguous in the leaf).
 */
1822 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1823 struct walk_control *wc)
1825 struct btrfs_key key;
1829 struct cache_tree *inode_cache;
1830 struct shared_node *active_node;
/* skip leaves of dead roots at root level — nothing to collect */
1832 if (wc->root_level == wc->active_node &&
1833 btrfs_root_refs(&root->root_item) == 0)
1836 active_node = wc->nodes[wc->active_node];
1837 inode_cache = &active_node->inode_cache;
1838 nritems = btrfs_header_nritems(eb);
1839 for (i = 0; i < nritems; i++) {
1840 btrfs_item_key_to_cpu(eb, &key, i);
1842 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1844 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* crossing into a new inode: finalize the previous record */
1847 if (active_node->current == NULL ||
1848 active_node->current->ino < key.objectid) {
1849 if (active_node->current) {
1850 active_node->current->checked = 1;
1851 maybe_free_inode_rec(inode_cache,
1852 active_node->current);
1854 active_node->current = get_inode_rec(inode_cache,
1856 BUG_ON(IS_ERR(active_node->current));
1859 case BTRFS_DIR_ITEM_KEY:
1860 case BTRFS_DIR_INDEX_KEY:
1861 ret = process_dir_item(root, eb, i, &key, active_node);
1863 case BTRFS_INODE_REF_KEY:
1864 ret = process_inode_ref(eb, i, &key, active_node);
1866 case BTRFS_INODE_EXTREF_KEY:
1867 ret = process_inode_extref(eb, i, &key, active_node);
1869 case BTRFS_INODE_ITEM_KEY:
1870 ret = process_inode_item(eb, i, &key, active_node);
1872 case BTRFS_EXTENT_DATA_KEY:
1873 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for the children of @node starting at @slot so the
 * subsequent synchronous reads in walk_down_tree() hit warm buffers.
 * Best-effort only: readahead results are not checked.
 */
1883 static void reada_walk_down(struct btrfs_root *root,
1884 struct extent_buffer *node, int slot)
1893 level = btrfs_header_level(node);
1897 nritems = btrfs_header_nritems(node);
1898 blocksize = root->nodesize;
1899 for (i = slot; i < nritems; i++) {
1900 bytenr = btrfs_node_blockptr(node, i);
1901 ptr_gen = btrfs_node_ptr_generation(node, i);
1902 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1907 * Check the child node/leaf by the following condition:
1908 * 1. the first item key of the node/leaf should be the same with the one
1910 * 2. block in parent node should match the child node/leaf.
1911 * 3. generation of parent node and child's header should be consistent.
1913 * Or the child node/leaf pointed by the key in parent is not valid.
1915 * We hope to check leaf owner too, but since subvol may share leaves,
1916 * which makes leaf owner check not so strong, key check should be
1917 * sufficient enough for that case.
/*
 * Cross-check a child block against its parent pointer: first key,
 * block address, and generation must all match (see the comment block
 * above for the full rationale). Each mismatch prints a diagnostic;
 * the accumulated status is returned via lines not visible here.
 */
1919 static int check_child_node(struct btrfs_root *root,
1920 struct extent_buffer *parent, int slot,
1921 struct extent_buffer *child)
1923 struct btrfs_key parent_key;
1924 struct btrfs_key child_key;
1927 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* leaves store item keys, internal nodes store node keys */
1928 if (btrfs_header_level(child) == 0)
1929 btrfs_item_key_to_cpu(child, &child_key, 0);
1931 btrfs_node_key_to_cpu(child, &child_key, 0);
1933 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1936 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1937 parent_key.objectid, parent_key.type, parent_key.offset,
1938 child_key.objectid, child_key.type, child_key.offset);
1940 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1942 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1943 btrfs_node_blockptr(parent, slot),
1944 btrfs_header_bytenr(child));
1946 if (btrfs_node_ptr_generation(parent, slot) !=
1947 btrfs_header_generation(child)) {
1949 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1950 btrfs_header_generation(child),
1951 btrfs_node_ptr_generation(parent, slot));
1957 u64 bytenr[BTRFS_MAX_LEVEL];
1958 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend one step of the fs-tree walk: for the current path position,
 * look up (and cache in @nrefs) extent refcounts, enter shared nodes,
 * read and validate child blocks, and push the next child onto the
 * path. Leaves are handed to process_one_leaf().
 * NOTE(review): partial extract — error paths and some declarations
 * are on missing lines; code kept byte-identical.
 */
1961 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1962 struct walk_control *wc, int *level,
1963 struct node_refs *nrefs)
1965 enum btrfs_tree_block_status status;
1968 struct extent_buffer *next;
1969 struct extent_buffer *cur;
1974 WARN_ON(*level < 0);
1975 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* reuse the cached refcount when revisiting the same block */
1977 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1978 refs = nrefs->refs[*level];
1981 ret = btrfs_lookup_extent_info(NULL, root,
1982 path->nodes[*level]->start,
1983 *level, 1, &refs, NULL);
1988 nrefs->bytenr[*level] = path->nodes[*level]->start;
1989 nrefs->refs[*level] = refs;
1993 ret = enter_shared_node(root, path->nodes[*level]->start,
2001 while (*level >= 0) {
2002 WARN_ON(*level < 0);
2003 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2004 cur = path->nodes[*level];
2006 if (btrfs_header_level(cur) != *level)
2009 if (path->slots[*level] >= btrfs_header_nritems(cur))
2012 ret = process_one_leaf(root, cur, wc);
2017 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2018 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2019 blocksize = root->nodesize;
/* same per-level refcount cache for the child block */
2021 if (bytenr == nrefs->bytenr[*level - 1]) {
2022 refs = nrefs->refs[*level - 1];
2024 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2025 *level - 1, 1, &refs, NULL);
2029 nrefs->bytenr[*level - 1] = bytenr;
2030 nrefs->refs[*level - 1] = refs;
2035 ret = enter_shared_node(root, bytenr, refs,
/* shared subtree already processed: skip this child */
2038 path->slots[*level]++;
2043 next = btrfs_find_tree_block(root, bytenr, blocksize);
2044 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2045 free_extent_buffer(next);
2046 reada_walk_down(root, cur, path->slots[*level]);
2047 next = read_tree_block(root, bytenr, blocksize,
2049 if (!extent_buffer_uptodate(next)) {
2050 struct btrfs_key node_key;
2052 btrfs_node_key_to_cpu(path->nodes[*level],
2054 path->slots[*level]);
/* unreadable child: record the parent block as corrupt */
2055 btrfs_add_corrupt_extent_record(root->fs_info,
2057 path->nodes[*level]->start,
2058 root->nodesize, *level);
2064 ret = check_child_node(root, cur, path->slots[*level], next);
2070 if (btrfs_is_leaf(next))
2071 status = btrfs_check_leaf(root, NULL, next);
2073 status = btrfs_check_node(root, NULL, next);
2074 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2075 free_extent_buffer(next);
/* descend: replace the stale buffer at the lower level */
2080 *level = *level - 1;
2081 free_extent_buffer(path->nodes[*level]);
2082 path->nodes[*level] = next;
2083 path->slots[*level] = 0;
2086 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Pop finished levels off the walk path: advance the slot where more
 * siblings remain, otherwise release the buffer and leave the shared
 * node bookkeeping for that level.
 */
2090 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2091 struct walk_control *wc, int *level)
2094 struct extent_buffer *leaf;
2096 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2097 leaf = path->nodes[i];
/* more siblings at this level — resume the walk here */
2098 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2103 free_extent_buffer(path->nodes[*level]);
2104 path->nodes[*level] = NULL;
2105 BUG_ON(*level > wc->active_node);
2106 if (*level == wc->active_node)
2107 leave_shared_node(root, wc, *level);
/*
 * Verify that @rec looks like a valid subvolume root directory:
 * exactly one link, no errors, and a single ".." inode-ref backref
 * with index 0 and no dir item/index entries.
 */
2114 static int check_root_dir(struct inode_record *rec)
2116 struct inode_backref *backref;
2119 if (!rec->found_inode_item || rec->errors)
2121 if (rec->nlink != 1 || rec->found_link != 0)
2123 if (list_empty(&rec->backrefs))
2125 backref = to_inode_backref(rec->backrefs.next);
2126 if (!backref->found_inode_ref)
/* root dir's only name is ".." at index 0 */
2128 if (backref->index != 0 || backref->namelen != 2 ||
2129 memcmp(backref->name, "..", 2))
2131 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Repair a wrong directory isize: rewrite the INODE_ITEM's size field
 * with the found_size accumulated during the walk, then clear the
 * I_ERR_DIR_ISIZE_WRONG flag on the record.
 */
2138 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2139 struct btrfs_root *root, struct btrfs_path *path,
2140 struct inode_record *rec)
2142 struct btrfs_inode_item *ei;
2143 struct btrfs_key key;
2146 key.objectid = rec->ino;
2147 key.type = BTRFS_INODE_ITEM_KEY;
/* offset (u64)-1 + search positions us just after the inode item */
2148 key.offset = (u64)-1;
2150 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2154 if (!path->slots[0]) {
2161 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2162 if (key.objectid != rec->ino) {
2167 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2168 struct btrfs_inode_item);
2169 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2170 btrfs_mark_buffer_dirty(path->nodes[0]);
2171 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2172 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2173 root->root_key.objectid);
2175 btrfs_release_path(path);
/*
 * Insert the missing ORPHAN_ITEM for an unlinked inode and clear the
 * I_ERR_NO_ORPHAN_ITEM flag on success.
 */
2179 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2180 struct btrfs_root *root,
2181 struct btrfs_path *path,
2182 struct inode_record *rec)
2186 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2187 btrfs_release_path(path);
2189 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Repair a wrong nbytes field: rewrite the INODE_ITEM's nbytes with
 * the found_size accumulated from the file extents, then clear
 * I_ERR_FILE_NBYTES_WRONG.
 */
2193 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2194 struct btrfs_root *root,
2195 struct btrfs_path *path,
2196 struct inode_record *rec)
2198 struct btrfs_inode_item *ei;
2199 struct btrfs_key key;
2202 key.objectid = rec->ino;
2203 key.type = BTRFS_INODE_ITEM_KEY;
2206 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2213 /* Since ret == 0, no need to check anything */
2214 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2215 struct btrfs_inode_item);
2216 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2217 btrfs_mark_buffer_dirty(path->nodes[0]);
2218 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2219 printf("reset nbytes for ino %llu root %llu\n",
2220 rec->ino, root->root_key.objectid);
2222 btrfs_release_path(path);
/*
 * Insert a missing DIR_INDEX item reconstructed from @backref (which
 * was found via the matching dir item / inode ref), committing the
 * change in its own transaction, then update the parent dir's isize
 * accounting in the in-memory record.
 */
2226 static int add_missing_dir_index(struct btrfs_root *root,
2227 struct cache_tree *inode_cache,
2228 struct inode_record *rec,
2229 struct inode_backref *backref)
2231 struct btrfs_path *path;
2232 struct btrfs_trans_handle *trans;
2233 struct btrfs_dir_item *dir_item;
2234 struct extent_buffer *leaf;
2235 struct btrfs_key key;
2236 struct btrfs_disk_key disk_key;
2237 struct inode_record *dir_rec;
2238 unsigned long name_ptr;
/* item payload: dir_item header followed by the inline name */
2239 u32 data_size = sizeof(*dir_item) + backref->namelen;
2242 path = btrfs_alloc_path();
2246 trans = btrfs_start_transaction(root, 1);
2247 if (IS_ERR(trans)) {
2248 btrfs_free_path(path);
2249 return PTR_ERR(trans);
2252 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2253 (unsigned long long)rec->ino);
2254 key.objectid = backref->dir;
2255 key.type = BTRFS_DIR_INDEX_KEY;
2256 key.offset = backref->index;
2258 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2261 leaf = path->nodes[0];
2262 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* location key inside the dir item points back at the child inode */
2264 disk_key.objectid = cpu_to_le64(rec->ino);
2265 disk_key.type = BTRFS_INODE_ITEM_KEY;
2266 disk_key.offset = 0;
2268 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2269 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2270 btrfs_set_dir_data_len(leaf, dir_item, 0);
2271 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2272 name_ptr = (unsigned long)(dir_item + 1);
2273 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2274 btrfs_mark_buffer_dirty(leaf);
2275 btrfs_free_path(path);
2276 btrfs_commit_transaction(trans, root);
2278 backref->found_dir_index = 1;
2279 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2280 BUG_ON(IS_ERR(dir_rec));
/* re-evaluate the parent dir's isize now that the entry exists */
2283 dir_rec->found_size += backref->namelen;
2284 if (dir_rec->found_size == dir_rec->isize &&
2285 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2286 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2287 if (dir_rec->found_size != dir_rec->isize)
2288 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Delete a bad DIR_INDEX entry described by @backref in its own
 * transaction. When the index holds multiple names the single name is
 * removed via btrfs_delete_one_dir_name(); otherwise the whole item
 * is deleted.
 */
2293 static int delete_dir_index(struct btrfs_root *root,
2294 struct cache_tree *inode_cache,
2295 struct inode_record *rec,
2296 struct inode_backref *backref)
2298 struct btrfs_trans_handle *trans;
2299 struct btrfs_dir_item *di;
2300 struct btrfs_path *path;
2303 path = btrfs_alloc_path();
2307 trans = btrfs_start_transaction(root, 1);
2308 if (IS_ERR(trans)) {
2309 btrfs_free_path(path);
2310 return PTR_ERR(trans);
2314 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2315 (unsigned long long)backref->dir,
2316 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2317 (unsigned long long)root->objectid);
/* mod -1 asks the lookup to prepare the path for deletion */
2319 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2320 backref->name, backref->namelen,
2321 backref->index, -1);
2324 btrfs_free_path(path);
2325 btrfs_commit_transaction(trans, root);
2332 ret = btrfs_del_item(trans, root, path);
2334 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2336 btrfs_free_path(path);
2337 btrfs_commit_transaction(trans, root);
/*
 * Recreate a missing INODE_ITEM for @rec from whatever evidence was
 * collected (dir items imply a directory, otherwise a regular file).
 * Timestamps are reset to "now", mode to 0755; the user is warned that
 * the result may be incomplete.
 */
2341 static int create_inode_item(struct btrfs_root *root,
2342 struct inode_record *rec,
2343 struct inode_backref *backref, int root_dir)
2345 struct btrfs_trans_handle *trans;
2346 struct btrfs_inode_item inode_item;
2347 time_t now = time(NULL);
2350 trans = btrfs_start_transaction(root, 1);
2351 if (IS_ERR(trans)) {
2352 ret = PTR_ERR(trans);
2356 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2357 "be incomplete, please check permissions and content after "
2358 "the fsck completes.\n", (unsigned long long)root->objectid,
2359 (unsigned long long)rec->ino);
2361 memset(&inode_item, 0, sizeof(inode_item));
2362 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2364 btrfs_set_stack_inode_nlink(&inode_item, 1);
2366 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2367 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
/* dir items found: treat the inode as a directory */
2368 if (rec->found_dir_item) {
2369 if (rec->found_file_extent)
2370 fprintf(stderr, "root %llu inode %llu has both a dir "
2371 "item and extents, unsure if it is a dir or a "
2372 "regular file so setting it as a directory\n",
2373 (unsigned long long)root->objectid,
2374 (unsigned long long)rec->ino);
2375 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2376 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2377 } else if (!rec->found_dir_item) {
2378 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2379 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* reset all timestamps; original values are unrecoverable */
2381 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2382 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2383 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2384 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2385 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2386 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2387 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2388 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2390 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2392 btrfs_commit_transaction(trans, root);
/*
 * Repair the backrefs of one inode record: delete bad dir indexes,
 * add missing dir index/item pairs, recreate the inode item when all
 * naming structures agree but the item is absent. Returns a negative
 * errno, or the number of repairs performed (0 when clean).
 * NOTE(review): partial extract — @delete's declaration and several
 * control-flow lines are missing; code kept byte-identical.
 */
2396 static int repair_inode_backrefs(struct btrfs_root *root,
2397 struct inode_record *rec,
2398 struct cache_tree *inode_cache,
2401 struct inode_backref *tmp, *backref;
2402 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2406 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
/* root dir with no inode item: recreate it first */
2407 if (!delete && rec->ino == root_dirid) {
2408 if (!rec->found_inode_item) {
2409 ret = create_inode_item(root, rec, backref, 1);
2416 /* Index 0 for root dir's are special, don't mess with it */
2417 if (rec->ino == root_dirid && backref->index == 0)
/* dir index without (or contradicting) an inode ref: delete it */
2421 ((backref->found_dir_index && !backref->found_inode_ref) ||
2422 (backref->found_dir_index && backref->found_inode_ref &&
2423 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2424 ret = delete_dir_index(root, inode_cache, rec, backref);
2428 list_del(&backref->list);
2432 if (!delete && !backref->found_dir_index &&
2433 backref->found_dir_item && backref->found_inode_ref) {
2434 ret = add_missing_dir_index(root, inode_cache, rec,
2439 if (backref->found_dir_item &&
2440 backref->found_dir_index &&
2441 backref->found_dir_index) {
2442 if (!backref->errors &&
2443 backref->found_inode_ref) {
2444 list_del(&backref->list);
/* inode ref exists but no dir entries at all: add the pair */
2450 if (!delete && (!backref->found_dir_index &&
2451 !backref->found_dir_item &&
2452 backref->found_inode_ref)) {
2453 struct btrfs_trans_handle *trans;
2454 struct btrfs_key location;
2456 ret = check_dir_conflict(root, backref->name,
2462 * let nlink fixing routine to handle it,
2463 * which can do it better.
2468 location.objectid = rec->ino;
2469 location.type = BTRFS_INODE_ITEM_KEY;
2470 location.offset = 0;
2472 trans = btrfs_start_transaction(root, 1);
2473 if (IS_ERR(trans)) {
2474 ret = PTR_ERR(trans);
2477 fprintf(stderr, "adding missing dir index/item pair "
2479 (unsigned long long)rec->ino);
2480 ret = btrfs_insert_dir_item(trans, root, backref->name,
2482 backref->dir, &location,
2483 imode_to_type(rec->imode),
2486 btrfs_commit_transaction(trans, root);
/* all naming structures consistent but no inode item: recreate */
2490 if (!delete && (backref->found_inode_ref &&
2491 backref->found_dir_index &&
2492 backref->found_dir_item &&
2493 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2494 !rec->found_inode_item)) {
2495 ret = create_inode_item(root, rec, backref, 0);
2502 return ret ? ret : repaired;
2506 * To determine the file type for nlink/inode_item repair
2508 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2509 * Return -ENOENT if file type is not found.
/*
 * Determine the BTRFS_FT_* file type of @rec for repair: prefer the
 * recovered inode item's mode, otherwise take the filetype from any
 * dir index/item backref. See the contract comment above (0 on
 * success, -ENOENT when undeterminable).
 */
2511 static int find_file_type(struct inode_record *rec, u8 *type)
2513 struct inode_backref *backref;
2515 /* For inode item recovered case */
2516 if (rec->found_inode_item) {
2517 *type = imode_to_type(rec->imode);
2521 list_for_each_entry(backref, &rec->backrefs, list) {
2522 if (backref->found_dir_index || backref->found_dir_item) {
2523 *type = backref->filetype;
2531 * To determine the file name for nlink repair
2533 * Return 0 if file name is found, set name and namelen.
2534 * Return -ENOENT if file name is not found.
/*
 * Recover a usable file name for @rec from any backref that carries
 * one (dir index/item or inode ref); copies it into @name and sets
 * @namelen. Returns 0 on success, -ENOENT when no name is available
 * (per the contract comment above).
 */
2536 static int find_file_name(struct inode_record *rec,
2537 char *name, int *namelen)
2539 struct inode_backref *backref;
2541 list_for_each_entry(backref, &rec->backrefs, list) {
2542 if (backref->found_dir_index || backref->found_dir_item ||
2543 backref->found_inode_ref) {
2544 memcpy(name, backref->name, backref->namelen);
2545 *namelen = backref->namelen;
2552 /* Reset the nlink of the inode to the correct one */
2553 static int reset_nlink(struct btrfs_trans_handle *trans,
2554 struct btrfs_root *root,
2555 struct btrfs_path *path,
2556 struct inode_record *rec)
2558 struct inode_backref *backref;
2559 struct inode_backref *tmp;
2560 struct btrfs_key key;
2561 struct btrfs_inode_item *inode_item;
2564 /* We don't believe this either, reset it and iterate backref */
2565 rec->found_link = 0;
2567 /* Remove all backref including the valid ones */
2568 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2569 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2570 backref->index, backref->name,
2571 backref->namelen, 0);
2575 /* remove invalid backref, so it won't be added back */
2576 if (!(backref->found_dir_index &&
2577 backref->found_dir_item &&
2578 backref->found_inode_ref)) {
2579 list_del(&backref->list);
2586 /* Set nlink to 0 */
2587 key.objectid = rec->ino;
2588 key.type = BTRFS_INODE_ITEM_KEY;
2590 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2597 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2598 struct btrfs_inode_item);
2599 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2600 btrfs_mark_buffer_dirty(path->nodes[0]);
2601 btrfs_release_path(path);
2604 * Add back valid inode_ref/dir_item/dir_index,
2605 * add_link() will handle the nlink inc, so new nlink must be correct
2607 list_for_each_entry(backref, &rec->backrefs, list) {
2608 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2609 backref->name, backref->namelen,
2610 backref->filetype, &backref->index, 1);
2615 btrfs_release_path(path);
/*
 * Fix a wrong link count: recover name/type, rebuild all links via
 * reset_nlink(), and if the inode ends up with no valid links at all,
 * link it into a (possibly newly created) "lost+found" directory,
 * de-duplicating the name with an ".INO" suffix as needed. Clears
 * I_ERR_LINK_COUNT_WRONG even on failure to avoid repair loops.
 */
2619 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2620 struct btrfs_root *root,
2621 struct btrfs_path *path,
2622 struct inode_record *rec)
2624 char *dir_name = "lost+found";
2625 char namebuf[BTRFS_NAME_LEN] = {0};
2630 int name_recovered = 0;
2631 int type_recovered = 0;
2635 * Get file name and type first before these invalid inode ref
2636 * are deleted by remove_all_invalid_backref()
2638 name_recovered = !find_file_name(rec, namebuf, &namelen);
2639 type_recovered = !find_file_type(rec, &type);
2641 if (!name_recovered) {
2642 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2643 rec->ino, rec->ino);
/* fall back to the inode number itself as the name */
2644 namelen = count_digits(rec->ino);
2645 sprintf(namebuf, "%llu", rec->ino);
2648 if (!type_recovered) {
2649 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2651 type = BTRFS_FT_REG_FILE;
2655 ret = reset_nlink(trans, root, path, rec);
2658 "Failed to reset nlink for inode %llu: %s\n",
2659 rec->ino, strerror(-ret));
/* orphaned inode: relocate it under lost+found */
2663 if (rec->found_link == 0) {
2664 lost_found_ino = root->highest_inode;
2665 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2670 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2671 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2674 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2675 dir_name, strerror(-ret));
2678 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2679 namebuf, namelen, type, NULL, 1);
2681 * Add ".INO" suffix several times to handle case where
2682 * "FILENAME.INO" is already taken by another file.
2684 while (ret == -EEXIST) {
2686 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2688 if (namelen + count_digits(rec->ino) + 1 >
2693 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2695 namelen += count_digits(rec->ino) + 1;
2696 ret = btrfs_add_link(trans, root, rec->ino,
2697 lost_found_ino, namebuf,
2698 namelen, type, NULL, 1);
2702 "Failed to link the inode %llu to %s dir: %s\n",
2703 rec->ino, dir_name, strerror(-ret));
2707 * Just increase the found_link, don't actually add the
2708 * backref. This will make things easier and this inode
2709 * record will be freed after the repair is done.
2710 * So fsck will not report problem about this inode.
2713 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2714 namelen, namebuf, dir_name);
2716 printf("Fixed the nlink of inode %llu\n", rec->ino);
2719 * Clear the flag anyway, or we will loop forever for the same inode
2720 * as it will not be removed from the bad inode list and the dead loop
2723 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2724 btrfs_release_path(path);
2729 * Check if there is any normal(reg or prealloc) file extent for given
2731 * This is used to determine the file type when neither its dir_index/item or
2732 * inode_item exists.
2734 * This will *NOT* report error, if any error happens, just consider it does
2735 * not have any normal file extent.
/*
 * Return non-zero if inode @ino has at least one non-inline (REG or
 * PREALLOC) file extent. Best-effort: any lookup error is treated as
 * "no normal extent" (see the contract comment above).
 */
2737 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2739 struct btrfs_path *path;
2740 struct btrfs_key key;
2741 struct btrfs_key found_key;
2742 struct btrfs_file_extent_item *fi;
2746 path = btrfs_alloc_path();
2750 key.type = BTRFS_EXTENT_DATA_KEY;
2753 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2758 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2759 ret = btrfs_next_leaf(root, path);
2766 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
/* stop once we leave this inode's EXTENT_DATA items */
2768 if (found_key.objectid != ino ||
2769 found_key.type != BTRFS_EXTENT_DATA_KEY)
2771 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2772 struct btrfs_file_extent_item);
2773 type = btrfs_file_extent_type(path->nodes[0], fi);
2774 if (type != BTRFS_FILE_EXTENT_INLINE) {
2780 btrfs_free_path(path);
2784 static u32 btrfs_type_to_imode(u8 type)
2786 static u32 imode_by_btrfs_type[] = {
2787 [BTRFS_FT_REG_FILE] = S_IFREG,
2788 [BTRFS_FT_DIR] = S_IFDIR,
2789 [BTRFS_FT_CHRDEV] = S_IFCHR,
2790 [BTRFS_FT_BLKDEV] = S_IFBLK,
2791 [BTRFS_FT_FIFO] = S_IFIFO,
2792 [BTRFS_FT_SOCK] = S_IFSOCK,
2793 [BTRFS_FT_SYMLINK] = S_IFLNK,
2796 return imode_by_btrfs_type[(type)];
2799 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2800 				struct btrfs_root *root,
2801 				struct btrfs_path *path,
2802 				struct inode_record *rec)
/*
 * Rebuild a missing inode_item for @rec->ino.  Only the inode item itself
 * is recreated here; nlink repair (e.g. moving to lost+found) is left to
 * the dedicated nlink-repair stage, which we force to run by setting
 * I_ERR_LINK_COUNT_WRONG below.
 */
2806 	int type_recovered = 0;
2809 	printf("Trying to rebuild inode:%llu\n", rec->ino);
	/* First try to recover the file type from the surviving backrefs */
2811 	type_recovered = !find_file_type(rec, &filetype);
2814 	 * Try to determine inode type if type not found.
2816 	 * For found regular file extent, it must be FILE.
2817 	 * For found dir_item/index, it must be DIR.
2819 	 * For undetermined one, use FILE as fallback.
2822 	 * 1. If found backref(inode_index/item is already handled) to it,
2824 	 *    Need new inode-inode ref structure to allow search for that.
2826 	if (!type_recovered) {
	/* A non-inline file extent proves this is a regular file */
2827 		if (rec->found_file_extent &&
2828 		    find_normal_file_extent(root, rec->ino)) {
2830 			filetype = BTRFS_FT_REG_FILE;
2831 		} else if (rec->found_dir_item) {
2833 			filetype = BTRFS_FT_DIR;
	/* Orphan data extents also imply a regular file */
2834 		} else if (!list_empty(&rec->orphan_extents)) {
2836 			filetype = BTRFS_FT_REG_FILE;
2838 			printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2841 			filetype = BTRFS_FT_REG_FILE;
2845 	ret = btrfs_new_inode(trans, root, rec->ino,
2846 			      mode | btrfs_type_to_imode(filetype));
2851 	 * Here inode rebuild is done, we only rebuild the inode item,
2852 	 * don't repair the nlink(like move to lost+found).
2853 	 * That is the job of nlink repair.
2855 	 * We just fill the record and return
2857 	rec->found_dir_item = 1;
2858 	rec->imode = mode | btrfs_type_to_imode(filetype);
2860 	rec->errors &= ~I_ERR_NO_INODE_ITEM;
2861 	/* Ensure the inode_nlinks repair function will be called */
2862 	rec->errors |= I_ERR_LINK_COUNT_WRONG;
2867 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2868 					struct btrfs_root *root,
2869 					struct btrfs_path *path,
2870 					struct inode_record *rec)
/*
 * Re-attach the orphan data extents recorded on @rec as real file extents,
 * or drop them when they conflict with existing extents.  Also updates the
 * record's size/hole accounting so the corresponding error bits can be
 * cleared once everything matches.
 */
2872 	struct orphan_data_extent *orphan;
2873 	struct orphan_data_extent *tmp;
2876 	list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2878 		 * Check for conflicting file extents
2880 		 * Here we don't know whether the extents is compressed or not,
2881 		 * so we can only assume it not compressed nor data offset,
2882 		 * and use its disk_len as extent length.
2884 		ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2885 				       orphan->offset, orphan->disk_len, 0);
2886 		btrfs_release_path(path);
	/* Conflict: free the orphan's extent instead of inserting it */
2891 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2892 				orphan->disk_bytenr, orphan->disk_len);
2893 			ret = btrfs_free_extent(trans,
2894 					root->fs_info->extent_root,
2895 					orphan->disk_bytenr, orphan->disk_len,
2896 					0, root->objectid, orphan->objectid,
	/* No conflict: insert a regular file extent for the orphan range */
2901 		ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2902 				orphan->offset, orphan->disk_bytenr,
2903 				orphan->disk_len, orphan->disk_len);
2907 		/* Update file size info */
2908 		rec->found_size += orphan->disk_len;
2909 		if (rec->found_size == rec->nbytes)
2910 			rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2912 		/* Update the file extent hole info too */
2913 		ret = del_file_extent_hole(&rec->holes, orphan->offset,
2917 		if (RB_EMPTY_ROOT(&rec->holes))
2918 			rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2920 		list_del(&orphan->list);
	/* All orphan extents handled, clear the orphan error bit */
2923 	rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
2928 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2929 					struct btrfs_root *root,
2930 					struct btrfs_path *path,
2931 					struct inode_record *rec)
/*
 * Fill in missing file extents ("discount" extents) by punching explicit
 * holes over every gap recorded in @rec->holes.  This makes the file's
 * extent layout consistent with its isize again.
 */
2933 	struct rb_node *node;
2934 	struct file_extent_hole *hole;
2938 	node = rb_first(&rec->holes);
2942 		hole = rb_entry(node, struct file_extent_hole, node);
	/* Punch an explicit hole extent over the recorded gap */
2943 		ret = btrfs_punch_hole(trans, root, rec->ino,
2944 				       hole->start, hole->len);
2947 		ret = del_file_extent_hole(&rec->holes, hole->start,
2951 		if (RB_EMPTY_ROOT(&rec->holes))
2952 			rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2953 		node = rb_first(&rec->holes);
2955 	/* special case for a file losing all its file extent */
2957 		ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2958 				       round_up(rec->isize, root->sectorsize));
2962 	printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2963 	       rec->ino, root->objectid);
2968 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
/*
 * Dispatch all repairable inode errors on @rec to the matching repair
 * helpers, inside a single transaction.  Returns 0 on success, negative
 * on the first failing repair (later repairs are skipped once ret != 0).
 * The order matters: the inode item must exist before anything else can
 * be fixed.
 */
2970 	struct btrfs_trans_handle *trans;
2971 	struct btrfs_path *path;
	/* Nothing to do unless at least one repairable error bit is set */
2974 	if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2975 			     I_ERR_NO_ORPHAN_ITEM |
2976 			     I_ERR_LINK_COUNT_WRONG |
2977 			     I_ERR_NO_INODE_ITEM |
2978 			     I_ERR_FILE_EXTENT_ORPHAN |
2979 			     I_ERR_FILE_EXTENT_DISCOUNT|
2980 			     I_ERR_FILE_NBYTES_WRONG)))
2983 	path = btrfs_alloc_path();
2988 	 * For nlink repair, it may create a dir and add link, so
2989 	 * 2 for parent(256)'s dir_index and dir_item
2990 	 * 2 for lost+found dir's inode_item and inode_ref
2991 	 * 1 for the new inode_ref of the file
2992 	 * 2 for lost+found dir's dir_index and dir_item for the file
2994 	trans = btrfs_start_transaction(root, 7);
2995 	if (IS_ERR(trans)) {
2996 		btrfs_free_path(path);
2997 		return PTR_ERR(trans);
	/* Rebuild the inode item first: the other repairs depend on it */
3000 	if (rec->errors & I_ERR_NO_INODE_ITEM)
3001 		ret = repair_inode_no_item(trans, root, path, rec);
3002 	if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3003 		ret = repair_inode_orphan_extent(trans, root, path, rec);
3004 	if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3005 		ret = repair_inode_discount_extent(trans, root, path, rec);
3006 	if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3007 		ret = repair_inode_isize(trans, root, path, rec);
3008 	if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3009 		ret = repair_inode_orphan_item(trans, root, path, rec);
3010 	if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3011 		ret = repair_inode_nlinks(trans, root, path, rec);
3012 	if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3013 		ret = repair_inode_nbytes(trans, root, path, rec);
3014 	btrfs_commit_transaction(trans, root);
3015 	btrfs_free_path(path);
3019 static int check_inode_recs(struct btrfs_root *root,
3020 			    struct cache_tree *inode_cache)
/*
 * Validate (and, with --repair, fix) every inode record collected for
 * @root in @inode_cache: the root dir, orphan items, link counts and
 * per-backref errors.  Frees the records as they are processed; returns
 * -1 if any unresolved error remains, 0 otherwise.
 */
3022 	struct cache_extent *cache;
3023 	struct ptr_node *node;
3024 	struct inode_record *rec;
3025 	struct inode_backref *backref;
3030 	u64 root_dirid = btrfs_root_dirid(&root->root_item);
	/* A dead root (refs == 0) should not have any inode records left */
3032 	if (btrfs_root_refs(&root->root_item) == 0) {
3033 		if (!cache_tree_empty(inode_cache))
3034 			fprintf(stderr, "warning line %d\n", __LINE__);
3039 	 * We need to record the highest inode number for later 'lost+found'
3041 	 * We must select an ino not used/referred by any existing inode, or
3042 	 * 'lost+found' ino may be a missing ino in a corrupted leaf,
3043 	 * this may cause 'lost+found' dir has wrong nlinks.
3045 	cache = last_cache_extent(inode_cache);
3047 		node = container_of(cache, struct ptr_node, cache);
3049 		if (rec->ino > root->highest_inode)
3050 			root->highest_inode = rec->ino;
3054 	 * We need to repair backrefs first because we could change some of the
3055 	 * errors in the inode recs.
3057 	 * We also need to go through and delete invalid backrefs first and then
3058 	 * add the correct ones second. We do this because we may get EEXIST
3059 	 * when adding back the correct index because we hadn't yet deleted the
3062 	 * For example, if we were missing a dir index then the directories
3063 	 * isize would be wrong, so if we fixed the isize to what we thought it
3064 	 * would be and then fixed the backref we'd still have a invalid fs, so
3065 	 * we need to add back the dir index and then check to see if the isize
3070 	if (stage == 3 && !err)
	/* Repair pass over all records' backrefs (multi-stage, see above) */
3073 	cache = search_cache_extent(inode_cache, 0);
3074 	while (repair && cache) {
3075 		node = container_of(cache, struct ptr_node, cache);
3077 		cache = next_cache_extent(cache);
3079 		/* Need to free everything up and rescan */
3081 			remove_cache_extent(inode_cache, &node->cache);
3083 			free_inode_rec(rec);
3087 		if (list_empty(&rec->backrefs))
3090 		ret = repair_inode_backrefs(root, rec, inode_cache,
	/* Check the root directory of this tree (rec may be recreated below) */
3104 	rec = get_inode_rec(inode_cache, root_dirid, 0);
3105 	BUG_ON(IS_ERR(rec));
3107 		ret = check_root_dir(rec);
3109 			fprintf(stderr, "root %llu root dir %llu error\n",
3110 				(unsigned long long)root->root_key.objectid,
3111 				(unsigned long long)root_dirid);
3112 			print_inode_error(root, rec);
	/* Root dir record missing: with --repair, recreate it */
3117 			struct btrfs_trans_handle *trans;
3119 			trans = btrfs_start_transaction(root, 1);
3120 			if (IS_ERR(trans)) {
3121 				err = PTR_ERR(trans);
3126 "root %llu missing its root dir, recreating\n",
3127 				(unsigned long long)root->objectid);
3129 			ret = btrfs_make_root_dir(trans, root, root_dirid);
3132 			btrfs_commit_transaction(trans, root);
3136 		fprintf(stderr, "root %llu root dir %llu not found\n",
3137 			(unsigned long long)root->root_key.objectid,
3138 			(unsigned long long)root_dirid);
	/* Main pass: drain the cache, checking/repairing each record */
3142 	cache = search_cache_extent(inode_cache, 0);
3145 		node = container_of(cache, struct ptr_node, cache);
3147 		remove_cache_extent(inode_cache, &node->cache);
	/* Root dir and orphan objectid were already handled above */
3149 		if (rec->ino == root_dirid ||
3150 		    rec->ino == BTRFS_ORPHAN_OBJECTID) {
3151 			free_inode_rec(rec);
3155 		if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3156 			ret = check_orphan_item(root, rec->ino);
3158 				rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3159 			if (can_free_inode_rec(rec)) {
3160 				free_inode_rec(rec);
3165 		if (!rec->found_inode_item)
3166 			rec->errors |= I_ERR_NO_INODE_ITEM;
3167 		if (rec->found_link != rec->nlink)
3168 			rec->errors |= I_ERR_LINK_COUNT_WRONG;
3170 			ret = try_repair_inode(root, rec);
3171 			if (ret == 0 && can_free_inode_rec(rec)) {
3172 				free_inode_rec(rec);
3178 		if (!(repair && ret == 0))
3180 		print_inode_error(root, rec);
	/* Report every unresolved backref of the bad record */
3181 		list_for_each_entry(backref, &rec->backrefs, list) {
3182 			if (!backref->found_dir_item)
3183 				backref->errors |= REF_ERR_NO_DIR_ITEM;
3184 			if (!backref->found_dir_index)
3185 				backref->errors |= REF_ERR_NO_DIR_INDEX;
3186 			if (!backref->found_inode_ref)
3187 				backref->errors |= REF_ERR_NO_INODE_REF;
3188 			fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3189 				" namelen %u name %s filetype %d errors %x",
3190 				(unsigned long long)backref->dir,
3191 				(unsigned long long)backref->index,
3192 				backref->namelen, backref->name,
3193 				backref->filetype, backref->errors);
3194 			print_ref_error(backref->errors);
3196 		free_inode_rec(rec);
3198 	return (error > 0) ? -1 : 0;
3201 static struct root_record *get_root_rec(struct cache_tree *root_cache,
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a fresh one if it does not exist yet.  Returns an ERR_PTR
 * on allocation or insertion failure.
 */
3204 	struct cache_extent *cache;
3205 	struct root_record *rec = NULL;
3208 	cache = lookup_cache_extent(root_cache, objectid, 1);
3210 		rec = container_of(cache, struct root_record, cache);
	/* Not cached yet: create a new record keyed by the objectid */
3212 		rec = calloc(1, sizeof(*rec));
3214 			return ERR_PTR(-ENOMEM);
3215 		rec->objectid = objectid;
3216 		INIT_LIST_HEAD(&rec->backrefs);
3217 		rec->cache.start = objectid;
3218 		rec->cache.size = 1;
3220 		ret = insert_cache_extent(root_cache, &rec->cache);
3222 			return ERR_PTR(-EEXIST);
3227 static struct root_backref *get_root_backref(struct root_record *rec,
3228 					     u64 ref_root, u64 dir, u64 index,
3229 					     const char *name, int namelen)
/*
 * Find an existing backref on @rec matching (ref_root, dir, name), or
 * allocate a new one (with the name stored inline after the struct) and
 * append it to the record's backref list.
 */
3231 	struct root_backref *backref;
	/* Reuse an existing matching backref if one is already recorded */
3233 	list_for_each_entry(backref, &rec->backrefs, list) {
3234 		if (backref->ref_root != ref_root || backref->dir != dir ||
3235 		    backref->namelen != namelen)
3237 		if (memcmp(name, backref->name, namelen))
	/* +1 for the inline NUL-terminated copy of @name */
3242 	backref = calloc(1, sizeof(*backref) + namelen + 1);
3245 	backref->ref_root = ref_root;
3247 	backref->index = index;
3248 	backref->namelen = namelen;
3249 	memcpy(backref->name, name, namelen);
3250 	backref->name[namelen] = '\0';
3251 	list_add_tail(&backref->list, &rec->backrefs);
3255 static void free_root_record(struct cache_extent *cache)
/*
 * Destructor for a root_record stored in a cache_tree: detach and free
 * all of its backrefs, then the record itself.
 */
3257 	struct root_record *rec;
3258 	struct root_backref *backref;
3260 	rec = container_of(cache, struct root_record, cache);
3261 	while (!list_empty(&rec->backrefs)) {
3262 		backref = to_root_backref(rec->backrefs.next);
3263 		list_del(&backref->list);
/* Generates free_root_recs_tree(), freeing a whole tree via the above */
3270 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3272 static int add_root_backref(struct cache_tree *root_cache,
3273 			    u64 root_id, u64 ref_root, u64 dir, u64 index,
3274 			    const char *name, int namelen,
3275 			    int item_type, int errors)
/*
 * Record one reference to root @root_id coming from @ref_root, as seen in
 * a DIR_ITEM, DIR_INDEX, ROOT_REF or ROOT_BACKREF item.  Cross-checks the
 * index against previously seen items and flags mismatches/duplicates in
 * backref->errors.  A root becomes "reachable" once both a forward root
 * ref and a dir item were found.
 */
3277 	struct root_record *rec;
3278 	struct root_backref *backref;
3280 	rec = get_root_rec(root_cache, root_id);
3281 	BUG_ON(IS_ERR(rec));
3282 	backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3285 	backref->errors |= errors;
	/* DIR_ITEM carries no index; everything else must agree on it */
3287 	if (item_type != BTRFS_DIR_ITEM_KEY) {
3288 		if (backref->found_dir_index || backref->found_back_ref ||
3289 		    backref->found_forward_ref) {
3290 			if (backref->index != index)
3291 				backref->errors |= REF_ERR_INDEX_UNMATCH;
3293 			backref->index = index;
3297 	if (item_type == BTRFS_DIR_ITEM_KEY) {
3298 		if (backref->found_forward_ref)
3300 		backref->found_dir_item = 1;
3301 	} else if (item_type == BTRFS_DIR_INDEX_KEY) {
3302 		backref->found_dir_index = 1;
3303 	} else if (item_type == BTRFS_ROOT_REF_KEY) {
3304 		if (backref->found_forward_ref)
3305 			backref->errors |= REF_ERR_DUP_ROOT_REF;
3306 		else if (backref->found_dir_item)
3308 		backref->found_forward_ref = 1;
3309 	} else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3310 		if (backref->found_back_ref)
3311 			backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3312 		backref->found_back_ref = 1;
	/* Root ref + dir item together make this root reachable */
3317 	if (backref->found_forward_ref && backref->found_dir_item)
3318 		backref->reachable = 1;
3322 static int merge_root_recs(struct btrfs_root *root,
3323 			   struct cache_tree *src_cache,
3324 			   struct cache_tree *dst_cache)
/*
 * Convert the per-subvolume root-dir records collected in @src_cache into
 * root backrefs in the global @dst_cache.  Reloc tree records are simply
 * discarded.  Each source record is freed once merged.
 */
3326 	struct cache_extent *cache;
3327 	struct ptr_node *node;
3328 	struct inode_record *rec;
3329 	struct inode_backref *backref;
	/* Reloc trees carry no useful root refs: just drop the records */
3332 	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3333 		free_inode_recs_tree(src_cache);
3338 	cache = search_cache_extent(src_cache, 0);
3341 		node = container_of(cache, struct ptr_node, cache);
3343 		remove_cache_extent(src_cache, &node->cache);
3346 		ret = is_child_root(root, root->objectid, rec->ino);
	/* Translate each dir item/index backref into a root backref */
3352 		list_for_each_entry(backref, &rec->backrefs, list) {
3353 			BUG_ON(backref->found_inode_ref);
3354 			if (backref->found_dir_item)
3355 				add_root_backref(dst_cache, rec->ino,
3356 					root->root_key.objectid, backref->dir,
3357 					backref->index, backref->name,
3358 					backref->namelen, BTRFS_DIR_ITEM_KEY,
3360 			if (backref->found_dir_index)
3361 				add_root_backref(dst_cache, rec->ino,
3362 					root->root_key.objectid, backref->dir,
3363 					backref->index, backref->name,
3364 					backref->namelen, BTRFS_DIR_INDEX_KEY,
3368 		free_inode_rec(rec);
3375 static int check_root_refs(struct btrfs_root *root,
3376 			   struct cache_tree *root_cache)
/*
 * Propagate reachability through the collected root records and report
 * every fs tree that is unreferenced or has broken backrefs.  Returns 1
 * if any error was found, 0 otherwise.
 */
3378 	struct root_record *rec;
3379 	struct root_record *ref_root;
3380 	struct root_backref *backref;
3381 	struct cache_extent *cache;
	/* The top-level fs tree is always reachable by definition */
3387 	rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3388 	BUG_ON(IS_ERR(rec));
3391 	/* fixme: this can not detect circular references */
	/* Iterate until reachability stabilizes (fixed-point loop) */
3394 		cache = search_cache_extent(root_cache, 0);
3398 			rec = container_of(cache, struct root_record, cache);
3399 			cache = next_cache_extent(cache);
3401 			if (rec->found_ref == 0)
3404 			list_for_each_entry(backref, &rec->backrefs, list) {
3405 				if (!backref->reachable)
	/* A backref only counts while its referencing root is itself live */
3408 				ref_root = get_root_rec(root_cache,
3410 				BUG_ON(IS_ERR(ref_root));
3411 				if (ref_root->found_ref > 0)
3414 				backref->reachable = 0;
3416 				if (rec->found_ref == 0)
	/* Reporting pass over the final reachability state */
3422 	cache = search_cache_extent(root_cache, 0);
3426 		rec = container_of(cache, struct root_record, cache);
3427 		cache = next_cache_extent(cache);
3429 		if (rec->found_ref == 0 &&
3430 		    rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3431 		    rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3432 			ret = check_orphan_item(root->fs_info->tree_root,
3438 			 * If we don't have a root item then we likely just have
3439 			 * a dir item in a snapshot for this root but no actual
3440 			 * ref key or anything so it's meaningless.
3442 			if (!rec->found_root_item)
3445 			fprintf(stderr, "fs tree %llu not referenced\n",
3446 				(unsigned long long)rec->objectid);
3450 		if (rec->found_ref > 0 && !rec->found_root_item)
3452 		list_for_each_entry(backref, &rec->backrefs, list) {
3453 			if (!backref->found_dir_item)
3454 				backref->errors |= REF_ERR_NO_DIR_ITEM;
3455 			if (!backref->found_dir_index)
3456 				backref->errors |= REF_ERR_NO_DIR_INDEX;
3457 			if (!backref->found_back_ref)
3458 				backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3459 			if (!backref->found_forward_ref)
3460 				backref->errors |= REF_ERR_NO_ROOT_REF;
3461 			if (backref->reachable && backref->errors)
3468 		fprintf(stderr, "fs tree %llu refs %u %s\n",
3469 			(unsigned long long)rec->objectid, rec->found_ref,
3470 			 rec->found_root_item ? "" : "not found");
3472 		list_for_each_entry(backref, &rec->backrefs, list) {
3473 			if (!backref->reachable)
3475 			if (!backref->errors && rec->found_root_item)
3477 			fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3478 				" index %llu namelen %u name %s errors %x\n",
3479 				(unsigned long long)backref->ref_root,
3480 				(unsigned long long)backref->dir,
3481 				(unsigned long long)backref->index,
3482 				backref->namelen, backref->name,
3484 			print_ref_error(backref->errors);
3487 	return errors > 0 ? 1 : 0;
3490 static int process_root_ref(struct extent_buffer *eb, int slot,
3491 			    struct btrfs_key *key,
3492 			    struct cache_tree *root_cache)
/*
 * Parse one ROOT_REF / ROOT_BACKREF item and feed it into the root cache.
 * For ROOT_REF the key is (parent, REF, child); for ROOT_BACKREF it is
 * (child, BACKREF, parent) — hence objectid/offset swap below.
 */
3498 	struct btrfs_root_ref *ref;
3499 	char namebuf[BTRFS_NAME_LEN];
3502 	ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3504 	dirid = btrfs_root_ref_dirid(eb, ref);
3505 	index = btrfs_root_ref_sequence(eb, ref);
3506 	name_len = btrfs_root_ref_name_len(eb, ref);
	/* Clamp over-long names and remember the error */
3508 	if (name_len <= BTRFS_NAME_LEN) {
3512 		len = BTRFS_NAME_LEN;
3513 		error = REF_ERR_NAME_TOO_LONG;
	/* The name is stored right after the btrfs_root_ref structure */
3515 	read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3517 	if (key->type == BTRFS_ROOT_REF_KEY) {
3518 		add_root_backref(root_cache, key->offset, key->objectid, dirid,
3519 				 index, namebuf, len, key->type, error);
3521 		add_root_backref(root_cache, key->objectid, key->offset, dirid,
3522 				 index, namebuf, len, key->type, error);
3527 static void free_corrupt_block(struct cache_extent *cache)
/* Destructor for a btrfs_corrupt_block entry in a cache_tree. */
3529 	struct btrfs_corrupt_block *corrupt;
3531 	corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* Generates free_corrupt_blocks_tree(), freeing a whole corrupt-block tree */
3535 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3538  * Repair the btree of the given root.
3540  * The fix is to remove the node keys recorded in the corrupt_blocks
3541  * cache_tree and rebalance the tree.
3542  * After the fix, the btree should be writeable.
3544 static int repair_btree(struct btrfs_root *root,
3545 			struct cache_tree *corrupt_blocks)
/*
 * Detach every tree block recorded in @corrupt_blocks from @root's btree
 * (deleting the parent pointer and freeing the block's extent), then run
 * a second pass of searches to let btrfs_search_slot rebalance the tree.
 */
3547 	struct btrfs_trans_handle *trans;
3548 	struct btrfs_path *path;
3549 	struct btrfs_corrupt_block *corrupt;
3550 	struct cache_extent *cache;
3551 	struct btrfs_key key;
3556 	if (cache_tree_empty(corrupt_blocks))
3559 	path = btrfs_alloc_path();
3563 	trans = btrfs_start_transaction(root, 1);
3564 	if (IS_ERR(trans)) {
3565 		ret = PTR_ERR(trans);
3566 		fprintf(stderr, "Error starting transaction: %s\n",
	/* Pass 1: delete the pointer to each corrupt block and free it */
3570 	cache = first_cache_extent(corrupt_blocks);
3572 		corrupt = container_of(cache, struct btrfs_corrupt_block,
3574 		level = corrupt->level;
3575 		path->lowest_level = level;
3576 		key.objectid = corrupt->key.objectid;
3577 		key.type = corrupt->key.type;
3578 		key.offset = corrupt->key.offset;
3581 		 * Here we don't want to do any tree balance, since it may
3582 		 * cause a balance with corrupted brother leaf/node,
3583 		 * so ins_len set to 0 here.
3584 		 * Balance will be done after all corrupt node/leaf is deleted.
3586 		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3589 		offset = btrfs_node_blockptr(path->nodes[level],
3590 					     path->slots[level]);
3592 		/* Remove the ptr */
3593 		ret = btrfs_del_ptr(trans, root, path, level,
3594 				    path->slots[level]);
3598 		 * Remove the corresponding extent
3599 		 * return value is not concerned.
3601 		btrfs_release_path(path);
3602 		ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3603 					0, root->root_key.objectid,
3605 		cache = next_cache_extent(cache);
3608 	/* Balance the btree using btrfs_search_slot() */
3609 	cache = first_cache_extent(corrupt_blocks);
3611 		corrupt = container_of(cache, struct btrfs_corrupt_block,
3613 		memcpy(&key, &corrupt->key, sizeof(key));
3614 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3617 		/* return will always >0 since it won't find the item */
3619 		btrfs_release_path(path);
3620 		cache = next_cache_extent(cache);
3623 	btrfs_commit_transaction(trans, root);
3625 	btrfs_free_path(path);
3629 static int check_fs_root(struct btrfs_root *root,
3630 			 struct cache_tree *root_cache,
3631 			 struct walk_control *wc)
/*
 * Fully check one fs/subvolume tree: walk it top-down collecting inode
 * records, merge root-dir refs into @root_cache, then validate all inode
 * records.  Corrupted tree blocks found during the walk are collected in
 * a local corrupt_blocks tree and, with --repair, handed to repair_btree().
 */
3637 	struct btrfs_path path;
3638 	struct shared_node root_node;
3639 	struct root_record *rec;
3640 	struct btrfs_root_item *root_item = &root->root_item;
3641 	struct cache_tree corrupt_blocks;
3642 	struct orphan_data_extent *orphan;
3643 	struct orphan_data_extent *tmp;
3644 	enum btrfs_tree_block_status status;
3645 	struct node_refs nrefs;
3648 	 * Reuse the corrupt_block cache tree to record corrupted tree block
3650 	 * Unlike the usage in extent tree check, here we do it in a per
3651 	 * fs/subvol tree base.
3653 	cache_tree_init(&corrupt_blocks);
3654 	root->fs_info->corrupt_blocks = &corrupt_blocks;
3656 	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3657 		rec = get_root_rec(root_cache, root->root_key.objectid);
3658 		BUG_ON(IS_ERR(rec));
3659 		if (btrfs_root_refs(root_item) > 0)
3660 			rec->found_root_item = 1;
3663 	btrfs_init_path(&path);
3664 	memset(&root_node, 0, sizeof(root_node));
3665 	cache_tree_init(&root_node.root_cache);
3666 	cache_tree_init(&root_node.inode_cache);
3667 	memset(&nrefs, 0, sizeof(nrefs));
3669 	/* Move the orphan extent record to corresponding inode_record */
3670 	list_for_each_entry_safe(orphan, tmp,
3671 				 &root->orphan_data_extents, list) {
3672 		struct inode_record *inode;
3674 		inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3676 		BUG_ON(IS_ERR(inode));
3677 		inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3678 		list_move(&orphan->list, &inode->orphan_extents);
3681 	level = btrfs_header_level(root->node);
3682 	memset(wc->nodes, 0, sizeof(wc->nodes));
3683 	wc->nodes[level] = &root_node;
3684 	wc->active_node = level;
3685 	wc->root_level = level;
3687 	/* We may not have checked the root block, lets do that now */
3688 	if (btrfs_is_leaf(root->node))
3689 		status = btrfs_check_leaf(root, NULL, root->node);
3691 		status = btrfs_check_node(root, NULL, root->node);
3692 	if (status != BTRFS_TREE_BLOCK_CLEAN)
	/* Live root: walk from the top.  Dropped root: resume at drop_progress */
3695 	if (btrfs_root_refs(root_item) > 0 ||
3696 	    btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3697 		path.nodes[level] = root->node;
3698 		extent_buffer_get(root->node);
3699 		path.slots[level] = 0;
3701 		struct btrfs_key key;
3702 		struct btrfs_disk_key found_key;
3704 		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3705 		level = root_item->drop_level;
3706 		path.lowest_level = level;
3707 		wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3710 		btrfs_node_key(path.nodes[level], &found_key,
3712 		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3713 			       sizeof(found_key)));
	/* Main walk: down then up until the whole tree is covered */
3717 		wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3723 		wret = walk_up_tree(root, &path, wc, &level);
3730 	btrfs_release_path(&path);
	/* Report (and with --repair, fix) any corrupt blocks we collected */
3732 	if (!cache_tree_empty(&corrupt_blocks)) {
3733 		struct cache_extent *cache;
3734 		struct btrfs_corrupt_block *corrupt;
3736 		printf("The following tree block(s) is corrupted in tree %llu:\n",
3737 		       root->root_key.objectid);
3738 		cache = first_cache_extent(&corrupt_blocks);
3740 			corrupt = container_of(cache,
3741 					       struct btrfs_corrupt_block,
3743 			printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3744 			       cache->start, corrupt->level,
3745 			       corrupt->key.objectid, corrupt->key.type,
3746 			       corrupt->key.offset);
3747 			cache = next_cache_extent(cache);
3750 		printf("Try to repair the btree for root %llu\n",
3751 		       root->root_key.objectid);
3752 		ret = repair_btree(root, &corrupt_blocks);
3754 			fprintf(stderr, "Failed to repair btree: %s\n",
3757 			printf("Btree for root %llu is fixed\n",
3758 			       root->root_key.objectid);
3762 	err = merge_root_recs(root, &root_node.root_cache, root_cache);
3766 	if (root_node.current) {
3767 		root_node.current->checked = 1;
3768 		maybe_free_inode_rec(&root_node.inode_cache,
3772 	err = check_inode_recs(root, &root_node.inode_cache);
	/* Drop the per-root corrupt-block tracking before returning */
3776 	free_corrupt_blocks_tree(&corrupt_blocks);
3777 	root->fs_info->corrupt_blocks = NULL;
3778 	free_orphan_data_extents(&root->orphan_data_extents);
3782 static int fs_root_objectid(u64 objectid)
3784 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3785 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3787 return is_fstree(objectid);
3790 static int check_fs_roots(struct btrfs_root *root,
3791 			  struct cache_tree *root_cache)
/*
 * Iterate over the tree root, running check_fs_root() on every fs tree
 * and feeding every ROOT_REF/ROOT_BACKREF item into @root_cache.  If the
 * tree root changes underneath us (a repair committed a transaction) or a
 * check returns -EAGAIN, the collected state is thrown away and the scan
 * restarts from the beginning.
 */
3793 	struct btrfs_path path;
3794 	struct btrfs_key key;
3795 	struct walk_control wc;
3796 	struct extent_buffer *leaf, *tree_node;
3797 	struct btrfs_root *tmp_root;
3798 	struct btrfs_root *tree_root = root->fs_info->tree_root;
3802 	if (ctx.progress_enabled) {
3803 		ctx.tp = TASK_FS_ROOTS;
3804 		task_start(ctx.info);
3808 	 * Just in case we made any changes to the extent tree that weren't
3809 	 * reflected into the free space cache yet.
3812 		reset_cached_block_groups(root->fs_info);
3813 	memset(&wc, 0, sizeof(wc));
3814 	cache_tree_init(&wc.shared);
3815 	btrfs_init_path(&path);
3820 	key.type = BTRFS_ROOT_ITEM_KEY;
3821 	ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
	/* Remember the root node so we can detect concurrent COW below */
3826 	tree_node = tree_root->node;
3828 		if (tree_node != tree_root->node) {
3829 			free_root_recs_tree(root_cache);
3830 			btrfs_release_path(&path);
3833 		leaf = path.nodes[0];
3834 		if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3835 			ret = btrfs_next_leaf(tree_root, &path);
3841 			leaf = path.nodes[0];
3843 		btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3844 		if (key.type == BTRFS_ROOT_ITEM_KEY &&
3845 		    fs_root_objectid(key.objectid)) {
	/* Reloc trees are read uncached; everything else via the fs_root cache */
3846 			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3847 				tmp_root = btrfs_read_fs_root_no_cache(
3848 						root->fs_info, &key);
3850 				key.offset = (u64)-1;
3851 				tmp_root = btrfs_read_fs_root(
3852 						root->fs_info, &key);
3854 			if (IS_ERR(tmp_root)) {
3858 			ret = check_fs_root(tmp_root, root_cache, &wc);
3859 			if (ret == -EAGAIN) {
	/* Repair invalidated our view: restart the whole scan */
3860 				free_root_recs_tree(root_cache);
3861 				btrfs_release_path(&path);
3866 			if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3867 				btrfs_free_fs_root(tmp_root);
3868 		} else if (key.type == BTRFS_ROOT_REF_KEY ||
3869 			   key.type == BTRFS_ROOT_BACKREF_KEY) {
3870 			process_root_ref(leaf, path.slots[0], &key,
3877 	btrfs_release_path(&path);
3879 	free_extent_cache_tree(&wc.shared);
3880 	if (!cache_tree_empty(&wc.shared))
3881 		fprintf(stderr, "warning line %d\n", __LINE__);
3883 		task_stop(ctx.info);
3888 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
/*
 * Cross-check every backref of @rec in both directions (extent tree item
 * vs. actual reference in the owning tree) and check the global ref count.
 * Returns the number of problems found (0 == fully consistent); detailed
 * messages are printed only when @print_errs is set.
 */
3891 	struct extent_backref *back;
3892 	struct tree_backref *tback;
3893 	struct data_backref *dback;
3897 	for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3898 		back = rb_node_to_extent_backref(n);
	/* Backref seen in some tree but missing from the extent tree */
3899 		if (!back->found_extent_tree) {
3903 			if (back->is_data) {
3904 				dback = to_data_backref(back);
3905 				fprintf(stderr, "Backref %llu %s %llu"
3906 					" owner %llu offset %llu num_refs %lu"
3907 					" not found in extent tree\n",
3908 					(unsigned long long)rec->start,
3909 					back->full_backref ?
3911 					back->full_backref ?
3912 					(unsigned long long)dback->parent:
3913 					(unsigned long long)dback->root,
3914 					(unsigned long long)dback->owner,
3915 					(unsigned long long)dback->offset,
3916 					(unsigned long)dback->num_refs);
3918 				tback = to_tree_backref(back);
3919 				fprintf(stderr, "Backref %llu parent %llu"
3920 					" root %llu not found in extent tree\n",
3921 					(unsigned long long)rec->start,
3922 					(unsigned long long)tback->parent,
3923 					(unsigned long long)tback->root);
	/* Extent-tree tree backref with no matching reference in the tree */
3926 		if (!back->is_data && !back->found_ref) {
3930 			tback = to_tree_backref(back);
3931 			fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3932 				(unsigned long long)rec->start,
3933 				back->full_backref ? "parent" : "root",
3934 				back->full_backref ?
3935 				(unsigned long long)tback->parent :
3936 				(unsigned long long)tback->root, back);
3938 		if (back->is_data) {
3939 			dback = to_data_backref(back);
	/* Per-backref ref count must match what we actually found */
3940 			if (dback->found_ref != dback->num_refs) {
3944 				fprintf(stderr, "Incorrect local backref count"
3945 					" on %llu %s %llu owner %llu"
3946 					" offset %llu found %u wanted %u back %p\n",
3947 					(unsigned long long)rec->start,
3948 					back->full_backref ?
3950 					back->full_backref ?
3951 					(unsigned long long)dback->parent:
3952 					(unsigned long long)dback->root,
3953 					(unsigned long long)dback->owner,
3954 					(unsigned long long)dback->offset,
3955 					dback->found_ref, dback->num_refs, back);
3957 			if (dback->disk_bytenr != rec->start) {
3961 				fprintf(stderr, "Backref disk bytenr does not"
3962 					" match extent record, bytenr=%llu, "
3963 					"ref bytenr=%llu\n",
3964 					(unsigned long long)rec->start,
3965 					(unsigned long long)dback->disk_bytenr);
3968 			if (dback->bytes != rec->nr) {
3972 				fprintf(stderr, "Backref bytes do not match "
3973 					"extent backref, bytenr=%llu, ref "
3974 					"bytes=%llu, backref bytes=%llu\n",
3975 					(unsigned long long)rec->start,
3976 					(unsigned long long)rec->nr,
3977 					(unsigned long long)dback->bytes);
	/* Accumulate the total reference count across all backrefs */
3980 		if (!back->is_data) {
3983 			dback = to_data_backref(back);
3984 			found += dback->found_ref;
3987 	if (found != rec->refs) {
3991 		fprintf(stderr, "Incorrect global backref count "
3992 			"on %llu found %llu wanted %llu\n",
3993 			(unsigned long long)rec->start,
3994 			(unsigned long long)found,
3995 			(unsigned long long)rec->refs);
4001 static void __free_one_backref(struct rb_node *node)
/* rb_free_nodes() callback: free one extent_backref embedded in @node. */
4003 	struct extent_backref *back = rb_node_to_extent_backref(node);
4008 static void free_all_extent_backrefs(struct extent_record *rec)
/* Free every backref hanging off @rec's backref rbtree. */
4010 	rb_free_nodes(&rec->backref_tree, __free_one_backref);
4013 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4014 				     struct cache_tree *extent_cache)
/*
 * Drain @extent_cache, freeing every extent_record together with its
 * backrefs.
 */
4016 	struct cache_extent *cache;
4017 	struct extent_record *rec;
4020 		cache = first_cache_extent(extent_cache);
4023 		rec = container_of(cache, struct extent_record, cache);
4024 		remove_cache_extent(extent_cache, cache);
4025 		free_all_extent_backrefs(rec);
4030 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4031 				 struct extent_record *rec)
/*
 * Release @rec from @extent_cache early if it is already fully verified:
 * content and owner checked, ref counts matching, no duplicates, and
 * none of the "bad" flags set.  Keeps the cache small during the scan.
 */
4033 	if (rec->content_checked && rec->owner_ref_checked &&
4034 	    rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4035 	    rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4036 	    !rec->bad_full_backref && !rec->crossing_stripes &&
4037 	    !rec->wrong_chunk_type) {
4038 		remove_cache_extent(extent_cache, &rec->cache);
4039 		free_all_extent_backrefs(rec);
4040 		list_del_init(&rec->list);
4046 static int check_owner_ref(struct btrfs_root *root,
4047 			   struct extent_record *rec,
4048 			   struct extent_buffer *buf)
/*
 * Verify that the owner recorded in @buf's header is backed by a real
 * reference: either a matching non-full tree backref on @rec, or the
 * block actually being pointed to from the owner tree.  Returns 0 when
 * the owner checks out, 1 otherwise.
 */
4050 	struct extent_backref *node, *tmp;
4051 	struct tree_backref *back;
4052 	struct btrfs_root *ref_root;
4053 	struct btrfs_key key;
4054 	struct btrfs_path path;
4055 	struct extent_buffer *parent;
	/* Fast path: a found, non-full backref matching the header owner */
4060 	rbtree_postorder_for_each_entry_safe(node, tmp,
4061 					     &rec->backref_tree, node) {
4064 		if (!node->found_ref)
4066 		if (node->full_backref)
4068 		back = to_tree_backref(node);
4069 		if (btrfs_header_owner(buf) == back->root)
4072 	BUG_ON(rec->is_root);
4074 	/* try to find the block by search corresponding fs tree */
4075 	key.objectid = btrfs_header_owner(buf);
4076 	key.type = BTRFS_ROOT_ITEM_KEY;
4077 	key.offset = (u64)-1;
4079 	ref_root = btrfs_read_fs_root(root->fs_info, &key);
4080 	if (IS_ERR(ref_root))
	/* Use the block's first key to locate its slot in the owner tree */
4083 	level = btrfs_header_level(buf);
4085 		btrfs_item_key_to_cpu(buf, &key, 0);
4087 		btrfs_node_key_to_cpu(buf, &key, 0);
4089 	btrfs_init_path(&path);
4090 	path.lowest_level = level + 1;
4091 	ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
	/* Owner is valid iff the parent node really points at @buf */
4095 	parent = path.nodes[level + 1];
4096 	if (parent && buf->start == btrfs_node_blockptr(parent,
4097 						path.slots[level + 1]))
4100 	btrfs_release_path(&path);
4101 	return found ? 0 : 1;
4104 static int is_extent_tree_record(struct extent_record *rec)
/*
 * Report whether @rec has a non-full tree backref rooted in the extent
 * tree, i.e. the block belongs to the extent tree itself.
 */
4106 	struct extent_backref *ref, *tmp;
4107 	struct tree_backref *back;
4110 	rbtree_postorder_for_each_entry_safe(ref, tmp,
4111 					     &rec->backref_tree, node) {
4114 		back = to_tree_backref(ref);
4115 		if (ref->full_backref)
4117 		if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4124 static int record_bad_block_io(struct btrfs_fs_info *info,
4125 			       struct cache_tree *extent_cache,
/*
 * Handle an unreadable tree block at (@start, @len): if its extent record
 * says it belongs to the extent tree, record it as a corrupt extent so
 * extent-tree repair can deal with it later.
 */
4128 	struct extent_record *rec;
4129 	struct cache_extent *cache;
4130 	struct btrfs_key key;
4132 	cache = lookup_cache_extent(extent_cache, start, len);
4136 	rec = container_of(cache, struct extent_record, cache);
	/* Only blocks of the extent tree itself are tracked here */
4137 	if (!is_extent_tree_record(rec))
4140 	btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4141 	return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot + 1 inside @buf to repair a pair of
 * out-of-order keys.
 *
 * For an internal node this swaps the two btrfs_key_ptr entries; for a
 * leaf it swaps the item data (via temporary heap buffers), then the item
 * headers' offset/size fields, and finally the item keys themselves.  In
 * either case, if slot 0 changed, the low keys up the path are fixed so
 * parent pointers stay consistent.
 */
4144 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4145 struct extent_buffer *buf, int slot)
4147 if (btrfs_header_level(buf)) {
4148 struct btrfs_key_ptr ptr1, ptr2;
4150 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4151 sizeof(struct btrfs_key_ptr));
4152 read_extent_buffer(buf, &ptr2,
4153 btrfs_node_key_ptr_offset(slot + 1),
4154 sizeof(struct btrfs_key_ptr));
4155 write_extent_buffer(buf, &ptr1,
4156 btrfs_node_key_ptr_offset(slot + 1),
4157 sizeof(struct btrfs_key_ptr));
4158 write_extent_buffer(buf, &ptr2,
4159 btrfs_node_key_ptr_offset(slot),
4160 sizeof(struct btrfs_key_ptr));
/* Slot 0 moved: propagate the new first key to the parents. */
4162 struct btrfs_disk_key key;
4163 btrfs_node_key(buf, &key, 0);
4164 btrfs_fixup_low_keys(root, path, &key,
4165 btrfs_header_level(buf) + 1);
4168 struct btrfs_item *item1, *item2;
4169 struct btrfs_key k1, k2;
4170 char *item1_data, *item2_data;
4171 u32 item1_offset, item2_offset, item1_size, item2_size;
4173 item1 = btrfs_item_nr(slot);
4174 item2 = btrfs_item_nr(slot + 1);
4175 btrfs_item_key_to_cpu(buf, &k1, slot);
4176 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4177 item1_offset = btrfs_item_offset(buf, item1);
4178 item2_offset = btrfs_item_offset(buf, item2);
4179 item1_size = btrfs_item_size(buf, item1);
4180 item2_size = btrfs_item_size(buf, item2);
/*
 * Copy both items' payloads out, write them back into each other's
 * slots, then exchange the headers' offset/size so the leaf layout
 * matches.  NOTE(review): malloc failure handling lines are not
 * visible in this excerpt — confirm against the full file.
 */
4182 item1_data = malloc(item1_size);
4185 item2_data = malloc(item2_size);
4191 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4192 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4194 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4195 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4199 btrfs_set_item_offset(buf, item1, item2_offset);
4200 btrfs_set_item_offset(buf, item2, item1_offset);
4201 btrfs_set_item_size(buf, item1, item2_size);
4202 btrfs_set_item_size(buf, item2, item1_size);
/* Finally swap the keys, using the path to fix parents if needed. */
4204 path->slots[0] = slot;
4205 btrfs_set_item_key_unsafe(root, path, &k2);
4206 path->slots[0] = slot + 1;
4207 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Walk the block at path->lowest_level comparing each adjacent key pair
 * and swap any pair that is out of order (via swap_values()), then mark
 * the buffer dirty so the repair is written back.
 */
4212 static int fix_key_order(struct btrfs_trans_handle *trans,
4213 struct btrfs_root *root,
4214 struct btrfs_path *path)
4216 struct extent_buffer *buf;
4217 struct btrfs_key k1, k2;
4219 int level = path->lowest_level;
4222 buf = path->nodes[level];
4223 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Nodes store key_ptrs, leaves store items — read accordingly. */
4225 btrfs_node_key_to_cpu(buf, &k1, i);
4226 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4228 btrfs_item_key_to_cpu(buf, &k1, i);
4229 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4231 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4233 ret = swap_values(root, path, buf, i);
4236 btrfs_mark_buffer_dirty(buf);
/*
 * Remove a corrupt item from a leaf, but only for key types the rest of
 * fsck knows how to rebuild (dir indexes, extent items/refs).  Only the
 * item header array is shifted down and nritems decremented — the item's
 * data region in the leaf is left in place.  If slot 0 was removed, the
 * low keys up the path are fixed.
 */
4242 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4243 struct btrfs_root *root,
4244 struct btrfs_path *path,
4245 struct extent_buffer *buf, int slot)
4247 struct btrfs_key key;
4248 int nritems = btrfs_header_nritems(buf);
4250 btrfs_item_key_to_cpu(buf, &key, slot);
4252 /* These are all the keys we can deal with missing. */
4253 if (key.type != BTRFS_DIR_INDEX_KEY &&
4254 key.type != BTRFS_EXTENT_ITEM_KEY &&
4255 key.type != BTRFS_METADATA_ITEM_KEY &&
4256 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4257 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4260 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4261 (unsigned long long)key.objectid, key.type,
4262 (unsigned long long)key.offset, slot, buf->start);
/* Shift the item headers after @slot down by one entry. */
4263 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4264 btrfs_item_nr_offset(slot + 1),
4265 sizeof(struct btrfs_item) *
4266 (nritems - slot - 1));
4267 btrfs_set_header_nritems(buf, nritems - 1);
4269 struct btrfs_disk_key disk_key;
4271 btrfs_item_key(buf, &disk_key, 0);
4272 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4274 btrfs_mark_buffer_dirty(buf);
/*
 * Repair bogus item data offsets in a leaf.
 *
 * Invariants enforced: item 0 must end exactly at BTRFS_LEAF_DATA_SIZE,
 * and each later item must end exactly where the previous item's data
 * begins.  Items that extend past the leaf end or overlap a neighbor are
 * deleted (when delete_bogus_item() can handle the key type); items that
 * merely leave a gap have their data memmoved up by the gap size and
 * their header offset adjusted.
 */
4278 static int fix_item_offset(struct btrfs_trans_handle *trans,
4279 struct btrfs_root *root,
4280 struct btrfs_path *path)
4282 struct extent_buffer *buf;
4286 /* We should only get this for leaves */
4287 BUG_ON(path->lowest_level);
4288 buf = path->nodes[0];
4290 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4291 unsigned int shift = 0, offset;
4293 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4294 BTRFS_LEAF_DATA_SIZE(root)) {
4295 if (btrfs_item_end_nr(buf, i) >
4296 BTRFS_LEAF_DATA_SIZE(root)) {
/* Item data runs off the end of the leaf: delete or give up. */
4297 ret = delete_bogus_item(trans, root, path,
4301 fprintf(stderr, "item is off the end of the "
4302 "leaf, can't fix\n");
4306 shift = BTRFS_LEAF_DATA_SIZE(root) -
4307 btrfs_item_end_nr(buf, i);
4308 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4309 btrfs_item_offset_nr(buf, i - 1)) {
4310 if (btrfs_item_end_nr(buf, i) >
4311 btrfs_item_offset_nr(buf, i - 1)) {
/* Item data overlaps its predecessor: delete or give up. */
4312 ret = delete_bogus_item(trans, root, path,
4316 fprintf(stderr, "items overlap, can't fix\n");
4320 shift = btrfs_item_offset_nr(buf, i - 1) -
4321 btrfs_item_end_nr(buf, i);
4326 printf("Shifting item nr %d by %u bytes in block %llu\n",
4327 i, shift, (unsigned long long)buf->start);
4328 offset = btrfs_item_offset_nr(buf, i);
4329 memmove_extent_buffer(buf,
4330 btrfs_leaf_data(buf) + offset + shift,
4331 btrfs_leaf_data(buf) + offset,
4332 btrfs_item_size_nr(buf, i));
4333 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4335 btrfs_mark_buffer_dirty(buf);
4339 * We may have moved things, in which case we want to exit so we don't
4340 * write those changes out. Once we have proper abort functionality in
4341 * progs this can be changed to something nicer.
4348 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4349 * then just return -EIO.
/*
 * Only BAD_KEY_ORDER and INVALID_OFFSETS are repairable here.  We resolve
 * every root that references @buf, then for each: start a transaction,
 * cow a path down to the block (skipping the block checker so the search
 * doesn't reject it), and run the matching fixer before committing.
 */
4351 static int try_to_fix_bad_block(struct btrfs_root *root,
4352 struct extent_buffer *buf,
4353 enum btrfs_tree_block_status status)
4355 struct btrfs_trans_handle *trans;
4356 struct ulist *roots;
4357 struct ulist_node *node;
4358 struct btrfs_root *search_root;
4359 struct btrfs_path *path;
4360 struct ulist_iterator iter;
4361 struct btrfs_key root_key, key;
4364 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4365 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4368 path = btrfs_alloc_path();
4372 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4375 btrfs_free_path(path);
4379 ULIST_ITER_INIT(&iter);
4380 while ((node = ulist_next(roots, &iter))) {
4381 root_key.objectid = node->val;
4382 root_key.type = BTRFS_ROOT_ITEM_KEY;
4383 root_key.offset = (u64)-1;
4385 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4392 trans = btrfs_start_transaction(search_root, 0);
4393 if (IS_ERR(trans)) {
4394 ret = PTR_ERR(trans);
/* skip_check_block: the block is known-bad; don't let the search bail. */
4398 path->lowest_level = btrfs_header_level(buf);
4399 path->skip_check_block = 1;
4400 if (path->lowest_level)
4401 btrfs_node_key_to_cpu(buf, &key, 0);
4403 btrfs_item_key_to_cpu(buf, &key, 0);
4404 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4407 btrfs_commit_transaction(trans, search_root);
4410 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4411 ret = fix_key_order(trans, search_root, path);
4412 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4413 ret = fix_item_offset(trans, search_root, path);
4415 btrfs_commit_transaction(trans, search_root);
4418 btrfs_release_path(path);
4419 btrfs_commit_transaction(trans, search_root);
4422 btrfs_free_path(path);
/*
 * Validate one tree block against its cached extent record: record its
 * generation, first key and level, run the leaf/node structure checkers,
 * and attempt in-place repair of repairable statuses.  On success the
 * record's content_checked (and possibly owner_ref_checked) flags are
 * set; the record may be freed if it is now fully resolved.
 */
4426 static int check_block(struct btrfs_root *root,
4427 struct cache_tree *extent_cache,
4428 struct extent_buffer *buf, u64 flags)
4430 struct extent_record *rec;
4431 struct cache_extent *cache;
4432 struct btrfs_key key;
4433 enum btrfs_tree_block_status status;
4437 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4440 rec = container_of(cache, struct extent_record, cache);
4441 rec->generation = btrfs_header_generation(buf);
4443 level = btrfs_header_level(buf);
4444 if (btrfs_header_nritems(buf) > 0) {
4447 btrfs_item_key_to_cpu(buf, &key, 0);
4449 btrfs_node_key_to_cpu(buf, &key, 0);
4451 rec->info_objectid = key.objectid;
4453 rec->info_level = level;
4455 if (btrfs_is_leaf(buf))
4456 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4458 status = btrfs_check_node(root, &rec->parent_key, buf);
4460 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4462 status = try_to_fix_bad_block(root, buf, status);
4463 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4465 fprintf(stderr, "bad block %llu\n",
4466 (unsigned long long)buf->start);
4469 * Signal to callers we need to start the scan over
4470 * again since we'll have cowed blocks.
4475 rec->content_checked = 1;
/* Full-backref blocks carry no per-root owner to verify. */
4476 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4477 rec->owner_ref_checked = 1;
4479 ret = check_owner_ref(root, rec, buf);
4481 rec->owner_ref_checked = 1;
4485 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look up a tree backref on @rec matching either @parent (shared/full
 * backref) or @root (explicit backref).  A stack "match" object is filled
 * with the search key and compared via compare_extent_backref.  Returns
 * the backref or NULL when absent.
 */
4490 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4491 u64 parent, u64 root)
4493 struct rb_node *node;
4494 struct tree_backref *back = NULL;
4495 struct tree_backref match = {
4502 match.parent = parent;
4503 match.node.full_backref = 1;
4508 node = rb_search(&rec->backref_tree, &match.node.node,
4509 (rb_compare_keys)compare_extent_backref, NULL);
4511 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Allocate a new tree backref for @rec and insert it into the record's
 * backref rb-tree.  A non-zero @parent makes it a full (shared) backref;
 * otherwise it is keyed by @root.
 * NOTE(review): the malloc-failure branch is not visible in this excerpt.
 */
4516 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4517 u64 parent, u64 root)
4519 struct tree_backref *ref = malloc(sizeof(*ref));
4523 memset(&ref->node, 0, sizeof(ref->node));
4525 ref->parent = parent;
4526 ref->node.full_backref = 1;
4529 ref->node.full_backref = 0;
4531 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
/*
 * Look up a data backref on @rec.  The match key includes found_ref and
 * disk_bytenr/bytes in addition to parent-or-(root,owner,offset), so a
 * ref discovered from a file extent item is distinguished from one
 * discovered in the extent tree.  Returns the backref or NULL.
 */
4536 static struct data_backref *find_data_backref(struct extent_record *rec,
4537 u64 parent, u64 root,
4538 u64 owner, u64 offset,
4540 u64 disk_bytenr, u64 bytes)
4542 struct rb_node *node;
4543 struct data_backref *back = NULL;
4544 struct data_backref match = {
4551 .found_ref = found_ref,
4552 .disk_bytenr = disk_bytenr,
4556 match.parent = parent;
4557 match.node.full_backref = 1;
4562 node = rb_search(&rec->backref_tree, &match.node.node,
4563 (rb_compare_keys)compare_extent_backref, NULL);
4565 back = to_data_backref(rb_node_to_extent_backref(node));
/*
 * Allocate and insert a data backref for @rec.  A non-zero @parent makes
 * it a full (shared) backref, otherwise it is keyed by root/owner/offset.
 * The record's max_size is raised to @max_size if that is larger.
 * NOTE(review): the malloc-failure branch is not visible in this excerpt.
 */
4570 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4571 u64 parent, u64 root,
4572 u64 owner, u64 offset,
4575 struct data_backref *ref = malloc(sizeof(*ref));
4579 memset(&ref->node, 0, sizeof(ref->node));
4580 ref->node.is_data = 1;
4583 ref->parent = parent;
4586 ref->node.full_backref = 1;
4590 ref->offset = offset;
4591 ref->node.full_backref = 0;
4593 ref->bytes = max_size;
4596 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4597 if (max_size > rec->max_size)
4598 rec->max_size = max_size;
4602 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent's kind disagrees with the
 * flags of the block group containing it: data extents must sit in DATA
 * block groups; metadata extents in METADATA or SYSTEM ones.  SYSTEM vs
 * METADATA can only be told apart via the first backref's root (chunk
 * tree root implies SYSTEM).
 */
4603 static void check_extent_type(struct extent_record *rec)
4605 struct btrfs_block_group_cache *bg_cache;
4607 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4611 /* data extent, check chunk directly*/
4612 if (!rec->metadata) {
4613 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4614 rec->wrong_chunk_type = 1;
4618 /* metadata extent, check the obvious case first */
4619 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4620 BTRFS_BLOCK_GROUP_METADATA))) {
4621 rec->wrong_chunk_type = 1;
4626 * Check SYSTEM extent, as it's also marked as metadata, we can only
4627 * make sure it's a SYSTEM extent by its backref
4629 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4630 struct extent_backref *node;
4631 struct tree_backref *tback;
4634 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4635 if (node->is_data) {
4636 /* tree block shouldn't have data backref */
4637 rec->wrong_chunk_type = 1;
4640 tback = container_of(node, struct tree_backref, node);
4642 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4643 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4645 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4646 if (!(bg_cache->flags & bg_type))
4647 rec->wrong_chunk_type = 1;
4652 * Allocate a new extent record, fill default values from @tmpl and insert int
4653 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4654 * the cache, otherwise it fails.
/*
 * Copies the template fields verbatim, initializes the bookkeeping lists,
 * backref tree and repair flags, and accounts the extent's size in the
 * global bytes_used.  Metadata records also get an immediate
 * cross-stripe-boundary check and a chunk-type check.
 */
4656 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4657 struct extent_record *tmpl)
4659 struct extent_record *rec;
4662 rec = malloc(sizeof(*rec));
4665 rec->start = tmpl->start;
4666 rec->max_size = tmpl->max_size;
4667 rec->nr = max(tmpl->nr, tmpl->max_size);
4668 rec->found_rec = tmpl->found_rec;
4669 rec->content_checked = tmpl->content_checked;
4670 rec->owner_ref_checked = tmpl->owner_ref_checked;
4671 rec->num_duplicates = 0;
4672 rec->metadata = tmpl->metadata;
4673 rec->flag_block_full_backref = FLAG_UNSET;
4674 rec->bad_full_backref = 0;
4675 rec->crossing_stripes = 0;
4676 rec->wrong_chunk_type = 0;
4677 rec->is_root = tmpl->is_root;
4678 rec->refs = tmpl->refs;
4679 rec->extent_item_refs = tmpl->extent_item_refs;
4680 rec->parent_generation = tmpl->parent_generation;
4681 INIT_LIST_HEAD(&rec->backrefs);
4682 INIT_LIST_HEAD(&rec->dups);
4683 INIT_LIST_HEAD(&rec->list);
4684 rec->backref_tree = RB_ROOT;
4685 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4686 rec->cache.start = tmpl->start;
4687 rec->cache.size = tmpl->nr;
4688 ret = insert_cache_extent(extent_cache, &rec->cache);
4690 bytes_used += rec->nr;
/* Metadata must not straddle a stripe; kernel scrub can't handle it. */
4693 rec->crossing_stripes = check_crossing_stripes(rec->start,
4694 global_info->tree_root->nodesize);
4695 check_extent_type(rec);
4700 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4702 * - refs - if found, increase refs
4703 * - is_root - if found, set
4704 * - content_checked - if found, set
4705 * - owner_ref_checked - if found, set
4707 * If not found, create a new one, initialize and insert.
/*
 * A lookup hit whose start differs from ours (or that was already marked
 * found) is a duplicate EXTENT_ITEM: the existing record goes on the
 * global duplicate_extents list and a shadow record is chained on its
 * dups list for later resolution.
 */
4709 static int add_extent_rec(struct cache_tree *extent_cache,
4710 struct extent_record *tmpl)
4712 struct extent_record *rec;
4713 struct cache_extent *cache;
4717 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4719 rec = container_of(cache, struct extent_record, cache);
4723 rec->nr = max(tmpl->nr, tmpl->max_size);
4726 * We need to make sure to reset nr to whatever the extent
4727 * record says was the real size, this way we can compare it to
4730 if (tmpl->found_rec) {
4731 if (tmpl->start != rec->start || rec->found_rec) {
4732 struct extent_record *tmp;
4735 if (list_empty(&rec->list))
4736 list_add_tail(&rec->list,
4737 &duplicate_extents);
4740 * We have to do this song and dance in case we
4741 * find an extent record that falls inside of
4742 * our current extent record but does not have
4743 * the same objectid.
4745 tmp = malloc(sizeof(*tmp));
4748 tmp->start = tmpl->start;
4749 tmp->max_size = tmpl->max_size;
4752 tmp->metadata = tmpl->metadata;
4753 tmp->extent_item_refs = tmpl->extent_item_refs;
4754 INIT_LIST_HEAD(&tmp->list);
4755 list_add_tail(&tmp->list, &rec->dups);
4756 rec->num_duplicates++;
/* Conflicting ref counts from two EXTENT_ITEMs: report, keep the new. */
4763 if (tmpl->extent_item_refs && !dup) {
4764 if (rec->extent_item_refs) {
4765 fprintf(stderr, "block %llu rec "
4766 "extent_item_refs %llu, passed %llu\n",
4767 (unsigned long long)tmpl->start,
4768 (unsigned long long)
4769 rec->extent_item_refs,
4770 (unsigned long long)tmpl->extent_item_refs);
4772 rec->extent_item_refs = tmpl->extent_item_refs;
4776 if (tmpl->content_checked)
4777 rec->content_checked = 1;
4778 if (tmpl->owner_ref_checked)
4779 rec->owner_ref_checked = 1;
4780 memcpy(&rec->parent_key, &tmpl->parent_key,
4781 sizeof(tmpl->parent_key));
4782 if (tmpl->parent_generation)
4783 rec->parent_generation = tmpl->parent_generation;
4784 if (rec->max_size < tmpl->max_size)
4785 rec->max_size = tmpl->max_size;
4788 * A metadata extent can't cross stripe_len boundary, otherwise
4789 * kernel scrub won't be able to handle it.
4790 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4794 rec->crossing_stripes = check_crossing_stripes(
4795 rec->start, global_info->tree_root->nodesize);
4796 check_extent_type(rec);
4797 maybe_free_extent_rec(extent_cache, rec);
/* Cache miss: create a brand new record from the template. */
4801 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree-block backref for @bytenr, creating a minimal extent
 * record first if the cache does not know the block yet.  @found_ref
 * distinguishes a ref seen while walking the trees (found_ref) from one
 * read out of the extent tree (found_extent_tree); seeing either twice
 * is reported as a duplicate.
 */
4806 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4807 u64 parent, u64 root, int found_ref)
4809 struct extent_record *rec;
4810 struct tree_backref *back;
4811 struct cache_extent *cache;
4813 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4815 struct extent_record tmpl;
4817 memset(&tmpl, 0, sizeof(tmpl));
4818 tmpl.start = bytenr;
4822 add_extent_rec_nolookup(extent_cache, &tmpl);
/* Re-lookup the record we just inserted. */
4824 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4829 rec = container_of(cache, struct extent_record, cache);
4830 if (rec->start != bytenr) {
4834 back = find_tree_backref(rec, parent, root);
4836 back = alloc_tree_backref(rec, parent, root);
4841 if (back->node.found_ref) {
4842 fprintf(stderr, "Extent back ref already exists "
4843 "for %llu parent %llu root %llu \n",
4844 (unsigned long long)bytenr,
4845 (unsigned long long)parent,
4846 (unsigned long long)root);
4848 back->node.found_ref = 1;
4850 if (back->node.found_extent_tree) {
4851 fprintf(stderr, "Extent back ref already exists "
4852 "for %llu parent %llu root %llu \n",
4853 (unsigned long long)bytenr,
4854 (unsigned long long)parent,
4855 (unsigned long long)root);
4857 back->node.found_extent_tree = 1;
4859 check_extent_type(rec);
4860 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data-extent backref for @bytenr, creating a minimal extent
 * record if needed.  For refs found while walking file trees
 * (@found_ref), @max_size is the real extent size and must agree with any
 * previously found ref (enforced by the BUG_ONs); refs read from the
 * extent tree instead record num_refs.  Duplicate extent-tree refs are
 * reported to stderr.
 */
4864 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4865 u64 parent, u64 root, u64 owner, u64 offset,
4866 u32 num_refs, int found_ref, u64 max_size)
4868 struct extent_record *rec;
4869 struct data_backref *back;
4870 struct cache_extent *cache;
4872 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4874 struct extent_record tmpl;
4876 memset(&tmpl, 0, sizeof(tmpl));
4877 tmpl.start = bytenr;
4879 tmpl.max_size = max_size;
4881 add_extent_rec_nolookup(extent_cache, &tmpl);
4883 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4888 rec = container_of(cache, struct extent_record, cache);
4889 if (rec->max_size < max_size)
4890 rec->max_size = max_size;
4893 * If found_ref is set then max_size is the real size and must match the
4894 * existing refs. So if we have already found a ref then we need to
4895 * make sure that this ref matches the existing one, otherwise we need
4896 * to add a new backref so we can notice that the backrefs don't match
4897 * and we need to figure out who is telling the truth. This is to
4898 * account for that awful fsync bug I introduced where we'd end up with
4899 * a btrfs_file_extent_item that would have its length include multiple
4900 * prealloc extents or point inside of a prealloc extent.
4902 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4905 back = alloc_data_backref(rec, parent, root, owner, offset,
4911 BUG_ON(num_refs != 1);
4912 if (back->node.found_ref)
4913 BUG_ON(back->bytes != max_size);
4914 back->node.found_ref = 1;
4915 back->found_ref += 1;
4916 back->bytes = max_size;
4917 back->disk_bytenr = bytenr;
/* A live file-tree ref implies the content/owner are effectively seen. */
4919 rec->content_checked = 1;
4920 rec->owner_ref_checked = 1;
4922 if (back->node.found_extent_tree) {
4923 fprintf(stderr, "Extent back ref already exists "
4924 "for %llu parent %llu root %llu "
4925 "owner %llu offset %llu num_refs %lu\n",
4926 (unsigned long long)bytenr,
4927 (unsigned long long)parent,
4928 (unsigned long long)root,
4929 (unsigned long long)owner,
4930 (unsigned long long)offset,
4931 (unsigned long)num_refs);
4933 back->num_refs = num_refs;
4934 back->node.found_extent_tree = 1;
4936 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, bytenr+size) for later scanning, but only the first
 * time: the @seen tree deduplicates, and only a successful insert there
 * adds the range to @pending.
 */
4940 static int add_pending(struct cache_tree *pending,
4941 struct cache_tree *seen, u64 bytenr, u32 size)
4944 ret = add_cache_extent(seen, bytenr, size);
4947 add_cache_extent(pending, bytenr, size);
/*
 * Fill @bits (up to @bits_nr entries) with the next block ranges to
 * process, preferring in this order: the readahead tree, node blocks near
 * @last (biased 32K back to batch nearby reads), then plain pending
 * extents.  When space remains, additional pending extents within 32K of
 * the batch's end are appended to enlarge sequential runs.  Returns the
 * number of entries written (per the visible accumulation pattern).
 */
4951 static int pick_next_pending(struct cache_tree *pending,
4952 struct cache_tree *reada,
4953 struct cache_tree *nodes,
4954 u64 last, struct block_info *bits, int bits_nr,
4957 unsigned long node_start = last;
4958 struct cache_extent *cache;
4961 cache = search_cache_extent(reada, 0);
4963 bits[0].start = cache->start;
4964 bits[0].size = cache->size;
/* Step back 32K so neighbours just before @last batch together. */
4969 if (node_start > 32768)
4970 node_start -= 32768;
4972 cache = search_cache_extent(nodes, node_start);
4974 cache = search_cache_extent(nodes, 0);
4977 cache = search_cache_extent(pending, 0);
4982 bits[ret].start = cache->start;
4983 bits[ret].size = cache->size;
4984 cache = next_cache_extent(cache);
4986 } while (cache && ret < bits_nr);
4992 bits[ret].start = cache->start;
4993 bits[ret].size = cache->size;
4994 cache = next_cache_extent(cache);
4996 } while (cache && ret < bits_nr);
4998 if (bits_nr - ret > 8) {
4999 u64 lookup = bits[0].start + bits[0].size;
5000 struct cache_extent *next;
5001 next = search_cache_extent(pending, lookup);
5003 if (next->start - lookup > 32768)
5005 bits[ret].start = next->start;
5006 bits[ret].size = next->size;
5007 lookup = next->start + next->size;
5011 next = next_cache_extent(next);
/*
 * cache_tree_free_extents() callback: unlink a chunk record from its
 * lists before it is released.
 */
5019 static void free_chunk_record(struct cache_extent *cache)
5021 struct chunk_record *rec;
5023 rec = container_of(cache, struct chunk_record, cache);
5024 list_del_init(&rec->list);
5025 list_del_init(&rec->dextents);
/* Release every chunk record held in @chunk_cache. */
5029 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5031 cache_tree_free_extents(chunk_cache, free_chunk_record);
/*
 * rb-tree destructor callback for device records.
 * NOTE(review): the FREE_RB_BASED_TREE line below belongs to the
 * device-cache free function whose signature is not visible in this
 * excerpt.
 */
5034 static void free_device_record(struct rb_node *node)
5036 struct device_record *rec;
5038 rec = container_of(node, struct device_record, node);
5042 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert @bg_rec into the block group cache tree; on success it is also
 * appended to the tree's block_groups list.
 */
5044 int insert_block_group_record(struct block_group_tree *tree,
5045 struct block_group_record *bg_rec)
5049 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5053 list_add_tail(&bg_rec->list, &tree->block_groups);
/*
 * cache_tree_free_extents() callback: unlink a block group record from
 * its list before it is released.
 */
5057 static void free_block_group_record(struct cache_extent *cache)
5059 struct block_group_record *rec;
5061 rec = container_of(cache, struct block_group_record, cache);
5062 list_del_init(&rec->list);
/* Release every block group record held in @tree. */
5066 void free_block_group_tree(struct block_group_tree *tree)
5068 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert @de_rec into the device extent cache; on success the record is
 * also placed on both orphan lists until its chunk and device are
 * matched up.
 */
5071 int insert_device_extent_record(struct device_extent_tree *tree,
5072 struct device_extent_record *de_rec)
5077 * Device extent is a bit different from the other extents, because
5078 * the extents which belong to the different devices may have the
5079 * same start and size, so we need use the special extent cache
5080 * search/insert functions.
5082 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5086 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5087 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/*
 * cache_tree_free_extents() callback: detach a device extent record from
 * its chunk and device lists (if still linked) before release.
 */
5091 static void free_device_extent_record(struct cache_extent *cache)
5093 struct device_extent_record *rec;
5095 rec = container_of(cache, struct device_extent_record, cache);
5096 if (!list_empty(&rec->chunk_list))
5097 list_del_init(&rec->chunk_list);
5098 if (!list_empty(&rec->device_list))
5099 list_del_init(&rec->device_list);
/* Release every device extent record held in @tree. */
5103 void free_device_extent_tree(struct device_extent_tree *tree)
5105 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5108 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Old (v0) extent ref items carry no explicit type: an objectid below
 * FIRST_FREE_OBJECTID means a tree block ref, anything else a data ref.
 * Translate the item into the corresponding backref record.
 */
5109 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5110 struct extent_buffer *leaf, int slot)
5112 struct btrfs_extent_ref_v0 *ref0;
5113 struct btrfs_key key;
5115 btrfs_item_key_to_cpu(leaf, &key, slot);
5116 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5117 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5118 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5120 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5121 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the CHUNK_ITEM at @slot.  The
 * record is sized for the item's stripe count (flexible array) and every
 * on-disk field — geometry, type flags and the per-stripe devid/offset/
 * uuid — is copied into CPU byte order.  Exits the program on allocation
 * failure (memory allocation is fatal here, matching the other record
 * constructors in this file).
 */
5127 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5128 struct btrfs_key *key,
5131 struct btrfs_chunk *ptr;
5132 struct chunk_record *rec;
5135 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5136 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5138 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5140 fprintf(stderr, "memory allocation failed\n");
5144 INIT_LIST_HEAD(&rec->list);
5145 INIT_LIST_HEAD(&rec->dextents);
/* The chunk's logical start is the item key's offset. */
5148 rec->cache.start = key->offset;
5149 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5151 rec->generation = btrfs_header_generation(leaf);
5153 rec->objectid = key->objectid;
5154 rec->type = key->type;
5155 rec->offset = key->offset;
5157 rec->length = rec->cache.size;
5158 rec->owner = btrfs_chunk_owner(leaf, ptr);
5159 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5160 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5161 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5162 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5163 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5164 rec->num_stripes = num_stripes;
5165 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5167 for (i = 0; i < rec->num_stripes; ++i) {
5168 rec->stripes[i].devid =
5169 btrfs_stripe_devid_nr(leaf, ptr, i);
5170 rec->stripes[i].offset =
5171 btrfs_stripe_offset_nr(leaf, ptr, i);
5172 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5173 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Convert a CHUNK_ITEM into a chunk_record and insert it into
 * @chunk_cache; a collision (two chunks covering the same range) is
 * reported to stderr.
 */
5180 static int process_chunk_item(struct cache_tree *chunk_cache,
5181 struct btrfs_key *key, struct extent_buffer *eb,
5184 struct chunk_record *rec;
5187 rec = btrfs_new_chunk_record(eb, key, slot);
5188 ret = insert_cache_extent(chunk_cache, &rec->cache);
5190 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5191 rec->offset, rec->length);
/*
 * Convert a DEV_ITEM into a device_record and insert it into the device
 * rb-tree keyed by devid; a duplicate devid is reported to stderr.
 * Exits the program on allocation failure.
 */
5198 static int process_device_item(struct rb_root *dev_cache,
5199 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5201 struct btrfs_dev_item *ptr;
5202 struct device_record *rec;
5205 ptr = btrfs_item_ptr(eb,
5206 slot, struct btrfs_dev_item);
5208 rec = malloc(sizeof(*rec));
5210 fprintf(stderr, "memory allocation failed\n");
/* devid from the key first; overwritten below by the item's devid. */
5214 rec->devid = key->offset;
5215 rec->generation = btrfs_header_generation(eb);
5217 rec->objectid = key->objectid;
5218 rec->type = key->type;
5219 rec->offset = key->offset;
5221 rec->devid = btrfs_device_id(eb, ptr);
5222 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5223 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5225 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5227 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the BLOCK_GROUP_ITEM at
 * @slot (key objectid = start, key offset = length, flags from the
 * item).  Exits the program on allocation failure.
 */
5234 struct block_group_record *
5235 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5238 struct btrfs_block_group_item *ptr;
5239 struct block_group_record *rec;
5241 rec = calloc(1, sizeof(*rec));
5243 fprintf(stderr, "memory allocation failed\n");
5247 rec->cache.start = key->objectid;
5248 rec->cache.size = key->offset;
5250 rec->generation = btrfs_header_generation(leaf);
5252 rec->objectid = key->objectid;
5253 rec->type = key->type;
5254 rec->offset = key->offset;
5256 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5257 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5259 INIT_LIST_HEAD(&rec->list);
/*
 * Convert a BLOCK_GROUP_ITEM into a block_group_record and insert it into
 * @block_group_cache; overlapping block groups are reported to stderr.
 */
5264 static int process_block_group_item(struct block_group_tree *block_group_cache,
5265 struct btrfs_key *key,
5266 struct extent_buffer *eb, int slot)
5268 struct block_group_record *rec;
5271 rec = btrfs_new_block_group_record(eb, key, slot);
5272 ret = insert_block_group_record(block_group_cache, rec);
5274 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5275 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the DEV_EXTENT item at
 * @slot (cache keyed by devid + device-relative offset; chunk linkage
 * fields copied from the item).  Exits the program on allocation failure.
 */
5282 struct device_extent_record *
5283 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5284 struct btrfs_key *key, int slot)
5286 struct device_extent_record *rec;
5287 struct btrfs_dev_extent *ptr;
5289 rec = calloc(1, sizeof(*rec));
5291 fprintf(stderr, "memory allocation failed\n");
5295 rec->cache.objectid = key->objectid;
5296 rec->cache.start = key->offset;
5298 rec->generation = btrfs_header_generation(leaf);
5300 rec->objectid = key->objectid;
5301 rec->type = key->type;
5302 rec->offset = key->offset;
5304 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5305 rec->chunk_objecteid =
5306 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5308 btrfs_dev_extent_chunk_offset(leaf, ptr);
5309 rec->length = btrfs_dev_extent_length(leaf, ptr);
5310 rec->cache.size = rec->length;
5312 INIT_LIST_HEAD(&rec->chunk_list);
5313 INIT_LIST_HEAD(&rec->device_list);
/*
 * Convert a DEV_EXTENT item into a device_extent_record and insert it
 * into @dev_extent_cache; duplicates are reported to stderr.
 */
5319 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5320 struct btrfs_key *key, struct extent_buffer *eb,
5323 struct device_extent_record *rec;
5326 rec = btrfs_new_device_extent_record(eb, key, slot);
5327 ret = insert_device_extent_record(dev_extent_cache, rec);
5330 "Device extent[%llu, %llu, %llu] existed.\n",
5331 rec->objectid, rec->offset, rec->length);
/*
 * Digest one EXTENT_ITEM / METADATA_ITEM from the extent tree into the
 * extent cache: add/update the extent record with its refcount, then walk
 * the item's inline backrefs and register each as a tree or data backref.
 * METADATA_ITEM keys encode the level in key.offset, so the byte count
 * comes from the nodesize instead.  Undersized items are handled as
 * pre-v0-format extent items when compat support is compiled in.
 */
5338 static int process_extent_item(struct btrfs_root *root,
5339 struct cache_tree *extent_cache,
5340 struct extent_buffer *eb, int slot)
5342 struct btrfs_extent_item *ei;
5343 struct btrfs_extent_inline_ref *iref;
5344 struct btrfs_extent_data_ref *dref;
5345 struct btrfs_shared_data_ref *sref;
5346 struct btrfs_key key;
5347 struct extent_record tmpl;
5351 u32 item_size = btrfs_item_size_nr(eb, slot);
5357 btrfs_item_key_to_cpu(eb, &key, slot);
5359 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5361 num_bytes = root->nodesize;
5363 num_bytes = key.offset;
5366 if (item_size < sizeof(*ei)) {
5367 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5368 struct btrfs_extent_item_v0 *ei0;
5369 BUG_ON(item_size != sizeof(*ei0));
5370 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5371 refs = btrfs_extent_refs_v0(eb, ei0);
/* v0 items have no inline refs: record the extent and return. */
5375 memset(&tmpl, 0, sizeof(tmpl));
5376 tmpl.start = key.objectid;
5377 tmpl.nr = num_bytes;
5378 tmpl.extent_item_refs = refs;
5379 tmpl.metadata = metadata;
5381 tmpl.max_size = num_bytes;
5383 return add_extent_rec(extent_cache, &tmpl);
5386 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5387 refs = btrfs_extent_refs(eb, ei);
5388 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5393 memset(&tmpl, 0, sizeof(tmpl));
5394 tmpl.start = key.objectid;
5395 tmpl.nr = num_bytes;
5396 tmpl.extent_item_refs = refs;
5397 tmpl.metadata = metadata;
5399 tmpl.max_size = num_bytes;
5400 add_extent_rec(extent_cache, &tmpl);
/* Inline refs follow the item; skip tree_block_info for old-style keys. */
5402 ptr = (unsigned long)(ei + 1);
5403 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5404 key.type == BTRFS_EXTENT_ITEM_KEY)
5405 ptr += sizeof(struct btrfs_tree_block_info);
5407 end = (unsigned long)ei + item_size;
5409 iref = (struct btrfs_extent_inline_ref *)ptr;
5410 type = btrfs_extent_inline_ref_type(eb, iref);
5411 offset = btrfs_extent_inline_ref_offset(eb, iref);
5413 case BTRFS_TREE_BLOCK_REF_KEY:
5414 add_tree_backref(extent_cache, key.objectid,
5417 case BTRFS_SHARED_BLOCK_REF_KEY:
5418 add_tree_backref(extent_cache, key.objectid,
5421 case BTRFS_EXTENT_DATA_REF_KEY:
5422 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5423 add_data_backref(extent_cache, key.objectid, 0,
5424 btrfs_extent_data_ref_root(eb, dref),
5425 btrfs_extent_data_ref_objectid(eb,
5427 btrfs_extent_data_ref_offset(eb, dref),
5428 btrfs_extent_data_ref_count(eb, dref),
5431 case BTRFS_SHARED_DATA_REF_KEY:
5432 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5433 add_data_backref(extent_cache, key.objectid, offset,
5435 btrfs_shared_data_ref_count(eb, sref),
5439 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5440 key.objectid, key.type, num_bytes);
5443 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that the free space cache of @cache holds exactly the range
 * [offset, offset + bytes).
 *
 * The range is first clipped against every superblock mirror copy inside
 * the block group (supers are never free space): depending on where a
 * super stripe lands the range may be shrunk from either end, or split
 * with the left half checked recursively.  The remaining range must match
 * a free-space entry exactly (same offset and length), which is then
 * unlinked so the caller can detect leftover entries.
 */
5450 static int check_cache_range(struct btrfs_root *root,
5451 struct btrfs_block_group_cache *cache,
5452 u64 offset, u64 bytes)
5454 struct btrfs_free_space *entry;
5460 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5461 bytenr = btrfs_sb_offset(i);
5462 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5463 cache->key.objectid, bytenr, 0,
5464 &logical, &nr, &stripe_len);
/* Skip super stripes that don't intersect [offset, offset+bytes). */
5469 if (logical[nr] + stripe_len <= offset)
5471 if (offset + bytes <= logical[nr])
5473 if (logical[nr] == offset) {
5474 if (stripe_len >= bytes) {
5478 bytes -= stripe_len;
5479 offset += stripe_len;
5480 } else if (logical[nr] < offset) {
5481 if (logical[nr] + stripe_len >=
5486 bytes = (offset + bytes) -
5487 (logical[nr] + stripe_len);
5488 offset = logical[nr] + stripe_len;
5491 * Could be tricky, the super may land in the
5492 * middle of the area we're checking. First
5493 * check the easiest case, it's at the end.
5495 if (logical[nr] + stripe_len >=
5497 bytes = logical[nr] - offset;
5501 /* Check the left side */
5502 ret = check_cache_range(root, cache,
5504 logical[nr] - offset);
5510 /* Now we continue with the right side */
5511 bytes = (offset + bytes) -
5512 (logical[nr] + stripe_len);
5513 offset = logical[nr] + stripe_len;
5520 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5522 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5523 offset, offset+bytes);
5527 if (entry->offset != offset) {
5528 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5533 if (entry->bytes != bytes) {
5534 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5535 bytes, entry->bytes, offset);
/* Consume the entry so leftovers can be detected by the caller. */
5539 unlink_free_space(cache->free_space_ctl, entry);
/*
 * verify_space_cache - cross-check a block group's free space cache against
 * the extent tree.
 *
 * Walks EXTENT_ITEM/METADATA_ITEM records inside the block group; every gap
 * between allocated extents must be present in the free space cache
 * (check_cache_range unlinks each matched entry).  Any entries left over at
 * the end mean the cache claims space that the extent tree says is free-listed
 * incorrectly.
 *
 * NOTE(review): this excerpt elides some original lines; visible code is
 * unchanged, only comments were added.
 */
5544 static int verify_space_cache(struct btrfs_root *root,
5545 struct btrfs_block_group_cache *cache)
5547 struct btrfs_path *path;
5548 struct extent_buffer *leaf;
5549 struct btrfs_key key;
5553 path = btrfs_alloc_path();
5557 root = root->fs_info->extent_root;
/* Never look below the primary superblock offset. */
5559 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5561 key.objectid = last;
5563 key.type = BTRFS_EXTENT_ITEM_KEY;
5565 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5570 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5571 ret = btrfs_next_leaf(root, path);
5579 leaf = path->nodes[0];
5580 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop once we leave this block group. */
5581 if (key.objectid >= cache->key.offset + cache->key.objectid)
5583 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5584 key.type != BTRFS_METADATA_ITEM_KEY) {
/* Contiguous with the previous extent: just advance 'last'.
 * METADATA_ITEM offsets hold the level, so the length is nodesize. */
5589 if (last == key.objectid) {
5590 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5591 last = key.objectid + key.offset;
5593 last = key.objectid + root->nodesize;
/* Gap between 'last' and this extent must be free space. */
5598 ret = check_cache_range(root, cache, last,
5599 key.objectid - last);
5602 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5603 last = key.objectid + key.offset;
5605 last = key.objectid + root->nodesize;
/* Trailing gap up to the end of the block group. */
5609 if (last < cache->key.objectid + cache->key.offset)
5610 ret = check_cache_range(root, cache, last,
5611 cache->key.objectid +
5612 cache->key.offset - last);
5615 btrfs_free_path(path);
/* Entries that survived all check_cache_range() unlinks are bogus. */
5618 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5619 fprintf(stderr, "There are still entries left in the space "
/*
 * check_space_cache - validate the free space cache (or free space tree) of
 * every block group in the filesystem.
 *
 * Returns -EINVAL if any block group's cached free space disagrees with the
 * extent tree, 0 otherwise.
 *
 * NOTE(review): this excerpt elides some original lines; visible code is
 * unchanged, only comments were added.
 */
5627 static int check_space_cache(struct btrfs_root *root)
5629 struct btrfs_block_group_cache *cache;
5630 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
/* A cache generation of -1 means no v1 space cache; otherwise it must
 * match the super generation or the kernel will invalidate it anyway. */
5634 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5635 btrfs_super_generation(root->fs_info->super_copy) !=
5636 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5637 printf("cache and super generation don't match, space cache "
5638 "will be invalidated\n");
5642 if (ctx.progress_enabled) {
5643 ctx.tp = TASK_FREE_SPACE;
5644 task_start(ctx.info);
/* Iterate all block groups, starting past the primary superblock. */
5648 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5652 start = cache->key.objectid + cache->key.offset;
5653 if (!cache->free_space_ctl) {
5654 if (btrfs_init_free_space_ctl(cache,
5655 root->sectorsize)) {
5660 btrfs_remove_free_space_cache(cache);
/* With the free-space-tree feature, load from the tree instead of
 * the v1 file-based cache. */
5663 if (btrfs_fs_compat_ro(root->fs_info,
5664 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5665 ret = exclude_super_stripes(root, cache);
5667 fprintf(stderr, "could not exclude super stripes: %s\n",
5672 ret = load_free_space_tree(root->fs_info, cache);
5673 free_excluded_extents(root, cache);
5675 fprintf(stderr, "could not load free space tree: %s\n",
5682 ret = load_free_space_cache(root->fs_info, cache);
5687 ret = verify_space_cache(root, cache);
5689 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5690 cache->key.objectid);
5695 task_stop(ctx.info);
5697 return error ? -EINVAL : 0;
/*
 * check_extent_csums - re-read a data extent and verify every sector's
 * checksum against the csum items stored in @eb at @leaf_offset.
 *
 * On a mismatch the error is reported and, if another copy exists, the next
 * mirror is tried.
 *
 * NOTE(review): this excerpt elides some original lines; visible code is
 * unchanged, only comments were added/reworded.
 */
5700 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5701 u64 num_bytes, unsigned long leaf_offset,
5702 struct extent_buffer *eb) {
5705 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5707 unsigned long csum_offset;
5711 u64 data_checked = 0;
/* Csums cover whole sectors; a partial sector is a malformed range. */
5717 if (num_bytes % root->sectorsize)
5720 data = malloc(num_bytes);
5724 while (offset < num_bytes) {
5727 read_len = num_bytes - offset;
5728 /* read as much data as we can in one pass */
5729 ret = read_extent_data(root, data + offset,
5730 bytenr + offset, &read_len, mirror);
5734 /* verify the checksum of each sector we just read */
5735 while (data_checked < read_len) {
5737 tmp = offset + data_checked;
5739 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5740 csum, root->sectorsize);
5741 btrfs_csum_final(csum, (char *)&csum);
/* Locate the stored csum for this sector inside the leaf item. */
5743 csum_offset = leaf_offset +
5744 tmp / root->sectorsize * csum_size;
5745 read_extent_buffer(eb, (char *)&csum_expected,
5746 csum_offset, csum_size);
5747 /* on mismatch, report it and try the next mirror if any */
5748 if (csum != csum_expected) {
5749 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5750 mirror, bytenr + tmp,
5751 csum, csum_expected);
5752 num_copies = btrfs_num_copies(
5753 &root->fs_info->mapping_tree,
5755 if (mirror < num_copies - 1) {
5760 data_checked += root->sectorsize;
/*
 * check_extent_exists - verify that the whole range [bytenr, bytenr+num_bytes)
 * is covered by EXTENT_ITEMs in the extent tree.
 *
 * Used by check_csums() to make sure every csum range actually has a backing
 * extent record.  The range is progressively shrunk as covering extents are
 * found; whatever is left uncovered at the end is reported.
 *
 * NOTE(review): this excerpt elides some original lines; visible code is
 * unchanged, only comments were added.
 */
5769 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5772 struct btrfs_path *path;
5773 struct extent_buffer *leaf;
5774 struct btrfs_key key;
5777 path = btrfs_alloc_path();
5779 fprintf(stderr, "Error allocating path\n");
/* Search for the last item at this bytenr, then walk backwards to find
 * the first extent item that could cover the start of the range. */
5783 key.objectid = bytenr;
5784 key.type = BTRFS_EXTENT_ITEM_KEY;
5785 key.offset = (u64)-1;
5788 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5791 fprintf(stderr, "Error looking up extent record %d\n", ret);
5792 btrfs_free_path(path);
5795 if (path->slots[0] > 0) {
5798 ret = btrfs_prev_leaf(root, path);
5801 } else if (ret > 0) {
5808 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5811 * Block group items come before extent items if they have the same
5812 * bytenr, so walk back one more just in case. Dear future traveller,
5813 * first congrats on mastering time travel. Now if it's not too much
5814 * trouble could you go back to 2006 and tell Chris to make the
5815 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5816 * EXTENT_ITEM_KEY please?
5818 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5819 if (path->slots[0] > 0) {
5822 ret = btrfs_prev_leaf(root, path);
5825 } else if (ret > 0) {
5830 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* Scan forward, trimming the range as covering extents are found. */
5834 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5835 ret = btrfs_next_leaf(root, path);
5837 fprintf(stderr, "Error going to next leaf "
5839 btrfs_free_path(path);
5845 leaf = path->nodes[0];
5846 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5847 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before our range: skip it. */
5851 if (key.objectid + key.offset < bytenr) {
/* Extent starts past our range: nothing more can cover us. */
5855 if (key.objectid > bytenr + num_bytes)
/* Extent starts exactly at our range: trim the front. */
5858 if (key.objectid == bytenr) {
5859 if (key.offset >= num_bytes) {
5863 num_bytes -= key.offset;
5864 bytenr += key.offset;
/* Extent starts before our range: trim what it covers. */
5865 } else if (key.objectid < bytenr) {
5866 if (key.objectid + key.offset >= bytenr + num_bytes) {
5870 num_bytes = (bytenr + num_bytes) -
5871 (key.objectid + key.offset);
5872 bytenr = key.objectid + key.offset;
/* Extent starts inside our range. */
5874 if (key.objectid + key.offset < bytenr + num_bytes) {
5875 u64 new_start = key.objectid + key.offset;
5876 u64 new_bytes = bytenr + num_bytes - new_start;
5879 * Weird case, the extent is in the middle of
5880 * our range, we'll have to search one side
5881 * and then the other. Not sure if this happens
5882 * in real life, but no harm in coding it up
5883 * anyway just in case.
5885 btrfs_release_path(path);
5886 ret = check_extent_exists(root, new_start,
5889 fprintf(stderr, "Right section didn't "
5893 num_bytes = key.objectid - bytenr;
5896 num_bytes = key.objectid - bytenr;
/* Anything left of the range has no extent record backing it. */
5903 if (num_bytes && !ret) {
5904 fprintf(stderr, "There are no extents for csum range "
5905 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5909 btrfs_free_path(path);
/*
 * check_csums - walk the whole csum tree and verify consistency.
 *
 * Accumulates contiguous csum runs; whenever a discontinuity appears, the
 * finished run [offset, offset+num_bytes) is checked to have backing extent
 * records (check_extent_exists).  With --check-data-csum the actual data is
 * also read back and re-checksummed (check_extent_csums).
 *
 * NOTE(review): this excerpt elides some original lines; visible code is
 * unchanged, only comments were added.
 */
5913 static int check_csums(struct btrfs_root *root)
5915 struct btrfs_path *path;
5916 struct extent_buffer *leaf;
5917 struct btrfs_key key;
5918 u64 offset = 0, num_bytes = 0;
5919 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5923 unsigned long leaf_offset;
5925 root = root->fs_info->csum_root;
5926 if (!extent_buffer_uptodate(root->node)) {
5927 fprintf(stderr, "No valid csum tree found\n");
/* Start at the very first csum item. */
5931 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5932 key.type = BTRFS_EXTENT_CSUM_KEY;
5935 path = btrfs_alloc_path();
5939 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5941 fprintf(stderr, "Error searching csum tree %d\n", ret);
5942 btrfs_free_path(path);
/* Landed past the target: step back one slot to not skip an item. */
5946 if (ret > 0 && path->slots[0])
5951 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5952 ret = btrfs_next_leaf(root, path);
5954 fprintf(stderr, "Error going to next leaf "
5961 leaf = path->nodes[0];
5963 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5964 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each stored csum covers one sector; derive the data length. */
5969 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5970 csum_size) * root->sectorsize;
5971 if (!check_data_csum)
5972 goto skip_csum_check;
5973 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5974 ret = check_extent_csums(root, key.offset, data_len,
5980 offset = key.offset;
/* Discontinuity: the finished run must map to real extents. */
5981 } else if (key.offset != offset + num_bytes) {
5982 ret = check_extent_exists(root, offset, num_bytes);
5984 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5985 "there is no extent record\n",
5986 offset, offset+num_bytes);
5989 offset = key.offset;
5992 num_bytes += data_len;
5996 btrfs_free_path(path);
/*
 * is_dropped_key - lexicographic key comparison (objectid, type, offset)
 * against a root's drop progress key; keys below @drop_key belong to an
 * already-dropped part of a snapshot being deleted and must be ignored.
 */
6000 static int is_dropped_key(struct btrfs_key *key,
6001 struct btrfs_key *drop_key) {
6002 if (key->objectid < drop_key->objectid)
6004 else if (key->objectid == drop_key->objectid) {
6005 if (key->type < drop_key->type)
6007 else if (key->type == drop_key->type) {
6008 if (key->offset < drop_key->offset)
6016 * Here are the rules for FULL_BACKREF.
6018 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6019 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6021 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6022 * if it happened after the relocation occurred since we'll have dropped the
6023 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6024 * have no real way to know for sure.
6026 * We process the blocks one root at a time, and we start from the lowest root
6027 * objectid and go to the highest. So we can just lookup the owner backref for
6028 * the record and if we don't find it then we know it doesn't exist and we have
6031 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6032 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6033 * be set or not and then we can check later once we've gathered all the refs.
/*
 * calc_extent_flag - decide whether @buf should carry
 * BTRFS_BLOCK_FLAG_FULL_BACKREF, per the rules documented above, and flag
 * the extent record when the previously recorded answer disagrees.
 *
 * NOTE(review): this excerpt elides some original lines (the goto labels and
 * returns); visible code is unchanged, only comments were added.
 */
6035 static int calc_extent_flag(struct btrfs_root *root,
6036 struct cache_tree *extent_cache,
6037 struct extent_buffer *buf,
6038 struct root_item_record *ri,
6041 struct extent_record *rec;
6042 struct cache_extent *cache;
6043 struct tree_backref *tback;
6046 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6047 /* we have added this extent before */
6049 rec = container_of(cache, struct extent_record, cache);
6052 * Except file/reloc tree, we can not have
6055 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* Root node itself is never FULL_BACKREF. */
6060 if (buf->start == ri->bytenr)
/* Rule 1: RELOC header flag implies FULL_BACKREF. */
6063 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
/* Still owned by the root we reached it from: normal backref. */
6066 owner = btrfs_header_owner(buf);
6067 if (owner == ri->objectid)
/* Rule 2: no owner backref recorded means the owner no longer
 * references this block, so FULL_BACKREF is expected. */
6070 tback = find_tree_backref(rec, 0, owner);
/* Record disagreement with what was previously observed. */
6075 if (rec->flag_block_full_backref != FLAG_UNSET &&
6076 rec->flag_block_full_backref != 0)
6077 rec->bad_full_backref = 1;
6080 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6081 if (rec->flag_block_full_backref != FLAG_UNSET &&
6082 rec->flag_block_full_backref != 1)
6083 rec->bad_full_backref = 1;
/*
 * run_next_block - process the next pending tree block in the metadata scan.
 *
 * Picks a block from the pending/reada/nodes caches, reads it, works out its
 * FULL_BACKREF state, then: for leaves, dispatches every item type into the
 * appropriate in-memory cache (extents, chunks, devices, block groups,
 * csums, backrefs, orphans); for internal nodes, records child extents and
 * queues them for processing.  Also accumulates the global space-usage
 * statistics.
 *
 * NOTE(review): this excerpt elides some original lines; visible code is
 * unchanged, only comments were added/reworded.
 */
6087 static int run_next_block(struct btrfs_root *root,
6088 struct block_info *bits,
6091 struct cache_tree *pending,
6092 struct cache_tree *seen,
6093 struct cache_tree *reada,
6094 struct cache_tree *nodes,
6095 struct cache_tree *extent_cache,
6096 struct cache_tree *chunk_cache,
6097 struct rb_root *dev_cache,
6098 struct block_group_tree *block_group_cache,
6099 struct device_extent_tree *dev_extent_cache,
6100 struct root_item_record *ri)
6102 struct extent_buffer *buf;
6103 struct extent_record *rec = NULL;
6114 struct btrfs_key key;
6115 struct cache_extent *cache;
/* Choose the next batch of blocks; may return readahead candidates. */
6118 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6119 bits_nr, &reada_bits);
6124 for(i = 0; i < nritems; i++) {
6125 ret = add_cache_extent(reada, bits[i].start,
6130 /* fixme, get the parent transid */
6131 readahead_tree_block(root, bits[i].start,
6135 *last = bits[0].start;
6136 bytenr = bits[0].start;
6137 size = bits[0].size;
/* Drop the chosen block from all tracking caches. */
6139 cache = lookup_cache_extent(pending, bytenr, size);
6141 remove_cache_extent(pending, cache);
6144 cache = lookup_cache_extent(reada, bytenr, size);
6146 remove_cache_extent(reada, cache);
6149 cache = lookup_cache_extent(nodes, bytenr, size);
6151 remove_cache_extent(nodes, cache);
6154 cache = lookup_cache_extent(extent_cache, bytenr, size);
6156 rec = container_of(cache, struct extent_record, cache);
6157 gen = rec->parent_generation;
6160 /* fixme, get the real parent transid */
6161 buf = read_tree_block(root, bytenr, size, gen);
6162 if (!extent_buffer_uptodate(buf)) {
6163 record_bad_block_io(root->fs_info,
6164 extent_cache, bytenr, size);
6168 nritems = btrfs_header_nritems(buf);
/* Determine the block's FULL_BACKREF flag: from the extent tree when
 * it is trusted, otherwise computed from the block itself. */
6171 if (!init_extent_tree) {
6172 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6173 btrfs_header_level(buf), 1, NULL,
6176 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6178 fprintf(stderr, "Couldn't calc extent flags\n");
6179 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6184 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6186 fprintf(stderr, "Couldn't calc extent flags\n");
6187 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6191 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6193 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6194 ri->objectid == btrfs_header_owner(buf)) {
6196 * Ok we got to this block from its original owner and
6197 * we have FULL_BACKREF set. Relocation can leave
6198 * converted blocks over so this is altogether possible,
6199 * however it's not possible if the generation > the
6200 * last snapshot, so check for this case.
6202 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6203 btrfs_header_generation(buf) > ri->last_snapshot) {
6204 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6205 rec->bad_full_backref = 1;
6210 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6211 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6212 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6213 rec->bad_full_backref = 1;
6217 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6218 rec->flag_block_full_backref = 1;
6222 rec->flag_block_full_backref = 0;
6224 owner = btrfs_header_owner(buf);
6227 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch each item by key type. */
6231 if (btrfs_is_leaf(buf)) {
6232 btree_space_waste += btrfs_leaf_free_space(root, buf);
6233 for (i = 0; i < nritems; i++) {
6234 struct btrfs_file_extent_item *fi;
6235 btrfs_item_key_to_cpu(buf, &key, i);
6236 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6237 process_extent_item(root, extent_cache, buf,
6241 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6242 process_extent_item(root, extent_cache, buf,
6246 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6248 btrfs_item_size_nr(buf, i);
6251 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6252 process_chunk_item(chunk_cache, &key, buf, i);
6255 if (key.type == BTRFS_DEV_ITEM_KEY) {
6256 process_device_item(dev_cache, &key, buf, i);
6259 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6260 process_block_group_item(block_group_cache,
6264 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6265 process_device_extent_item(dev_extent_cache,
6270 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6271 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6272 process_extent_ref_v0(extent_cache, buf, i);
6279 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6280 add_tree_backref(extent_cache, key.objectid, 0,
6284 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6285 add_tree_backref(extent_cache, key.objectid,
6289 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6290 struct btrfs_extent_data_ref *ref;
6291 ref = btrfs_item_ptr(buf, i,
6292 struct btrfs_extent_data_ref);
6293 add_data_backref(extent_cache,
6295 btrfs_extent_data_ref_root(buf, ref),
6296 btrfs_extent_data_ref_objectid(buf,
6298 btrfs_extent_data_ref_offset(buf, ref),
6299 btrfs_extent_data_ref_count(buf, ref),
6300 0, root->sectorsize);
6303 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6304 struct btrfs_shared_data_ref *ref;
6305 ref = btrfs_item_ptr(buf, i,
6306 struct btrfs_shared_data_ref);
6307 add_data_backref(extent_cache,
6308 key.objectid, key.offset, 0, 0, 0,
6309 btrfs_shared_data_ref_count(buf, ref),
6310 0, root->sectorsize);
/* Orphan items are remembered for later deletion. */
6313 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6314 struct bad_item *bad;
6316 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6320 bad = malloc(sizeof(struct bad_item));
6323 INIT_LIST_HEAD(&bad->list);
6324 memcpy(&bad->key, &key,
6325 sizeof(struct btrfs_key));
6326 bad->root_id = owner;
6327 list_add_tail(&bad->list, &delete_items);
/* Remaining interesting type: regular file extents. */
6330 if (key.type != BTRFS_EXTENT_DATA_KEY)
6332 fi = btrfs_item_ptr(buf, i,
6333 struct btrfs_file_extent_item);
6334 if (btrfs_file_extent_type(buf, fi) ==
6335 BTRFS_FILE_EXTENT_INLINE)
/* A zero disk bytenr marks a hole - no backref. */
6337 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6340 data_bytes_allocated +=
6341 btrfs_file_extent_disk_num_bytes(buf, fi);
6342 if (data_bytes_allocated < root->sectorsize) {
6345 data_bytes_referenced +=
6346 btrfs_file_extent_num_bytes(buf, fi);
6347 add_data_backref(extent_cache,
6348 btrfs_file_extent_disk_bytenr(buf, fi),
6349 parent, owner, key.objectid, key.offset -
6350 btrfs_file_extent_offset(buf, fi), 1, 1,
6351 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Internal node: record and queue every child block. */
6355 struct btrfs_key first_key;
6357 first_key.objectid = 0;
6360 btrfs_item_key_to_cpu(buf, &first_key, 0);
6361 level = btrfs_header_level(buf);
6362 for (i = 0; i < nritems; i++) {
6363 struct extent_record tmpl;
6365 ptr = btrfs_node_blockptr(buf, i);
6366 size = root->nodesize;
6367 btrfs_node_key_to_cpu(buf, &key, i);
/* Skip children in the already-dropped part of a dying snapshot. */
6369 if ((level == ri->drop_level)
6370 && is_dropped_key(&key, &ri->drop_key)) {
6375 memset(&tmpl, 0, sizeof(tmpl));
6376 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6377 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6382 tmpl.max_size = size;
6383 ret = add_extent_rec(extent_cache, &tmpl);
6386 add_tree_backref(extent_cache, ptr, parent, owner, 1);
/* Higher levels go to 'nodes' so they are processed first. */
6389 add_pending(nodes, seen, ptr, size);
6391 add_pending(pending, seen, ptr, size);
6394 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6395 nritems) * sizeof(struct btrfs_key_ptr);
/* Global statistics. */
6397 total_btree_bytes += buf->len;
6398 if (fs_root_objectid(btrfs_header_owner(buf)))
6399 total_fs_tree_bytes += buf->len;
6400 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6401 total_extent_tree_bytes += buf->len;
6402 if (!found_old_backref &&
6403 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6404 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6405 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6406 found_old_backref = 1;
6408 free_extent_buffer(buf);
/*
 * add_root_to_pending - seed the block scan with a tree root.
 *
 * Queues @buf for processing, records it in the extent cache, and adds the
 * appropriate tree backref (self-parented for reloc/old-style roots,
 * root-objectid owned otherwise).
 *
 * NOTE(review): this excerpt elides some original lines; visible code is
 * unchanged, only comments were added.
 */
6412 static int add_root_to_pending(struct extent_buffer *buf,
6413 struct cache_tree *extent_cache,
6414 struct cache_tree *pending,
6415 struct cache_tree *seen,
6416 struct cache_tree *nodes,
6419 struct extent_record tmpl;
/* Internal nodes are queued ahead of leaves. */
6421 if (btrfs_header_level(buf) > 0)
6422 add_pending(nodes, seen, buf->start, buf->len);
6424 add_pending(pending, seen, buf->start, buf->len);
6426 memset(&tmpl, 0, sizeof(tmpl));
6427 tmpl.start = buf->start;
6432 tmpl.max_size = buf->len;
6433 add_extent_rec(extent_cache, &tmpl);
/* Reloc trees and pre-mixed-backref-rev blocks use full (parent-based)
 * backrefs; everything else is keyed by root objectid. */
6435 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6436 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6437 add_tree_backref(extent_cache, buf->start, buf->start,
6440 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6444 /* as we fix the tree, we might be deleting blocks that
6445 * we're tracking for repair. This hook makes sure we
6446 * remove any backrefs for blocks as we are fixing them.
/*
 * free_extent_hook - keep the fsck extent cache in sync while repairs delete
 * extents.
 *
 * Looks up the cached extent record for [bytenr, bytenr+num_bytes) and drops
 * @refs_to_drop from the matching data or tree backref, clearing the
 * found_ref / found_extent_tree state as counts reach zero and finally
 * letting maybe_free_extent_rec() reap the record.
 *
 * FIX(review): the "free the backref" condition read
 *   !back->node.found_extent_tree && back->node.found_ref
 * in both branches, i.e. it erased backrefs that still had live found refs
 * and kept fully-dropped ones.  A backref should only be erased and freed
 * once BOTH flags are clear; corrected to !found_ref in both places.
 *
 * NOTE(review): this excerpt elides some original lines; apart from the two
 * fixed conditions, visible code is unchanged.
 */
6448 static int free_extent_hook(struct btrfs_trans_handle *trans,
6449 struct btrfs_root *root,
6450 u64 bytenr, u64 num_bytes, u64 parent,
6451 u64 root_objectid, u64 owner, u64 offset,
6454 struct extent_record *rec;
6455 struct cache_extent *cache;
6457 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners at or above FIRST_FREE_OBJECTID are file trees => data extent. */
6459 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6460 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6464 rec = container_of(cache, struct extent_record, cache);
6466 struct data_backref *back;
6467 back = find_data_backref(rec, parent, root_objectid, owner,
6468 offset, 1, bytenr, num_bytes);
6471 if (back->node.found_ref) {
6472 back->found_ref -= refs_to_drop;
6474 rec->refs -= refs_to_drop;
6476 if (back->node.found_extent_tree) {
6477 back->num_refs -= refs_to_drop;
6478 if (rec->extent_item_refs)
6479 rec->extent_item_refs -= refs_to_drop;
6481 if (back->found_ref == 0)
6482 back->node.found_ref = 0;
6483 if (back->num_refs == 0)
6484 back->node.found_extent_tree = 0;
/* Both sources gone: this backref is dead, remove it. */
6486 if (!back->node.found_extent_tree && !back->node.found_ref) {
6487 rb_erase(&back->node.node, &rec->backref_tree);
6491 struct tree_backref *back;
6492 back = find_tree_backref(rec, parent, root_objectid);
6495 if (back->node.found_ref) {
6498 back->node.found_ref = 0;
6500 if (back->node.found_extent_tree) {
6501 if (rec->extent_item_refs)
6502 rec->extent_item_refs--;
6503 back->node.found_extent_tree = 0;
/* Both sources gone: this backref is dead, remove it. */
6505 if (!back->node.found_extent_tree && !back->node.found_ref) {
6506 rb_erase(&back->node.node, &rec->backref_tree);
6510 maybe_free_extent_rec(extent_cache, rec);
/*
 * delete_extent_records - repair helper that removes every extent-tree item
 * (extent item, metadata item, and all backref item types) for @bytenr.
 *
 * Searches backwards from (bytenr, max type, max offset), deleting matching
 * items one at a time and updating the block group accounting for the
 * extent/metadata items themselves.
 *
 * NOTE(review): this excerpt elides some original lines; visible code is
 * unchanged, only comments were added.
 */
6515 static int delete_extent_records(struct btrfs_trans_handle *trans,
6516 struct btrfs_root *root,
6517 struct btrfs_path *path,
6518 u64 bytenr, u64 new_len)
6520 struct btrfs_key key;
6521 struct btrfs_key found_key;
6522 struct extent_buffer *leaf;
6527 key.objectid = bytenr;
6529 key.offset = (u64)-1;
6532 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6539 if (path->slots[0] == 0)
6545 leaf = path->nodes[0];
6546 slot = path->slots[0];
6548 btrfs_item_key_to_cpu(leaf, &found_key, slot);
/* Ran off the items for this bytenr: done. */
6549 if (found_key.objectid != bytenr)
/* Not a deletable type: restart the search just before this key. */
6552 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6553 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6554 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6555 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6556 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6557 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6558 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6559 btrfs_release_path(path);
6560 if (found_key.type == 0) {
6561 if (found_key.offset == 0)
6563 key.offset = found_key.offset - 1;
6564 key.type = found_key.type;
6566 key.type = found_key.type - 1;
6567 key.offset = (u64)-1;
6571 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6572 found_key.objectid, found_key.type, found_key.offset);
6574 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6577 btrfs_release_path(path);
/* Removing the extent item itself: give the space back to the block
 * group counters (METADATA_ITEM length is implicitly nodesize). */
6579 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6580 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6581 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6582 found_key.offset : root->nodesize;
6584 ret = btrfs_update_block_group(trans, root, bytenr,
6591 btrfs_release_path(path);
6596 * for a single backref, this will allocate a new extent
6597 * and add the backref to it.
/*
 * record_extent - repair helper: (re)create an extent item for @rec and add
 * the backref described by @back to it.
 *
 * If @allocated is not set, a fresh EXTENT_ITEM (including the tree block
 * info for metadata) is inserted and the block group accounting updated;
 * then btrfs_inc_extent_ref() is used to add the data or tree backref(s).
 *
 * FIX(review): restored four occurrences of "&copy_key" that had been
 * corrupted into "(c)_key" by HTML-entity mangling ("&copy" -> the copyright
 * sign), and removed a stray double semicolon after the copy_key
 * declaration.  No other visible token changed.
 *
 * NOTE(review): this excerpt elides some original lines.
 */
6599 static int record_extent(struct btrfs_trans_handle *trans,
6600 struct btrfs_fs_info *info,
6601 struct btrfs_path *path,
6602 struct extent_record *rec,
6603 struct extent_backref *back,
6604 int allocated, u64 flags)
6607 struct btrfs_root *extent_root = info->extent_root;
6608 struct extent_buffer *leaf;
6609 struct btrfs_key ins_key;
6610 struct btrfs_extent_item *ei;
6611 struct tree_backref *tback;
6612 struct data_backref *dback;
6613 struct btrfs_tree_block_info *bi;
/* Metadata extents are always at least one node in size. */
6616 rec->max_size = max_t(u64, rec->max_size,
6617 info->extent_root->nodesize);
6620 u32 item_size = sizeof(*ei);
6623 item_size += sizeof(*bi);
6625 ins_key.objectid = rec->start;
6626 ins_key.offset = rec->max_size;
6627 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6629 ret = btrfs_insert_empty_item(trans, extent_root, path,
6630 &ins_key, item_size);
6634 leaf = path->nodes[0];
6635 ei = btrfs_item_ptr(leaf, path->slots[0],
6636 struct btrfs_extent_item);
6638 btrfs_set_extent_refs(leaf, ei, 0);
6639 btrfs_set_extent_generation(leaf, ei, rec->generation);
6641 if (back->is_data) {
6642 btrfs_set_extent_flags(leaf, ei,
6643 BTRFS_EXTENT_FLAG_DATA);
6645 struct btrfs_disk_key copy_key;
6647 tback = to_tree_backref(back);
6648 bi = (struct btrfs_tree_block_info *)(ei + 1);
6649 memset_extent_buffer(leaf, 0, (unsigned long)bi,
/* Tree block info carries the first key and the level. */
6652 btrfs_set_disk_key_objectid(&copy_key,
6653 rec->info_objectid);
6654 btrfs_set_disk_key_type(&copy_key, 0);
6655 btrfs_set_disk_key_offset(&copy_key, 0);
6657 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6658 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6660 btrfs_set_extent_flags(leaf, ei,
6661 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6664 btrfs_mark_buffer_dirty(leaf);
6665 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6666 rec->max_size, 1, 0);
6669 btrfs_release_path(path);
6672 if (back->is_data) {
6676 dback = to_data_backref(back);
6677 if (back->full_backref)
6678 parent = dback->parent;
/* Add one ref per reference that was actually found in the fs trees. */
6682 for (i = 0; i < dback->found_ref; i++) {
6683 /* if parent != 0, we're doing a full backref
6684 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6685 * just makes the backref allocator create a data
6688 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6689 rec->start, rec->max_size,
6693 BTRFS_FIRST_FREE_OBJECTID :
6699 fprintf(stderr, "adding new data backref"
6700 " on %llu %s %llu owner %llu"
6701 " offset %llu found %d\n",
6702 (unsigned long long)rec->start,
6703 back->full_backref ?
6705 back->full_backref ?
6706 (unsigned long long)parent :
6707 (unsigned long long)dback->root,
6708 (unsigned long long)dback->owner,
6709 (unsigned long long)dback->offset,
6714 tback = to_tree_backref(back);
6715 if (back->full_backref)
6716 parent = tback->parent;
6720 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6721 rec->start, rec->max_size,
6722 parent, tback->root, 0, 0);
6723 fprintf(stderr, "adding new tree backref on "
6724 "start %llu len %llu parent %llu root %llu\n",
6725 rec->start, rec->max_size, parent, tback->root);
6728 btrfs_release_path(path);
/*
 * find_entry - linear scan of @entries for an extent_entry matching both
 * @bytenr and @bytes; NULL semantics for "not found" are handled by the
 * (elided) return path.
 */
6732 static struct extent_entry *find_entry(struct list_head *entries,
6733 u64 bytenr, u64 bytes)
6735 struct extent_entry *entry = NULL;
6737 list_for_each_entry(entry, entries, list) {
6738 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * find_most_right_entry - pick the extent_entry most likely to be correct:
 * the one agreed on by the largest number of (non-broken) backrefs.  Ties
 * are tracked via @prev; fully-broken entries are never trusted.
 *
 * NOTE(review): this excerpt elides some original lines (continue/return
 * paths); visible code is unchanged, only comments were added.
 */
6745 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6747 struct extent_entry *entry, *best = NULL, *prev = NULL;
6749 list_for_each_entry(entry, entries, list) {
6756 * If there are as many broken entries as entries then we know
6757 * not to trust this particular entry.
6759 if (entry->broken == entry->count)
6763 * If our current entry == best then we can't be sure our best
6764 * is really the best, so we need to keep searching.
6766 if (best && best->count == entry->count) {
6772 /* Prev == entry, not good enough, have to keep searching */
6773 if (!prev->broken && prev->count == entry->count)
6777 best = (prev->count > entry->count) ? prev : entry;
6778 else if (best->count < entry->count)
/*
 * repair_ref - rewrite a file extent item so its disk_bytenr/disk_num_bytes
 * agree with the winning extent @entry chosen by verify_backrefs().
 *
 * Finds the file extent in the owning fs root that produced @dback, then
 * adjusts disk_bytenr, offset and disk_num_bytes inside a transaction.
 * Compressed extents that don't start at the entry cannot be fixed
 * automatically and are reported instead.
 *
 * NOTE(review): this excerpt elides some original lines; visible code is
 * unchanged, only comments were added.
 */
6786 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6787 struct data_backref *dback, struct extent_entry *entry)
6789 struct btrfs_trans_handle *trans;
6790 struct btrfs_root *root;
6791 struct btrfs_file_extent_item *fi;
6792 struct extent_buffer *leaf;
6793 struct btrfs_key key;
6797 key.objectid = dback->root;
6798 key.type = BTRFS_ROOT_ITEM_KEY;
6799 key.offset = (u64)-1;
6800 root = btrfs_read_fs_root(info, &key);
6802 fprintf(stderr, "Couldn't find root for our ref\n");
6807 * The backref points to the original offset of the extent if it was
6808 * split, so we need to search down to the offset we have and then walk
6809 * forward until we find the backref we're looking for.
6811 key.objectid = dback->owner;
6812 key.type = BTRFS_EXTENT_DATA_KEY;
6813 key.offset = dback->offset;
6814 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6816 fprintf(stderr, "Error looking up ref %d\n", ret);
6821 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6822 ret = btrfs_next_leaf(root, path);
6824 fprintf(stderr, "Couldn't find our ref, next\n");
6828 leaf = path->nodes[0];
6829 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6830 if (key.objectid != dback->owner ||
6831 key.type != BTRFS_EXTENT_DATA_KEY) {
6832 fprintf(stderr, "Couldn't find our ref, search\n");
6835 fi = btrfs_item_ptr(leaf, path->slots[0],
6836 struct btrfs_file_extent_item);
6837 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6838 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* Found the file extent this backref was generated from. */
6840 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6845 btrfs_release_path(path);
6847 trans = btrfs_start_transaction(root, 1);
6849 return PTR_ERR(trans);
6852 * Ok we have the key of the file extent we want to fix, now we can cow
6853 * down to the thing and fix it.
6855 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6857 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6858 key.objectid, key.type, key.offset, ret);
6862 fprintf(stderr, "Well that's odd, we just found this key "
6863 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6868 leaf = path->nodes[0];
6869 fi = btrfs_item_ptr(leaf, path->slots[0],
6870 struct btrfs_file_extent_item);
/* Compressed extents can't simply shift their offset: bail out. */
6872 if (btrfs_file_extent_compression(leaf, fi) &&
6873 dback->disk_bytenr != entry->bytenr) {
6874 fprintf(stderr, "Ref doesn't match the record start and is "
6875 "compressed, please take a btrfs-image of this file "
6876 "system and send it to a btrfs developer so they can "
6877 "complete this functionality for bytenr %Lu\n",
6878 dback->disk_bytenr);
/* Broken backref: just point it at the agreed-upon bytenr. */
6883 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6884 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
/* Ref starts after the entry: fold the difference into the offset. */
6885 } else if (dback->disk_bytenr > entry->bytenr) {
6886 u64 off_diff, offset;
6888 off_diff = dback->disk_bytenr - entry->bytenr;
6889 offset = btrfs_file_extent_offset(leaf, fi);
6890 if (dback->disk_bytenr + offset +
6891 btrfs_file_extent_num_bytes(leaf, fi) >
6892 entry->bytenr + entry->bytes) {
6893 fprintf(stderr, "Ref is past the entry end, please "
6894 "take a btrfs-image of this file system and "
6895 "send it to a btrfs developer, ref %Lu\n",
6896 dback->disk_bytenr);
6901 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6902 btrfs_set_file_extent_offset(leaf, fi, offset);
/* Ref starts before the entry: shrink the offset accordingly. */
6903 } else if (dback->disk_bytenr < entry->bytenr) {
6906 offset = btrfs_file_extent_offset(leaf, fi);
6907 if (dback->disk_bytenr + offset < entry->bytenr) {
6908 fprintf(stderr, "Ref is before the entry start, please"
6909 " take a btrfs-image of this file system and "
6910 "send it to a btrfs developer, ref %Lu\n",
6911 dback->disk_bytenr);
6916 offset += dback->disk_bytenr;
6917 offset -= entry->bytenr;
6918 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6919 btrfs_set_file_extent_offset(leaf, fi, offset);
6922 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6925 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6926 * only do this if we aren't using compression, otherwise it's a
6929 if (!btrfs_file_extent_compression(leaf, fi))
6930 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6932 printf("ram bytes may be wrong?\n");
6933 btrfs_mark_buffer_dirty(leaf);
6935 err = btrfs_commit_transaction(trans, root);
6936 btrfs_release_path(path);
6937 return ret ? ret : err;
/*
 * verify_backrefs - make the data backrefs of @rec agree on (bytenr, bytes).
 *
 * Collects the distinct (disk_bytenr, bytes) pairs claimed by the data
 * backrefs into a list of extent_entry candidates, picks the best-supported
 * one with find_most_right_entry() (consulting the extent record itself on
 * a tie), and then calls repair_ref() on every backref that disagrees with
 * the winner.  Full backrefs and metadata backrefs are skipped.
 *
 * NOTE(review): this is a numbered listing with elided lines (the embedded
 * original line numbers jump), so returns/braces/error paths are missing
 * from view; code below is left byte-identical, comments only added.
 */
6940 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6941 struct extent_record *rec)
6943 struct extent_backref *back, *tmp;
6944 struct data_backref *dback;
6945 struct extent_entry *entry, *best = NULL;
6948 int broken_entries = 0;
6953 * Metadata is easy and the backrefs should always agree on bytenr and
6954 * size, if not we've got bigger issues.
/* Pass 1: tally each unique (bytenr, bytes) claim among the data backrefs. */
6959 rbtree_postorder_for_each_entry_safe(back, tmp,
6960 &rec->backref_tree, node) {
6961 if (back->full_backref || !back->is_data)
6964 dback = to_data_backref(back);
6967 * We only pay attention to backrefs that we found a real
6970 if (dback->found_ref == 0)
6974 * For now we only catch when the bytes don't match, not the
6975 * bytenr. We can easily do this at the same time, but I want
6976 * to have a fs image to test on before we just add repair
6977 * functionality willy-nilly so we know we won't screw up the
6981 entry = find_entry(&entries, dback->disk_bytenr,
6984 entry = malloc(sizeof(struct extent_entry));
6989 memset(entry, 0, sizeof(*entry));
6990 entry->bytenr = dback->disk_bytenr;
6991 entry->bytes = dback->bytes;
6992 list_add_tail(&entry->list, &entries);
6997 * If we only have on entry we may think the entries agree when
6998 * in reality they don't so we have to do some extra checking.
7000 if (dback->disk_bytenr != rec->start ||
7001 dback->bytes != rec->nr || back->broken)
7012 /* Yay all the backrefs agree, carry on good sir */
7013 if (nr_entries <= 1 && !mismatch)
7016 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7017 "%Lu\n", rec->start);
7020 * First we want to see if the backrefs can agree amongst themselves who
7021 * is right, so figure out which one of the entries has the highest
7024 best = find_most_right_entry(&entries);
7027 * Ok so we may have an even split between what the backrefs think, so
7028 * this is where we use the extent ref to see what it thinks.
7031 entry = find_entry(&entries, rec->start, rec->nr);
7032 if (!entry && (!broken_entries || !rec->found_rec)) {
7033 fprintf(stderr, "Backrefs don't agree with each other "
7034 "and extent record doesn't agree with anybody,"
7035 " so we can't fix bytenr %Lu bytes %Lu\n",
7036 rec->start, rec->nr);
7039 } else if (!entry) {
7041 * Ok our backrefs were broken, we'll assume this is the
7042 * correct value and add an entry for this range.
7044 entry = malloc(sizeof(struct extent_entry));
7049 memset(entry, 0, sizeof(*entry));
7050 entry->bytenr = rec->start;
7051 entry->bytes = rec->nr;
7052 list_add_tail(&entry->list, &entries);
7056 best = find_most_right_entry(&entries);
7058 fprintf(stderr, "Backrefs and extent record evenly "
7059 "split on who is right, this is going to "
7060 "require user input to fix bytenr %Lu bytes "
7061 "%Lu\n", rec->start, rec->nr);
7068 * I don't think this can happen currently as we'll abort() if we catch
7069 * this case higher up, but in case somebody removes that we still can't
7070 * deal with it properly here yet, so just bail out of that's the case.
7072 if (best->bytenr != rec->start) {
7073 fprintf(stderr, "Extent start and backref starts don't match, "
7074 "please use btrfs-image on this file system and send "
7075 "it to a btrfs developer so they can make fsck fix "
7076 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7077 rec->start, rec->nr);
7083 * Ok great we all agreed on an extent record, let's go find the real
7084 * references and fix up the ones that don't match.
/* Pass 2: repair every data backref that disagrees with the winning entry. */
7086 rbtree_postorder_for_each_entry_safe(back, tmp,
7087 &rec->backref_tree, node) {
7088 if (back->full_backref || !back->is_data)
7091 dback = to_data_backref(back);
7094 * Still ignoring backrefs that don't have a real ref attached
7097 if (dback->found_ref == 0)
7100 if (dback->bytes == best->bytes &&
7101 dback->disk_bytenr == best->bytenr)
7104 ret = repair_ref(info, path, dback, best);
7110 * Ok we messed with the actual refs, which means we need to drop our
7111 * entire cache and go back and rescan. I know this is a huge pain and
7112 * adds a lot of extra work, but it's the only way to be safe. Once all
7113 * the backrefs agree we may not need to do anything to the extent
/* Free the temporary candidate list before returning. */
7118 while (!list_empty(&entries)) {
7119 entry = list_entry(entries.next, struct extent_entry, list);
7120 list_del_init(&entry->list);
/*
 * process_duplicates - collapse duplicate extent_records for one range.
 *
 * When @rec has exactly one duplicate and no real extent item of its own,
 * promote the duplicate (the record that came from an actual
 * BTRFS_EXTENT_ITEM_KEY) to be the canonical record, folding in backrefs
 * and refs from any other overlapping non-item-backed records.
 * Returns 1 when nothing remains to delete, 0 when duplicates still need
 * delete_duplicate_records() (presumably — TODO confirm, return paths are
 * elided in this listing).
 *
 * NOTE(review): numbered listing with elided lines; code left byte-identical.
 */
7126 static int process_duplicates(struct btrfs_root *root,
7127 struct cache_tree *extent_cache,
7128 struct extent_record *rec)
7130 struct extent_record *good, *tmp;
7131 struct cache_extent *cache;
7135 * If we found a extent record for this extent then return, or if we
7136 * have more than one duplicate we are likely going to need to delete
7139 if (rec->found_rec || rec->num_duplicates > 1)
7142 /* Shouldn't happen but just in case */
7143 BUG_ON(!rec->num_duplicates);
7146 * So this happens if we end up with a backref that doesn't match the
7147 * actual extent entry. So either the backref is bad or the extent
7148 * entry is bad. Either way we want to have the extent_record actually
7149 * reflect what we found in the extent_tree, so we need to take the
7150 * duplicate out and use that as the extent_record since the only way we
7151 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7153 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the single duplicate to be the canonical record. */
7155 good = to_extent_record(rec->dups.next);
7156 list_del_init(&good->list);
7157 INIT_LIST_HEAD(&good->backrefs);
7158 INIT_LIST_HEAD(&good->dups);
7159 good->cache.start = good->start;
7160 good->cache.size = good->nr;
7161 good->content_checked = 0;
7162 good->owner_ref_checked = 0;
7163 good->num_duplicates = 0;
7164 good->refs = rec->refs;
7165 list_splice_init(&rec->backrefs, &good->backrefs);
/* Sweep any other overlapping records into the promoted one. */
7167 cache = lookup_cache_extent(extent_cache, good->start,
7171 tmp = container_of(cache, struct extent_record, cache);
7174 * If we find another overlapping extent and it's found_rec is
7175 * set then it's a duplicate and we need to try and delete
7178 if (tmp->found_rec || tmp->num_duplicates > 0) {
7179 if (list_empty(&good->list))
7180 list_add_tail(&good->list,
7181 &duplicate_extents);
7182 good->num_duplicates += tmp->num_duplicates + 1;
7183 list_splice_init(&tmp->dups, &good->dups);
7184 list_del_init(&tmp->list);
7185 list_add_tail(&tmp->list, &good->dups);
7186 remove_cache_extent(extent_cache, &tmp->cache);
7191 * Ok we have another non extent item backed extent rec, so lets
7192 * just add it to this extent and carry on like we did above.
7194 good->refs += tmp->refs;
7195 list_splice_init(&tmp->backrefs, &good->backrefs);
7196 remove_cache_extent(extent_cache, &tmp->cache);
7199 ret = insert_cache_extent(extent_cache, &good->cache);
7202 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records - delete redundant EXTENT_ITEMs for @rec's range.
 *
 * Finds the duplicate record that fully covers the others, moves the rest
 * onto a local delete_list, and removes each one's BTRFS_EXTENT_ITEM_KEY
 * from the extent tree inside a transaction.  Returns a negative error, or
 * the number of items deleted (nr_del) on success; also clears
 * rec->num_duplicates when nothing was deleted and no error occurred.
 *
 * NOTE(review): numbered listing with elided lines (error labels, frees);
 * code left byte-identical, comments only added.
 */
7205 static int delete_duplicate_records(struct btrfs_root *root,
7206 struct extent_record *rec)
7208 struct btrfs_trans_handle *trans;
7209 LIST_HEAD(delete_list);
7210 struct btrfs_path *path;
7211 struct extent_record *tmp, *good, *n;
7214 struct btrfs_key key;
7216 path = btrfs_alloc_path();
7223 /* Find the record that covers all of the duplicates. */
7224 list_for_each_entry(tmp, &rec->dups, list) {
7225 if (good->start < tmp->start)
7227 if (good->nr > tmp->nr)
7230 if (tmp->start + tmp->nr < good->start + good->nr) {
7231 fprintf(stderr, "Ok we have overlapping extents that "
7232 "aren't completely covered by each other, this "
7233 "is going to require more careful thought. "
7234 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7235 tmp->start, tmp->nr, good->start, good->nr);
/* Everything except the covering record goes on the delete list. */
7242 list_add_tail(&rec->list, &delete_list);
7244 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7247 list_move_tail(&tmp->list, &delete_list);
7250 root = root->fs_info->extent_root;
7251 trans = btrfs_start_transaction(root, 1);
7252 if (IS_ERR(trans)) {
7253 ret = PTR_ERR(trans);
7257 list_for_each_entry(tmp, &delete_list, list) {
7258 if (tmp->found_rec == 0)
7260 key.objectid = tmp->start;
7261 key.type = BTRFS_EXTENT_ITEM_KEY;
7262 key.offset = tmp->nr;
7264 /* Shouldn't happen but just in case */
7265 if (tmp->metadata) {
7266 fprintf(stderr, "Well this shouldn't happen, extent "
7267 "record overlaps but is metadata? "
7268 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7272 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7278 ret = btrfs_del_item(trans, root, path);
7281 btrfs_release_path(path);
7284 err = btrfs_commit_transaction(trans, root);
/* Free the records we queued for deletion. */
7288 while (!list_empty(&delete_list)) {
7289 tmp = to_extent_record(delete_list.next);
7290 list_del_init(&tmp->list);
7296 while (!list_empty(&rec->dups)) {
7297 tmp = to_extent_record(rec->dups.next);
7298 list_del_init(&tmp->list);
7302 btrfs_free_path(path);
7304 if (!ret && !nr_del)
7305 rec->num_duplicates = 0;
7307 return ret ? ret : nr_del;
/*
 * find_possible_backrefs - fill in data backrefs that were never matched
 * to a real file extent.
 *
 * For each data backref of @rec with found_ref == 0, look up the owning
 * fs root and the EXTENT_DATA item it points at; if the file extent exists
 * and its disk bytenr has no extent record of its own in @extent_cache,
 * adopt its (disk_bytenr, disk_num_bytes) into the backref so
 * verify_backrefs() can work with it.
 *
 * NOTE(review): numbered listing with elided lines; code left byte-identical.
 */
7310 static int find_possible_backrefs(struct btrfs_fs_info *info,
7311 struct btrfs_path *path,
7312 struct cache_tree *extent_cache,
7313 struct extent_record *rec)
7315 struct btrfs_root *root;
7316 struct extent_backref *back, *tmp;
7317 struct data_backref *dback;
7318 struct cache_extent *cache;
7319 struct btrfs_file_extent_item *fi;
7320 struct btrfs_key key;
7324 rbtree_postorder_for_each_entry_safe(back, tmp,
7325 &rec->backref_tree, node) {
7326 /* Don't care about full backrefs (poor unloved backrefs) */
7327 if (back->full_backref || !back->is_data)
7330 dback = to_data_backref(back);
7332 /* We found this one, we don't need to do a lookup */
7333 if (dback->found_ref)
/* Resolve the subvolume root this backref claims to belong to. */
7336 key.objectid = dback->root;
7337 key.type = BTRFS_ROOT_ITEM_KEY;
7338 key.offset = (u64)-1;
7340 root = btrfs_read_fs_root(info, &key);
7342 /* No root, definitely a bad ref, skip */
7343 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7345 /* Other err, exit */
7347 return PTR_ERR(root);
/* Look up the EXTENT_DATA item (owner inode + file offset). */
7349 key.objectid = dback->owner;
7350 key.type = BTRFS_EXTENT_DATA_KEY;
7351 key.offset = dback->offset;
7352 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7354 btrfs_release_path(path);
7357 /* Didn't find it, we can carry on */
7362 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7363 struct btrfs_file_extent_item);
7364 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7365 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7366 btrfs_release_path(path);
7367 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7369 struct extent_record *tmp;
7370 tmp = container_of(cache, struct extent_record, cache);
7373 * If we found an extent record for the bytenr for this
7374 * particular backref then we can't add it to our
7375 * current extent record. We only want to add backrefs
7376 * that don't have a corresponding extent item in the
7377 * extent tree since they likely belong to this record
7378 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the on-disk values into the backref. */
7384 dback->found_ref += 1;
7385 dback->disk_bytenr = bytenr;
7386 dback->bytes = bytes;
7389 * Set this so the verify backref code knows not to trust the
7390 * values in this backref.
7399 * Record orphan data ref into corresponding root.
7401 * Return 0 if the extent item contains data ref and recorded.
7402 * Return 1 if the extent item contains no useful data ref
7403 * On that case, it may contains only shared_dataref or metadata backref
7404 * or the file extent exists(this should be handled by the extent bytenr
7406 * Return <0 if something goes wrong.
/*
 * record_orphan_data_extents - queue orphan data refs for fs-tree rebuild.
 *
 * For each data backref of @rec that was found in the extent tree but has
 * no live file extent (found_ref == 0), allocate an orphan_data_extent
 * describing (root, inode, offset, disk_bytenr, disk_len) and attach it to
 * the destination root's orphan_data_extents list.
 * Returns 0 if at least one data ref was recorded, 1 if none were
 * (and presumably <0 on error — elided paths, TODO confirm).
 *
 * NOTE(review): numbered listing with elided lines; code left byte-identical.
 */
7408 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7409 struct extent_record *rec)
7411 struct btrfs_key key;
7412 struct btrfs_root *dest_root;
7413 struct extent_backref *back, *tmp;
7414 struct data_backref *dback;
7415 struct orphan_data_extent *orphan;
7416 struct btrfs_path *path;
7417 int recorded_data_ref = 0;
7422 path = btrfs_alloc_path();
7425 rbtree_postorder_for_each_entry_safe(back, tmp,
7426 &rec->backref_tree, node) {
/* Only data backrefs that the extent tree itself vouched for. */
7427 if (back->full_backref || !back->is_data ||
7428 !back->found_extent_tree)
7430 dback = to_data_backref(back);
7431 if (dback->found_ref)
7433 key.objectid = dback->root;
7434 key.type = BTRFS_ROOT_ITEM_KEY;
7435 key.offset = (u64)-1;
7437 dest_root = btrfs_read_fs_root(fs_info, &key);
7439 /* For non-exist root we just skip it */
7440 if (IS_ERR(dest_root) || !dest_root)
7443 key.objectid = dback->owner;
7444 key.type = BTRFS_EXTENT_DATA_KEY;
7445 key.offset = dback->offset;
7447 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7449 * For ret < 0, it's OK since the fs-tree may be corrupted,
7450 * we need to record it for inode/file extent rebuild.
7451 * For ret > 0, we record it only for file extent rebuild.
7452 * For ret == 0, the file extent exists but only bytenr
7453 * mismatch, let the original bytenr fix routine to handle,
7459 orphan = malloc(sizeof(*orphan));
7464 INIT_LIST_HEAD(&orphan->list);
7465 orphan->root = dback->root;
7466 orphan->objectid = dback->owner;
7467 orphan->offset = dback->offset;
7468 orphan->disk_bytenr = rec->cache.start;
7469 orphan->disk_len = rec->cache.size;
7470 list_add(&dest_root->orphan_data_extents, &orphan->list);
7471 recorded_data_ref = 1;
7474 btrfs_free_path(path);
7476 return !recorded_data_ref;
7482 * when an incorrect extent item is found, this will delete
7483 * all of the existing entries for it and recreate them
7484 * based on what the tree scan found.
/*
 * fixup_extent_refs - rebuild the extent-tree items for one extent record.
 *
 * Three steps, all inside one transaction on the extent root:
 *  1. verify_backrefs() (after find_possible_backrefs() when ref counts
 *     disagree for a data extent) so the backrefs are internally consistent;
 *  2. delete_extent_records() to wipe the existing items for the range;
 *  3. record_extent() for every backref with found_ref set — unless the
 *     block is in info->corrupt_blocks, in which case no refs are re-added.
 *
 * NOTE(review): numbered listing with elided lines (locals, labels, error
 * paths); code left byte-identical, comments only added.
 */
7486 static int fixup_extent_refs(struct btrfs_fs_info *info,
7487 struct cache_tree *extent_cache,
7488 struct extent_record *rec)
7490 struct btrfs_trans_handle *trans = NULL;
7492 struct btrfs_path *path;
7493 struct cache_extent *cache;
7494 struct extent_backref *back, *tmp;
7498 if (rec->flag_block_full_backref)
7499 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7501 path = btrfs_alloc_path();
7505 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7507 * Sometimes the backrefs themselves are so broken they don't
7508 * get attached to any meaningful rec, so first go back and
7509 * check any of our backrefs that we couldn't find and throw
7510 * them into the list if we find the backref so that
7511 * verify_backrefs can figure out what to do.
7513 ret = find_possible_backrefs(info, path, extent_cache, rec);
7518 /* step one, make sure all of the backrefs agree */
7519 ret = verify_backrefs(info, path, rec);
7523 trans = btrfs_start_transaction(info->extent_root, 1);
7524 if (IS_ERR(trans)) {
7525 ret = PTR_ERR(trans);
7529 /* step two, delete all the existing records */
7530 ret = delete_extent_records(trans, info->extent_root, path,
7531 rec->start, rec->max_size);
7536 /* was this block corrupt? If so, don't add references to it */
7537 cache = lookup_cache_extent(info->corrupt_blocks,
7538 rec->start, rec->max_size);
7544 /* step three, recreate all the refs we did find */
7545 rbtree_postorder_for_each_entry_safe(back, tmp,
7546 &rec->backref_tree, node) {
7548 * if we didn't find any references, don't create a
7551 if (!back->found_ref)
7554 rec->bad_full_backref = 0;
7555 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7563 int err = btrfs_commit_transaction(trans, info->extent_root);
7568 btrfs_free_path(path);
/*
 * fixup_extent_flags - set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the
 * extent item for @rec.
 *
 * Locates the METADATA_ITEM (keyed by level) or EXTENT_ITEM (keyed by
 * max_size) for rec->start, rewrites its flags field according to
 * rec->flag_block_full_backref, marks the leaf dirty and commits.
 * Returns the commit result, or an error if the item cannot be found.
 *
 * NOTE(review): numbered listing with elided lines; code left byte-identical.
 */
7572 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7573 struct extent_record *rec)
7575 struct btrfs_trans_handle *trans;
7576 struct btrfs_root *root = fs_info->extent_root;
7577 struct btrfs_path *path;
7578 struct btrfs_extent_item *ei;
7579 struct btrfs_key key;
7583 key.objectid = rec->start;
7584 if (rec->metadata) {
7585 key.type = BTRFS_METADATA_ITEM_KEY;
7586 key.offset = rec->info_level;
7588 key.type = BTRFS_EXTENT_ITEM_KEY;
7589 key.offset = rec->max_size;
7592 path = btrfs_alloc_path();
7596 trans = btrfs_start_transaction(root, 0);
7597 if (IS_ERR(trans)) {
7598 btrfs_free_path(path);
7599 return PTR_ERR(trans);
7602 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
/* Search failure / miss: release resources before bailing out. */
7604 btrfs_free_path(path);
7605 btrfs_commit_transaction(trans, root);
7608 fprintf(stderr, "Didn't find extent for %llu\n",
7609 (unsigned long long)rec->start);
7610 btrfs_free_path(path);
7611 btrfs_commit_transaction(trans, root);
7615 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7616 struct btrfs_extent_item);
7617 flags = btrfs_extent_flags(path->nodes[0], ei);
7618 if (rec->flag_block_full_backref) {
7619 fprintf(stderr, "setting full backref on %llu\n",
7620 (unsigned long long)key.objectid);
7621 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7623 fprintf(stderr, "clearing full backref on %llu\n",
7624 (unsigned long long)key.objectid);
7625 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7627 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7628 btrfs_mark_buffer_dirty(path->nodes[0]);
7629 btrfs_free_path(path);
7630 return btrfs_commit_transaction(trans, root);
7633 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block - drop the parent pointer to one corrupt tree block.
 *
 * Searches the extent root down to the parent level of @corrupt, finds the
 * slot whose blockptr matches corrupt->cache.start (first trying the slot
 * the search returned, then scanning the whole node), and deletes that
 * pointer with btrfs_del_ptr().  Gives up if the pointer is not in this
 * node, unless the node is the extent root itself (special-cased below,
 * handling elided in this listing).
 *
 * NOTE(review): numbered listing with elided lines; code left byte-identical.
 */
7634 static int prune_one_block(struct btrfs_trans_handle *trans,
7635 struct btrfs_fs_info *info,
7636 struct btrfs_corrupt_block *corrupt)
7639 struct btrfs_path path;
7640 struct extent_buffer *eb;
7644 int level = corrupt->level + 1;
7646 btrfs_init_path(&path);
7648 /* we want to stop at the parent to our busted block */
7649 path.lowest_level = level;
7651 ret = btrfs_search_slot(trans, info->extent_root,
7652 &corrupt->key, &path, -1, 1);
7657 eb = path.nodes[level];
7664 * hopefully the search gave us the block we want to prune,
7665 * lets try that first
7667 slot = path.slots[level];
7668 found = btrfs_node_blockptr(eb, slot);
7669 if (found == corrupt->cache.start)
7672 nritems = btrfs_header_nritems(eb);
7674 /* the search failed, lets scan this node and hope we find it */
7675 for (slot = 0; slot < nritems; slot++) {
7676 found = btrfs_node_blockptr(eb, slot);
7677 if (found == corrupt->cache.start)
7681 * we couldn't find the bad block. TODO, search all the nodes for pointers
7684 if (eb == info->extent_root->node) {
7689 btrfs_release_path(&path);
7694 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7695 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7698 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks - prune every block queued in info->corrupt_blocks.
 *
 * Iterates the corrupt-block cache, starting a transaction lazily on the
 * first entry, calls prune_one_block() for each and removes it from the
 * cache, then commits.  Return value of prune_one_block() is intentionally
 * ignored (best effort).
 *
 * NOTE(review): numbered listing with elided lines (loop construct and the
 * no-transaction early return are not visible); code left byte-identical.
 */
7702 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7704 struct btrfs_trans_handle *trans = NULL;
7705 struct cache_extent *cache;
7706 struct btrfs_corrupt_block *corrupt;
7709 cache = search_cache_extent(info->corrupt_blocks, 0);
7713 trans = btrfs_start_transaction(info->extent_root, 1);
7715 return PTR_ERR(trans);
7717 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7718 prune_one_block(trans, info, corrupt);
7719 remove_cache_extent(info->corrupt_blocks, cache);
7722 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups - clear the in-memory free-space cache state.
 *
 * Walks the EXTENT_DIRTY ranges of fs_info->free_space_cache, clearing
 * each, and steps through the block groups covering those ranges (per-group
 * reset logic is elided in this listing — presumably marking them uncached;
 * TODO confirm against full source).
 *
 * NOTE(review): numbered listing with elided lines; code left byte-identical.
 */
7726 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7728 struct btrfs_block_group_cache *cache;
7733 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7734 &start, &end, EXTENT_DIRTY);
7737 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7743 cache = btrfs_lookup_first_block_group(fs_info, start);
7748 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs - main verification/repair pass over the extent cache.
 *
 * In repair mode: first excludes every cached extent and corrupt block from
 * allocation (set_extent_dirty on excluded_extents), prunes corrupt blocks,
 * and resolves duplicate records via process_duplicates() /
 * delete_duplicate_records().  Then walks every extent_record, reporting
 * (and, when repair is set, fixing via fixup_extent_refs() /
 * fixup_extent_flags()) ref-count mismatches, backpointer mismatches,
 * owner-ref failures, bad full-backref flags, and reporting (no repair)
 * stripe-crossing metadata and chunk-type mismatches.  Finishes with
 * btrfs_fix_block_accounting() in a transaction when repairs were made.
 *
 * NOTE(review): numbered listing with many elided lines (loop heads,
 * "fixed"/"recorded"/"cur_err" bookkeeping, labels); code left
 * byte-identical, comments only added.
 */
7752 static int check_extent_refs(struct btrfs_root *root,
7753 struct cache_tree *extent_cache)
7755 struct extent_record *rec;
7756 struct cache_extent *cache;
7765 * if we're doing a repair, we have to make sure
7766 * we don't allocate from the problem extents.
7767 * In the worst case, this will be all the
7770 cache = search_cache_extent(extent_cache, 0);
7772 rec = container_of(cache, struct extent_record, cache);
7773 set_extent_dirty(root->fs_info->excluded_extents,
7775 rec->start + rec->max_size - 1,
7777 cache = next_cache_extent(cache);
7780 /* pin down all the corrupted blocks too */
7781 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7783 set_extent_dirty(root->fs_info->excluded_extents,
7785 cache->start + cache->size - 1,
7787 cache = next_cache_extent(cache);
7789 prune_corrupt_blocks(root->fs_info);
7790 reset_cached_block_groups(root->fs_info);
7793 reset_cached_block_groups(root->fs_info);
7796 * We need to delete any duplicate entries we find first otherwise we
7797 * could mess up the extent tree when we have backrefs that actually
7798 * belong to a different extent item and not the weird duplicate one.
7800 while (repair && !list_empty(&duplicate_extents)) {
7801 rec = to_extent_record(duplicate_extents.next);
7802 list_del_init(&rec->list);
7804 /* Sometimes we can find a backref before we find an actual
7805 * extent, so we need to process it a little bit to see if there
7806 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7807 * if this is a backref screwup. If we need to delete stuff
7808 * process_duplicates() will return 0, otherwise it will return
7811 if (process_duplicates(root, extent_cache, rec))
7813 ret = delete_duplicate_records(root, rec);
7817 * delete_duplicate_records will return the number of entries
7818 * deleted, so if it's greater than 0 then we know we actually
7819 * did something and we need to remove.
/* Main per-record verification loop. */
7833 cache = search_cache_extent(extent_cache, 0);
7836 rec = container_of(cache, struct extent_record, cache);
7837 if (rec->num_duplicates) {
7838 fprintf(stderr, "extent item %llu has multiple extent "
7839 "items\n", (unsigned long long)rec->start);
7844 if (rec->refs != rec->extent_item_refs) {
7845 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7846 (unsigned long long)rec->start,
7847 (unsigned long long)rec->nr);
7848 fprintf(stderr, "extent item %llu, found %llu\n",
7849 (unsigned long long)rec->extent_item_refs,
7850 (unsigned long long)rec->refs);
7851 ret = record_orphan_data_extents(root->fs_info, rec);
7858 * we can't use the extent to repair file
7859 * extent, let the fallback method handle it.
7861 if (!fixed && repair) {
7862 ret = fixup_extent_refs(
7873 if (all_backpointers_checked(rec, 1)) {
7874 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7875 (unsigned long long)rec->start,
7876 (unsigned long long)rec->nr);
7878 if (!fixed && !recorded && repair) {
7879 ret = fixup_extent_refs(root->fs_info,
7888 if (!rec->owner_ref_checked) {
7889 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7890 (unsigned long long)rec->start,
7891 (unsigned long long)rec->nr);
7892 if (!fixed && !recorded && repair) {
7893 ret = fixup_extent_refs(root->fs_info,
7902 if (rec->bad_full_backref) {
7903 fprintf(stderr, "bad full backref, on [%llu]\n",
7904 (unsigned long long)rec->start);
7906 ret = fixup_extent_flags(root->fs_info, rec);
7915 * Although it's not a extent ref's problem, we reuse this
7916 * routine for error reporting.
7917 * No repair function yet.
7919 if (rec->crossing_stripes) {
7921 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7922 rec->start, rec->start + rec->max_size);
7927 if (rec->wrong_chunk_type) {
7929 "bad extent [%llu, %llu), type mismatch with chunk\n",
7930 rec->start, rec->start + rec->max_size);
7935 remove_cache_extent(extent_cache, cache);
7936 free_all_extent_backrefs(rec);
7937 if (!init_extent_tree && repair && (!cur_err || fixed))
7938 clear_extent_dirty(root->fs_info->excluded_extents,
7940 rec->start + rec->max_size - 1,
/* After the walk: fix up block accounting if we repaired anything. */
7946 if (ret && ret != -EAGAIN) {
7947 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7950 struct btrfs_trans_handle *trans;
7952 root = root->fs_info->extent_root;
7953 trans = btrfs_start_transaction(root, 1);
7954 if (IS_ERR(trans)) {
7955 ret = PTR_ERR(trans);
7959 btrfs_fix_block_accounting(trans, root);
7960 ret = btrfs_commit_transaction(trans, root);
7965 fprintf(stderr, "repaired damaged extent references\n");
/*
 * calc_stripe_length - per-device stripe size for a chunk of @length bytes.
 *
 * RAID0: length / num_stripes; RAID10: length * 2 / num_stripes (two
 * copies); RAID5: length / (num_stripes - 1) (one parity stripe);
 * RAID6: length / (num_stripes - 2) (two parity stripes); everything else
 * (single/DUP/RAID1): length itself.
 *
 * NOTE(review): the listing elides the local declaration and the return
 * statement; code left byte-identical.  Also note no guard against
 * num_stripes values that would divide by zero — presumably callers
 * guarantee sane stripe counts; TODO confirm.
 */
7971 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7975 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7976 stripe_size = length;
7977 stripe_size /= num_stripes;
7978 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7979 stripe_size = length * 2;
7980 stripe_size /= num_stripes;
7981 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7982 stripe_size = length;
7983 stripe_size /= (num_stripes - 1);
7984 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7985 stripe_size = length;
7986 stripe_size /= (num_stripes - 2);
7988 stripe_size = length;
7994 * Check the chunk with its block group/dev list ref:
7995 * Return 0 if all refs seems valid.
7996 * Return 1 if part of refs seems valid, need later check for rebuild ref
7997 * like missing block group and needs to search extent tree to rebuild them.
7998 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * check_chunk_refs - cross-check one chunk against its block group and
 * device extents.
 *
 * Verifies the chunk has a block-group item with matching offset/objectid
 * (and flags, unless metadump_v2), then verifies every stripe has a device
 * extent with matching devid/offset/chunk_offset/length.  Messages are
 * suppressed when @silent.  Per the contract documented above the function
 * in the original file: 0 = all refs valid, 1 = partially valid (rebuild
 * possible), -1 = essential refs missing.
 *
 * NOTE(review): numbered listing with elided lines; code left byte-identical.
 */
8000 static int check_chunk_refs(struct chunk_record *chunk_rec,
8001 struct block_group_tree *block_group_cache,
8002 struct device_extent_tree *dev_extent_cache,
8005 struct cache_extent *block_group_item;
8006 struct block_group_record *block_group_rec;
8007 struct cache_extent *dev_extent_item;
8008 struct device_extent_record *dev_extent_rec;
8012 int metadump_v2 = 0;
8016 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8019 if (block_group_item) {
8020 block_group_rec = container_of(block_group_item,
8021 struct block_group_record,
8023 if (chunk_rec->length != block_group_rec->offset ||
8024 chunk_rec->offset != block_group_rec->objectid ||
8026 chunk_rec->type_flags != block_group_rec->flags)) {
8029 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8030 chunk_rec->objectid,
8035 chunk_rec->type_flags,
8036 block_group_rec->objectid,
8037 block_group_rec->type,
8038 block_group_rec->offset,
8039 block_group_rec->offset,
8040 block_group_rec->objectid,
8041 block_group_rec->flags);
8044 list_del_init(&block_group_rec->list);
8045 chunk_rec->bg_rec = block_group_rec;
8050 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8051 chunk_rec->objectid,
8056 chunk_rec->type_flags);
/* Per-stripe device extent verification. */
8063 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8064 chunk_rec->num_stripes);
8065 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8066 devid = chunk_rec->stripes[i].devid;
8067 offset = chunk_rec->stripes[i].offset;
8068 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8069 devid, offset, length);
8070 if (dev_extent_item) {
8071 dev_extent_rec = container_of(dev_extent_item,
8072 struct device_extent_record,
8074 if (dev_extent_rec->objectid != devid ||
8075 dev_extent_rec->offset != offset ||
8076 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8077 dev_extent_rec->length != length) {
8080 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8081 chunk_rec->objectid,
8084 chunk_rec->stripes[i].devid,
8085 chunk_rec->stripes[i].offset,
8086 dev_extent_rec->objectid,
8087 dev_extent_rec->offset,
8088 dev_extent_rec->length);
8091 list_move(&dev_extent_rec->chunk_list,
8092 &chunk_rec->dextents);
8097 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8098 chunk_rec->objectid,
8101 chunk_rec->stripes[i].devid,
8102 chunk_rec->stripes[i].offset);
8109 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * check_chunks - validate every chunk against block groups and dev extents.
 *
 * Runs check_chunk_refs() on each chunk in @chunk_cache and distributes
 * the chunk onto @good, @rebuild or @bad per the result (0 / >0 / <0).
 * Afterwards reports any block groups and device extents that were never
 * claimed by a chunk.  @silent suppresses messages.
 *
 * NOTE(review): numbered listing with elided lines (err accumulation,
 * return); code left byte-identical, comments only added.
 */
8110 int check_chunks(struct cache_tree *chunk_cache,
8111 struct block_group_tree *block_group_cache,
8112 struct device_extent_tree *dev_extent_cache,
8113 struct list_head *good, struct list_head *bad,
8114 struct list_head *rebuild, int silent)
8116 struct cache_extent *chunk_item;
8117 struct chunk_record *chunk_rec;
8118 struct block_group_record *bg_rec;
8119 struct device_extent_record *dext_rec;
8123 chunk_item = first_cache_extent(chunk_cache);
8124 while (chunk_item) {
8125 chunk_rec = container_of(chunk_item, struct chunk_record,
8127 err = check_chunk_refs(chunk_rec, block_group_cache,
8128 dev_extent_cache, silent);
8131 if (err == 0 && good)
8132 list_add_tail(&chunk_rec->list, good);
8133 if (err > 0 && rebuild)
8134 list_add_tail(&chunk_rec->list, rebuild);
8136 list_add_tail(&chunk_rec->list, bad);
8137 chunk_item = next_cache_extent(chunk_item);
/* Leftover block groups / device extents had no matching chunk. */
8140 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8143 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8151 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8155 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * check_device_used - verify a device's byte_used against its dev extents.
 *
 * Sums the lengths of all device extents belonging to dev_rec->devid
 * (detaching each from the orphan list as it is matched) and compares the
 * total with dev_rec->byte_used, reporting a mismatch.
 *
 * NOTE(review): numbered listing with elided lines (loop construct,
 * returns); code left byte-identical.
 */
8166 static int check_device_used(struct device_record *dev_rec,
8167 struct device_extent_tree *dext_cache)
8169 struct cache_extent *cache;
8170 struct device_extent_record *dev_extent_rec;
8173 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8175 dev_extent_rec = container_of(cache,
8176 struct device_extent_record,
8178 if (dev_extent_rec->objectid != dev_rec->devid)
8181 list_del_init(&dev_extent_rec->device_list);
8182 total_byte += dev_extent_rec->length;
8183 cache = next_cache_extent(cache);
8186 if (total_byte != dev_rec->byte_used) {
8188 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8189 total_byte, dev_rec->byte_used, dev_rec->objectid,
8190 dev_rec->type, dev_rec->offset);
8197 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * check_devices - run check_device_used() on every device item, then report
 * device extents whose device item does not exist.
 *
 * NOTE(review): numbered listing with elided lines (err accumulation, loop
 * head, return); code left byte-identical.
 */
8198 static int check_devices(struct rb_root *dev_cache,
8199 struct device_extent_tree *dev_extent_cache)
8201 struct rb_node *dev_node;
8202 struct device_record *dev_rec;
8203 struct device_extent_record *dext_rec;
8207 dev_node = rb_first(dev_cache);
8209 dev_rec = container_of(dev_node, struct device_record, node);
8210 err = check_device_used(dev_rec, dev_extent_cache);
8214 dev_node = rb_next(dev_node);
8216 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8219 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8220 dext_rec->objectid, dext_rec->offset, dext_rec->length);
/*
 * add_root_item_to_list - allocate a root_item_record describing one tree
 * root to scan and append it to @head.
 *
 * @drop_key may be NULL (the memcpy of it is guarded — guard line elided
 * in this listing).  Returns 0 on success; the -ENOMEM path for a failed
 * malloc is elided from view.
 */
8227 static int add_root_item_to_list(struct list_head *head,
8228 u64 objectid, u64 bytenr, u64 last_snapshot,
8229 u8 level, u8 drop_level,
8230 int level_size, struct btrfs_key *drop_key)
8233 struct root_item_record *ri_rec;
8234 ri_rec = malloc(sizeof(*ri_rec));
8237 ri_rec->bytenr = bytenr;
8238 ri_rec->objectid = objectid;
8239 ri_rec->level = level;
8240 ri_rec->level_size = level_size;
8241 ri_rec->drop_level = drop_level;
8242 ri_rec->last_snapshot = last_snapshot;
8244 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8245 list_add_tail(&ri_rec->list, head);
/*
 * free_root_item_list - unlink every root_item_record from @list and free
 * it (the free() call itself is elided in this listing).
 */
8250 static void free_root_item_list(struct list_head *list)
8252 struct root_item_record *ri_rec;
8254 while (!list_empty(list)) {
8255 ri_rec = list_first_entry(list, struct root_item_record,
8257 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list - scan every tree root queued on @list.
 *
 * For each root_item_record: read its root node, seed the pending/seen/
 * nodes trees via add_root_to_pending(), then drain blocks with
 * run_next_block() — snapshot-by-snapshot when rebuilding the extent tree,
 * otherwise letting node-first ordering maximize readahead.  A final
 * run_next_block() loop (with NULL record) drains whatever remains.
 *
 * NOTE(review): numbered listing with elided lines (ret handling, loop
 * conditions, frees); code left byte-identical.
 */
8262 static int deal_root_from_list(struct list_head *list,
8263 struct btrfs_root *root,
8264 struct block_info *bits,
8266 struct cache_tree *pending,
8267 struct cache_tree *seen,
8268 struct cache_tree *reada,
8269 struct cache_tree *nodes,
8270 struct cache_tree *extent_cache,
8271 struct cache_tree *chunk_cache,
8272 struct rb_root *dev_cache,
8273 struct block_group_tree *block_group_cache,
8274 struct device_extent_tree *dev_extent_cache)
8279 while (!list_empty(list)) {
8280 struct root_item_record *rec;
8281 struct extent_buffer *buf;
8282 rec = list_entry(list->next,
8283 struct root_item_record, list);
8285 buf = read_tree_block(root->fs_info->tree_root,
8286 rec->bytenr, rec->level_size, 0);
8287 if (!extent_buffer_uptodate(buf)) {
8288 free_extent_buffer(buf);
8292 add_root_to_pending(buf, extent_cache, pending,
8293 seen, nodes, rec->objectid);
8295 * To rebuild extent tree, we need deal with snapshot
8296 * one by one, otherwise we deal with node firstly which
8297 * can maximize readahead.
8300 ret = run_next_block(root, bits, bits_nr, &last,
8301 pending, seen, reada, nodes,
8302 extent_cache, chunk_cache,
8303 dev_cache, block_group_cache,
8304 dev_extent_cache, rec);
8308 free_extent_buffer(buf);
8309 list_del(&rec->list);
/* Drain any remaining pending blocks with no specific root record. */
8315 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8316 reada, nodes, extent_cache, chunk_cache,
8317 dev_cache, block_group_cache,
8318 dev_extent_cache, NULL);
/*
 * Top-level pass that scans all trees and verifies chunk, extent and
 * device consistency.
 *
 * Steps (as visible here):
 *  1. Initialize all per-run caches and hook them into fs_info so the
 *     allocator/free paths feed fsck's bookkeeping.
 *  2. Queue the tree root and chunk root, then walk ROOT_ITEMs in the
 *     tree root, sorting each subvolume root into either normal_trees
 *     or dropping_trees (a non-zero drop_progress key means the root is
 *     mid-deletion).
 *  3. Scan both lists via deal_root_from_list().
 *  4. Cross-check chunks vs block groups vs device extents, extent
 *     backrefs, and devices.
 *  5. Tear down caches; a second, fuller teardown sequence handles the
 *     error/retry path.
 */
static int check_chunks_and_extents(struct btrfs_root *root)
	struct rb_root dev_cache;
	struct cache_tree chunk_cache;
	struct block_group_tree block_group_cache;
	struct device_extent_tree dev_extent_cache;
	struct cache_tree extent_cache;
	struct cache_tree seen;
	struct cache_tree pending;
	struct cache_tree reada;
	struct cache_tree nodes;
	struct extent_io_tree excluded_extents;
	struct cache_tree corrupt_blocks;
	struct btrfs_path path;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct block_info *bits;
	struct extent_buffer *leaf;
	struct btrfs_root_item ri;
	struct list_head dropping_trees;
	struct list_head normal_trees;
	struct btrfs_root *root1;
	dev_cache = RB_ROOT;
	cache_tree_init(&chunk_cache);
	block_group_tree_init(&block_group_cache);
	device_extent_tree_init(&dev_extent_cache);
	cache_tree_init(&extent_cache);
	cache_tree_init(&seen);
	cache_tree_init(&pending);
	cache_tree_init(&nodes);
	cache_tree_init(&reada);
	cache_tree_init(&corrupt_blocks);
	extent_io_tree_init(&excluded_extents);
	INIT_LIST_HEAD(&dropping_trees);
	INIT_LIST_HEAD(&normal_trees);
	/* Expose fsck's caches to the rest of the code via fs_info. */
	root->fs_info->excluded_extents = &excluded_extents;
	root->fs_info->fsck_extent_cache = &extent_cache;
	root->fs_info->free_extent_hook = free_extent_hook;
	root->fs_info->corrupt_blocks = &corrupt_blocks;
	bits = malloc(bits_nr * sizeof(struct block_info));
	if (ctx.progress_enabled) {
		ctx.tp = TASK_EXTENTS;
		task_start(ctx.info);
	/* Queue the tree root itself for scanning. */
	root1 = root->fs_info->tree_root;
	level = btrfs_header_level(root1->node);
	ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
				    root1->node->start, 0, level, 0,
				    root1->nodesize, NULL);
	/* Queue the chunk root as well. */
	root1 = root->fs_info->chunk_root;
	level = btrfs_header_level(root1->node);
	ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
				    root1->node->start, 0, level, 0,
				    root1->nodesize, NULL);
	btrfs_init_path(&path);
	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
		leaf = path.nodes[0];
		slot = path.slots[0];
		if (slot >= btrfs_header_nritems(path.nodes[0])) {
			ret = btrfs_next_leaf(root, &path);
			leaf = path.nodes[0];
			slot = path.slots[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
		if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
			unsigned long offset;
			offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
			read_extent_buffer(leaf, &ri, offset, sizeof(ri));
			last_snapshot = btrfs_root_last_snapshot(&ri);
			/* drop_progress == 0: root is live, not mid-drop. */
			if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
				level = btrfs_root_level(&ri);
				level_size = root->nodesize;
				ret = add_root_item_to_list(&normal_trees,
						btrfs_root_bytenr(&ri),
						last_snapshot, level,
						0, level_size, NULL);
				/* Partially-dropped root: track the drop key. */
				level = btrfs_root_level(&ri);
				level_size = root->nodesize;
				objectid = found_key.objectid;
				btrfs_disk_key_to_cpu(&found_key,
				ret = add_root_item_to_list(&dropping_trees,
						btrfs_root_bytenr(&ri),
						last_snapshot, level,
						level_size, &found_key);
	btrfs_release_path(&path);
	 * check_block can return -EAGAIN if it fixes something, please keep
	 * this in mind when dealing with return values from these functions, if
	 * we get -EAGAIN we want to fall through and restart the loop.
	ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
				  &seen, &reada, &nodes, &extent_cache,
				  &chunk_cache, &dev_cache, &block_group_cache,
	ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
				  &pending, &seen, &reada, &nodes,
				  &extent_cache, &chunk_cache, &dev_cache,
				  &block_group_cache, &dev_extent_cache);
	/* Cross-checks over everything gathered during the scan. */
	ret = check_chunks(&chunk_cache, &block_group_cache,
			   &dev_extent_cache, NULL, NULL, NULL, 0);
	ret = check_extent_refs(root, &extent_cache);
	ret = check_devices(&dev_cache, &dev_extent_cache);
	task_stop(ctx.info);
	/* Normal teardown: unhook fsck caches from fs_info and free them. */
	free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
	extent_io_tree_cleanup(&excluded_extents);
	root->fs_info->fsck_extent_cache = NULL;
	root->fs_info->free_extent_hook = NULL;
	root->fs_info->corrupt_blocks = NULL;
	root->fs_info->excluded_extents = NULL;
	free_chunk_cache_tree(&chunk_cache);
	free_device_cache_tree(&dev_cache);
	free_block_group_tree(&block_group_cache);
	free_device_extent_tree(&dev_extent_cache);
	free_extent_cache_tree(&seen);
	free_extent_cache_tree(&pending);
	free_extent_cache_tree(&reada);
	free_extent_cache_tree(&nodes);
	/* Error/retry path: free every cache, including extent records. */
	free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
	free_extent_cache_tree(&seen);
	free_extent_cache_tree(&pending);
	free_extent_cache_tree(&reada);
	free_extent_cache_tree(&nodes);
	free_chunk_cache_tree(&chunk_cache);
	free_block_group_tree(&block_group_cache);
	free_device_cache_tree(&dev_cache);
	free_device_extent_tree(&dev_extent_cache);
	free_extent_record_cache(root->fs_info, &extent_cache);
	free_root_item_list(&normal_trees);
	free_root_item_list(&dropping_trees);
	extent_io_tree_cleanup(&excluded_extents);
/*
 * Re-initialize @root with a fresh, empty root node.
 *
 * Allocates a new zeroed tree block, stamps it with the current
 * transaction's generation, the fs/chunk-tree UUIDs and the root's
 * owner, then swaps it in as root->node.  If @overwrite is set the
 * existing node is reused in place (extent_buffer_get on the old node).
 * The old buffer is released and the root is queued on the dirty list
 * so the change is committed.
 */
static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root, int overwrite)
	struct extent_buffer *c;
	struct extent_buffer *old = root->node;
	struct btrfs_disk_key disk_key = {0,0,0};
	extent_buffer_get(c);
	c = btrfs_alloc_free_block(trans, root,
				   root->root_key.objectid,
				   &disk_key, level, 0, 0);
	extent_buffer_get(c);
	/* Start from a clean header on the new node. */
	memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_level(c, level);
	btrfs_set_header_bytenr(c, c->start);
	btrfs_set_header_generation(c, trans->transid);
	btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(c, root->root_key.objectid);
	write_extent_buffer(c, root->fs_info->fsid,
			    btrfs_header_fsid(), BTRFS_FSID_SIZE);
	write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
			    btrfs_header_chunk_tree_uuid(c),
	btrfs_mark_buffer_dirty(c);
	 * This can happen in two cases:
	 * 1. We overwrote the previous root.
	 * 2. We reinitialized the reloc data root: we skipped pinning
	 *    down the reloc data tree before, which means we can be
	 *    handed the same block bytenr here.
	if (old->start == c->start) {
		btrfs_set_root_generation(&root->root_item,
		root->root_item.level = btrfs_header_level(root->node);
		/* Persist the updated root item into the tree root. */
		ret = btrfs_update_root(trans, root->fs_info->tree_root,
					&root->root_key, &root->root_item);
			free_extent_buffer(c);
	free_extent_buffer(old);
	add_root_to_dirty_list(root);
/*
 * Recursively pin every tree block reachable from @eb so the allocator
 * will not hand those bytes out while the extent tree is rebuilt.
 *
 * For leaves of the tree root (@tree_root set), each ROOT_ITEM is
 * followed into its subtree — except the extent root and reloc roots,
 * which are deliberately skipped (they get rebuilt / repinned
 * elsewhere).  For internal nodes, children are either pinned directly
 * (level 1 of a non-tree-root) or recursed into.
 */
static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
				struct extent_buffer *eb, int tree_root)
	struct extent_buffer *tmp;
	struct btrfs_root_item *ri;
	struct btrfs_key key;
	int level = btrfs_header_level(eb);
	 * If we have pinned this block before, don't pin it again.
	 * This can not only avoid forever loop with broken filesystem
	 * but also give us some speedups.
	if (test_range_bit(&fs_info->pinned_extents, eb->start,
			   eb->start + eb->len - 1, EXTENT_DIRTY, 0))
	btrfs_pin_extent(fs_info, eb->start, eb->len);
	nodesize = btrfs_super_nodesize(fs_info->super_copy);
	nritems = btrfs_header_nritems(eb);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(eb, &key, i);
		if (key.type != BTRFS_ROOT_ITEM_KEY)
		/* Skip the extent root and reloc roots */
		if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
		    key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
		    key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
		ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
		bytenr = btrfs_disk_root_bytenr(eb, ri);
		 * If at any point we start needing the real root we
		 * will have to build a stump root for the root we are
		 * in, but for now this doesn't actually use the root so
		 * just pass in extent_root.
		tmp = read_tree_block(fs_info->extent_root, bytenr,
		if (!extent_buffer_uptodate(tmp)) {
			fprintf(stderr, "Error reading root block\n");
		ret = pin_down_tree_blocks(fs_info, tmp, 0);
		free_extent_buffer(tmp);
		bytenr = btrfs_node_blockptr(eb, i);
		/* If we aren't the tree root don't read the block */
		if (level == 1 && !tree_root) {
			/* Leaves below us can be pinned without reading. */
			btrfs_pin_extent(fs_info, bytenr, nodesize);
		tmp = read_tree_block(fs_info->extent_root, bytenr,
		if (!extent_buffer_uptodate(tmp)) {
			fprintf(stderr, "Error reading tree block\n");
		ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
		free_extent_buffer(tmp);
/*
 * Pin all metadata reachable from the chunk root and the tree root.
 * The tree root is walked with tree_root=1 so ROOT_ITEMs are followed.
 */
static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
	ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
	return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups straight from the chunk tree.
 *
 * Walks every CHUNK_ITEM, creates a matching in-memory block group and
 * marks its range free in the free-space cache; then walks the created
 * block groups once (via btrfs_lookup_first_block_group) — presumably
 * to mark them cached/ready, the loop body is elided here.
 */
static int reset_block_groups(struct btrfs_fs_info *fs_info)
	struct btrfs_block_group_cache *cache;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_chunk *chunk;
	struct btrfs_key key;
	path = btrfs_alloc_path();
	key.type = BTRFS_CHUNK_ITEM_KEY;
	ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
		btrfs_free_path(path);
	 * We do this in case the block groups were screwed up and had alloc
	 * bits that aren't actually set on the chunks. This happens with
	 * restored images every time and could happen in real life I guess.
	fs_info->avail_data_alloc_bits = 0;
	fs_info->avail_metadata_alloc_bits = 0;
	fs_info->avail_system_alloc_bits = 0;
	/* First we need to create the in-memory block groups */
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(fs_info->chunk_root, path);
				btrfs_free_path(path);
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.type != BTRFS_CHUNK_ITEM_KEY) {
		chunk = btrfs_item_ptr(leaf, path->slots[0],
				       struct btrfs_chunk);
		/* One block group per chunk, flags copied from the chunk. */
		btrfs_add_block_group(fs_info, 0,
				      btrfs_chunk_type(leaf, chunk),
				      key.objectid, key.offset,
				      btrfs_chunk_length(leaf, chunk));
		set_extent_dirty(&fs_info->free_space_cache, key.offset,
				 key.offset + btrfs_chunk_length(leaf, chunk),
		cache = btrfs_lookup_first_block_group(fs_info, start);
		start = cache->key.objectid + cache->key.offset;
	btrfs_free_path(path);
/*
 * Clear any pending balance state and reinitialize the data reloc tree.
 *
 * Deletes the balance item from the tree root, deletes all
 * TREE_RELOC root items (batched with del_slot/del_nr), then reads the
 * DATA_RELOC tree, reinitializes its root and recreates its root dir.
 */
static int reset_balance(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info)
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int del_slot, del_nr = 0;
	path = btrfs_alloc_path();
	/* Step 1: remove the pending balance item, if any. */
	key.objectid = BTRFS_BALANCE_OBJECTID;
	key.type = BTRFS_BALANCE_ITEM_KEY;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		goto reinit_data_reloc;
	ret = btrfs_del_item(trans, root, path);
	btrfs_release_path(path);
	/* Step 2: delete every TREE_RELOC root item. */
	key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
		/* Flush the batch collected so far before moving on. */
		ret = btrfs_del_items(trans, root, path,
		btrfs_release_path(path);
		ret = btrfs_search_slot(trans, root, &key, path,
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
		if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
		del_slot = path->slots[0];
	ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
	btrfs_release_path(path);
	/* Step 3: reinit the data reloc tree with a fresh root. */
	key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root(fs_info, &key);
		fprintf(stderr, "Error reading data reloc tree\n");
		ret = PTR_ERR(root);
	record_root_in_trans(trans, root);
	ret = btrfs_fsck_reinit_root(trans, root, 0);
	ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
	btrfs_free_path(path);
/*
 * Wipe and rebuild the extent tree from scratch.
 *
 * Order matters: pin all metadata first so nothing gets overwritten,
 * drop and recreate the in-memory block groups, reinit the extent root,
 * re-insert one BLOCK_GROUP_ITEM per block group, then clear any
 * pending balance.  Mixed block groups are refused because file extents
 * in leaves would also need pinning.
 */
static int reinit_extent_tree(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info)
	 * The only reason we don't do this is because right now we're just
	 * walking the trees we find and pinning down their bytes, we don't look
	 * at any of the leaves. In order to do mixed groups we'd have to check
	 * the leaves of any fs roots and pin down the bytes for any file
	 * extents we find. Not hard but why do it if we don't have to?
	if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
		fprintf(stderr, "We don't support re-initing the extent tree "
			"for mixed block groups yet, please notify a btrfs "
			"developer you want to do this so they can add this "
			"functionality.\n");
	 * first we need to walk all of the trees except the extent tree and pin
	 * down the bytes that are in use so we don't overwrite any existing
	ret = pin_metadata_blocks(fs_info);
		fprintf(stderr, "error pinning down used bytes\n");
	 * Need to drop all the block groups since we're going to recreate all
	btrfs_free_block_groups(fs_info);
	ret = reset_block_groups(fs_info);
		fprintf(stderr, "error resetting the block groups\n");
	/* Ok we can allocate now, reinit the extent root */
	ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
		fprintf(stderr, "extent root initialization failed\n");
		 * When the transaction code is updated we should end the
		 * transaction, but for now progs only knows about commit so
		 * just return an error.
	 * Now we have all the in-memory block groups setup so we can make
	 * allocations properly, and the metadata we care about is safe since we
	 * pinned all of it above.
		struct btrfs_block_group_cache *cache;
		cache = btrfs_lookup_first_block_group(fs_info, start);
		start = cache->key.objectid + cache->key.offset;
		/* Re-insert the block group item into the new extent root. */
		ret = btrfs_insert_item(trans, fs_info->extent_root,
					&cache->key, &cache->item,
					sizeof(cache->item));
			fprintf(stderr, "Error adding block group\n");
		btrfs_extent_post_op(trans, fs_info->extent_root);
	ret = reset_balance(trans, fs_info);
		fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a COW of @eb by searching down to it with a transaction open.
 *
 * Looks up the owner root from the header, then does a write-mode
 * btrfs_search_slot with lowest_level set to @eb's level so the search
 * COWs the path down to (and including) that block.  Commits the
 * transaction before returning.
 */
static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;
	printf("Recowing metadata block %llu\n", eb->start);
	key.objectid = btrfs_header_owner(eb);
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root(root->fs_info, &key);
		fprintf(stderr, "Couldn't find owner root %llu\n",
		return PTR_ERR(root);
	path = btrfs_alloc_path();
	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	/* Stop the search at eb's level so eb itself gets COWed. */
	path->lowest_level = btrfs_header_level(eb);
	if (path->lowest_level)
		btrfs_node_key_to_cpu(eb, &key, 0);
		btrfs_item_key_to_cpu(eb, &key, 0);
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	btrfs_commit_transaction(trans, root);
	btrfs_free_path(path);
/*
 * Delete the single bad item described by @bad from its owning root.
 *
 * Reads the root identified by bad->root_id, opens a transaction,
 * locates bad->key with a write-mode search and removes the item.
 * The transaction is committed unconditionally at the end.
 */
static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;
	printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
	       bad->key.type, bad->key.offset);
	key.objectid = bad->root_id;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root(root->fs_info, &key);
		fprintf(stderr, "Couldn't find owner root %llu\n",
		return PTR_ERR(root);
	path = btrfs_alloc_path();
	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	/* -1/1: delete-mode, COW-enabled search for the exact key. */
	ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
	ret = btrfs_del_item(trans, root, path);
	btrfs_commit_transaction(trans, root);
	btrfs_free_path(path);
/*
 * Discard the log tree by zeroing its pointer and level in the
 * superblock, committed through a one-item transaction.
 */
static int zero_log_tree(struct btrfs_root *root)
	struct btrfs_trans_handle *trans;
	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
	btrfs_set_super_log_root(root->fs_info->super_copy, 0);
	btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
	ret = btrfs_commit_transaction(trans, root);
/*
 * Compute and insert checksums for the data extent [@start, @start+len).
 *
 * Reads the extent one sector at a time into @buf (caller-provided,
 * at least sectorsize bytes) and feeds each sector to
 * btrfs_csum_file_block().  @start is a disk bytenr.
 */
static int populate_csum(struct btrfs_trans_handle *trans,
			 struct btrfs_root *csum_root, char *buf, u64 start,
	while (offset < len) {
		sectorsize = csum_root->sectorsize;
		ret = read_extent_data(csum_root, buf, start + offset,
		ret = btrfs_csum_file_block(trans, csum_root, start + len,
					    start + offset, buf, sectorsize);
		offset += sectorsize;
/*
 * Walk one fs/subvolume tree and regenerate checksums for every
 * regular (non-inline, non-prealloc) file extent it references.
 *
 * Iterates EXTENT_DATA items with btrfs_next_item(), and for each
 * FILE_EXTENT_REG entry checksums its disk range via populate_csum().
 */
static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
					   struct btrfs_root *csum_root,
					   struct btrfs_root *cur_root)
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *node;
	struct btrfs_file_extent_item *fi;
	path = btrfs_alloc_path();
	/* One sector's worth of scratch space for populate_csum(). */
	buf = malloc(cur_root->fs_info->csum_root->sectorsize);
	ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
	/* Iterate all regular file extents and fill its csum */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (key.type != BTRFS_EXTENT_DATA_KEY)
		node = path->nodes[0];
		slot = path->slots[0];
		fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
		start = btrfs_file_extent_disk_bytenr(node, fi);
		len = btrfs_file_extent_disk_num_bytes(node, fi);
		ret = populate_csum(trans, csum_root, buf, start, len);
		 * TODO: if next leaf is corrupted, jump to nearest next valid
		ret = btrfs_next_item(cur_root, path);
	btrfs_free_path(path);
/*
 * Rebuild the csum tree by walking every fs/subvolume root.
 *
 * Scans ROOT_ITEMs in the tree root starting at FS_TREE_OBJECTID,
 * filters to fs-tree objectids (is_fstree), reads each root and
 * delegates to fill_csum_tree_from_one_fs_root().
 */
static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
				  struct btrfs_root *csum_root)
	struct btrfs_fs_info *fs_info = csum_root->fs_info;
	struct btrfs_path *path;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *cur_root;
	struct extent_buffer *node;
	struct btrfs_key key;
	path = btrfs_alloc_path();
	key.objectid = BTRFS_FS_TREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
		node = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(node, &key, slot);
		/* Past the last possible subvolume id: done. */
		if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
		if (key.type != BTRFS_ROOT_ITEM_KEY)
		if (!is_fstree(key.objectid))
		key.offset = (u64)-1;
		cur_root = btrfs_read_fs_root(fs_info, &key);
		if (IS_ERR(cur_root) || !cur_root) {
			fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
		ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
		ret = btrfs_next_item(tree_root, path);
	btrfs_free_path(path);
/*
 * Rebuild the csum tree by walking the extent tree instead of the fs
 * trees: every EXTENT_ITEM flagged BTRFS_EXTENT_FLAG_DATA gets its
 * range checksummed via populate_csum().
 */
static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *csum_root)
	struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	path = btrfs_alloc_path();
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
		btrfs_free_path(path);
	/* Sector-sized scratch buffer for populate_csum(). */
	buf = malloc(csum_root->sectorsize);
		btrfs_free_path(path);
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(extent_root, path);
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.type != BTRFS_EXTENT_ITEM_KEY) {
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_extent_item);
		/* Only data extents carry checksums; skip metadata. */
		if (!(btrfs_extent_flags(leaf, ei) &
		      BTRFS_EXTENT_FLAG_DATA)) {
		ret = populate_csum(trans, csum_root, buf, key.objectid,
	btrfs_free_path(path);
 * Recalculate the csums and put them into the csum tree.
 *
 * Extent tree init will wipe out all the extent info, so in that case we
 * can't depend on the extent tree and must use the fs trees instead.  If
 * search_fs_tree is set, we will use fs/subvol trees to init the csum tree.
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
		return fill_csum_tree_from_fs(trans, csum_root);
	return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Free the global roots_info_cache built by build_roots_info_cache():
 * pop each cache_extent, recover its root_item_info container, then
 * free the cache tree itself and reset the pointer to NULL.
 */
static void free_roots_info_cache(void)
	if (!roots_info_cache)
	while (!cache_tree_empty(roots_info_cache)) {
		struct cache_extent *entry;
		struct root_item_info *rii;
		entry = first_cache_extent(roots_info_cache);
		remove_cache_extent(roots_info_cache, entry);
		rii = container_of(entry, struct root_item_info, cache_extent);
	free(roots_info_cache);
	roots_info_cache = NULL;
/*
 * Scan the extent tree and record, per subvolume root id, the highest
 * tree block that claims to be that root's node.
 *
 * For each EXTENT_ITEM/METADATA_ITEM whose first inline ref is a
 * TREE_BLOCK_REF, the referenced root id gets (or updates) a
 * root_item_info entry keyed by root id in roots_info_cache, keeping
 * the block with the greatest level (bytenr, generation, node_count).
 * repair_root_items() later compares this against the on-disk root
 * items.
 */
static int build_roots_info_cache(struct btrfs_fs_info *info)
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	if (!roots_info_cache) {
		roots_info_cache = malloc(sizeof(*roots_info_cache));
		if (!roots_info_cache)
		cache_tree_init(roots_info_cache);
	path = btrfs_alloc_path();
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
	leaf = path->nodes[0];
		struct btrfs_key found_key;
		struct btrfs_extent_item *ei;
		struct btrfs_extent_inline_ref *iref;
		int slot = path->slots[0];
		struct cache_extent *entry;
		struct root_item_info *rii;
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(info->extent_root, path);
			leaf = path->nodes[0];
			slot = path->slots[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
		    found_key.type != BTRFS_METADATA_ITEM_KEY)
		ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
		flags = btrfs_extent_flags(leaf, ei);
		/* Only tree blocks can be root nodes; skip data extents. */
		if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
		    !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
		if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
			/* Skinny metadata: level is stored in the key offset. */
			iref = (struct btrfs_extent_inline_ref *)(ei + 1);
			level = found_key.offset;
			struct btrfs_tree_block_info *binfo;
			binfo = (struct btrfs_tree_block_info *)(ei + 1);
			iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
			level = btrfs_tree_block_level(leaf, binfo);
		 * For a root extent, it must be of the following type and the
		 * first (and only one) iref in the item.
		type = btrfs_extent_inline_ref_type(leaf, iref);
		if (type != BTRFS_TREE_BLOCK_REF_KEY)
		root_id = btrfs_extent_inline_ref_offset(leaf, iref);
		entry = lookup_cache_extent(roots_info_cache, root_id, 1);
			/* First sighting of this root id: create an entry. */
			rii = malloc(sizeof(struct root_item_info));
			rii->cache_extent.start = root_id;
			rii->cache_extent.size = 1;
			rii->level = (u8)-1;
			entry = &rii->cache_extent;
			ret = insert_cache_extent(roots_info_cache, entry);
			rii = container_of(entry, struct root_item_info,
		ASSERT(rii->cache_extent.start == root_id);
		ASSERT(rii->cache_extent.size == 1);
		/* Keep the highest-level candidate as the root node. */
		if (level > rii->level || rii->level == (u8)-1) {
			rii->bytenr = found_key.objectid;
			rii->gen = btrfs_extent_generation(leaf, ei);
			rii->node_count = 1;
		} else if (level == rii->level) {
	btrfs_free_path(path);
/*
 * Compare one on-disk root item (at path->nodes[0]/slots[0]) against
 * the candidate root node recorded in roots_info_cache, and — unless
 * @read_only_mode — rewrite the item's bytenr/level/generation to
 * match.
 *
 * Bails out if the cache has no entry for @root_key->objectid, if more
 * than one candidate root node was found (node_count != 1), or if the
 * on-disk generation is newer than the cached one (that would mean the
 * cache is the stale side).
 */
static int maybe_repair_root_item(struct btrfs_fs_info *info,
				  struct btrfs_path *path,
				  const struct btrfs_key *root_key,
				  const int read_only_mode)
	const u64 root_id = root_key->objectid;
	struct cache_extent *entry;
	struct root_item_info *rii;
	struct btrfs_root_item ri;
	unsigned long offset;
	entry = lookup_cache_extent(roots_info_cache, root_id, 1);
		"Error: could not find extent items for root %llu\n",
		root_key->objectid);
	rii = container_of(entry, struct root_item_info, cache_extent);
	ASSERT(rii->cache_extent.start == root_id);
	ASSERT(rii->cache_extent.size == 1);
	if (rii->node_count != 1) {
		"Error: could not find btree root extent for root %llu\n",
	offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
	read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
	/* Mismatch between the root item and the found root node? */
	if (btrfs_root_bytenr(&ri) != rii->bytenr ||
	    btrfs_root_level(&ri) != rii->level ||
	    btrfs_root_generation(&ri) != rii->gen) {
		 * If we're in repair mode but our caller told us to not update
		 * the root item, i.e. just check if it needs to be updated, don't
		 * print this message, since the caller will call us again shortly
		 * for the same root item without read only mode (the caller will
		 * open a transaction first).
		if (!(read_only_mode && repair))
			"%sroot item for root %llu,"
			" current bytenr %llu, current gen %llu, current level %u,"
			" new bytenr %llu, new gen %llu, new level %u\n",
			(read_only_mode ? "" : "fixing "),
			btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
			btrfs_root_level(&ri),
			rii->bytenr, rii->gen, rii->level);
		if (btrfs_root_generation(&ri) > rii->gen) {
			"root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
			root_id, btrfs_root_generation(&ri), rii->gen);
		if (!read_only_mode) {
			/* Write the corrected fields back into the leaf. */
			btrfs_set_root_bytenr(&ri, rii->bytenr);
			btrfs_set_root_level(&ri, rii->level);
			btrfs_set_root_generation(&ri, rii->gen);
			write_extent_buffer(path->nodes[0], &ri,
					    offset, sizeof(ri));
 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2)
 * caused read-only snapshots to be corrupted if they were created at a moment
 * when the source subvolume/snapshot had orphan items. The issue was that the
 * on-disk root items became incorrect, referring to the pre orphan cleanup root
 * node instead of the post orphan cleanup root node.
 * So this function, and its callees, just detects and fixes those cases. Even
 * though the regression was for read-only snapshots, this function applies to
 * any snapshot/subvolume root.
 * This must be run before any other repair code - otherwise, other repair
 * code may delete or modify backrefs in the extent tree, for example, which
 * will result in an inconsistent fs after repairing the root items.
static int repair_root_items(struct btrfs_fs_info *info)
	struct btrfs_path *path = NULL;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_trans_handle *trans = NULL;
	/* Build the root-id -> best-root-node map first. */
	ret = build_roots_info_cache(info);
	path = btrfs_alloc_path();
	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	 * Avoid opening and committing transactions if a leaf doesn't have
	 * any root items that need to be fixed, so that we avoid rotating
	 * backup roots unnecessarily.
		trans = btrfs_start_transaction(info->tree_root, 1);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
	ret = btrfs_search_slot(trans, info->tree_root, &key, path,
	leaf = path->nodes[0];
		struct btrfs_key found_key;
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			int no_more_keys = find_next_key(path, &key);
			btrfs_release_path(path);
				ret = btrfs_commit_transaction(trans,
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.type != BTRFS_ROOT_ITEM_KEY)
		/* Reloc trees are handled elsewhere; skip them here. */
		if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
		ret = maybe_repair_root_item(info, path, &found_key,
		/* A fix is needed: restart this leaf with a transaction. */
		if (!trans && repair) {
			btrfs_release_path(path);
	free_roots_info_cache();
	btrfs_free_path(path);
		btrfs_commit_transaction(trans, info->tree_root);
/* Usage text for `btrfs check`, printed by usage()/help machinery. */
const char * const cmd_check_usage[] = {
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found.",
	"WARNING: the repair mode is considered dangerous",
	"-s|--super <superblock> use this superblock copy",
	"-b|--backup use the first valid backup root copy",
	"--repair try to repair the filesystem",
	"--readonly run in read-only mode (default)",
	"--init-csum-tree create a new CRC tree",
	"--init-extent-tree create a new extent tree",
	"--check-data-csum verify checksums of data blocks",
	"-Q|--qgroup-report print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	" print subvolume extents and sharing state",
	"-r|--tree-root <bytenr> use the given bytenr for the tree root",
	"--chunk-root <bytenr> use the given bytenr for the chunk tree root",
	"-p|--progress indicate progress",
9668 int cmd_check(int argc, char **argv)
9670 struct cache_tree root_cache;
9671 struct btrfs_root *root;
9672 struct btrfs_fs_info *info;
9675 u64 tree_root_bytenr = 0;
9676 u64 chunk_root_bytenr = 0;
9677 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9680 int init_csum_tree = 0;
9682 int qgroup_report = 0;
9683 int qgroups_repaired = 0;
9684 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
9688 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9689 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9690 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9691 static const struct option long_options[] = {
9692 { "super", required_argument, NULL, 's' },
9693 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9694 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9695 { "init-csum-tree", no_argument, NULL,
9696 GETOPT_VAL_INIT_CSUM },
9697 { "init-extent-tree", no_argument, NULL,
9698 GETOPT_VAL_INIT_EXTENT },
9699 { "check-data-csum", no_argument, NULL,
9700 GETOPT_VAL_CHECK_CSUM },
9701 { "backup", no_argument, NULL, 'b' },
9702 { "subvol-extents", required_argument, NULL, 'E' },
9703 { "qgroup-report", no_argument, NULL, 'Q' },
9704 { "tree-root", required_argument, NULL, 'r' },
9705 { "chunk-root", required_argument, NULL,
9706 GETOPT_VAL_CHUNK_TREE },
9707 { "progress", no_argument, NULL, 'p' },
9711 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9715 case 'a': /* ignored */ break;
9717 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
9720 num = arg_strtou64(optarg);
9721 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9723 "ERROR: super mirror should be less than: %d\n",
9724 BTRFS_SUPER_MIRROR_MAX);
9727 bytenr = btrfs_sb_offset(((int)num));
9728 printf("using SB copy %llu, bytenr %llu\n", num,
9729 (unsigned long long)bytenr);
9735 subvolid = arg_strtou64(optarg);
9738 tree_root_bytenr = arg_strtou64(optarg);
9740 case GETOPT_VAL_CHUNK_TREE:
9741 chunk_root_bytenr = arg_strtou64(optarg);
9744 ctx.progress_enabled = true;
9748 usage(cmd_check_usage);
9749 case GETOPT_VAL_REPAIR:
9750 printf("enabling repair mode\n");
9752 ctree_flags |= OPEN_CTREE_WRITES;
9754 case GETOPT_VAL_READONLY:
9757 case GETOPT_VAL_INIT_CSUM:
9758 printf("Creating a new CRC tree\n");
9761 ctree_flags |= OPEN_CTREE_WRITES;
9763 case GETOPT_VAL_INIT_EXTENT:
9764 init_extent_tree = 1;
9765 ctree_flags |= (OPEN_CTREE_WRITES |
9766 OPEN_CTREE_NO_BLOCK_GROUPS);
9769 case GETOPT_VAL_CHECK_CSUM:
9770 check_data_csum = 1;
9775 if (check_argc_exact(argc - optind, 1))
9776 usage(cmd_check_usage);
9778 if (ctx.progress_enabled) {
9779 ctx.tp = TASK_NOTHING;
9780 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9783 /* This check is the only reason for --readonly to exist */
9784 if (readonly && repair) {
9785 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9790 cache_tree_init(&root_cache);
9792 if((ret = check_mounted(argv[optind])) < 0) {
9793 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9796 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9801 /* only allow partial opening under repair mode */
9803 ctree_flags |= OPEN_CTREE_PARTIAL;
9805 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9806 chunk_root_bytenr, ctree_flags);
9808 fprintf(stderr, "Couldn't open file system\n");
9814 root = info->fs_root;
9817 * repair mode will force us to commit transaction which
9818 * will make us fail to load log tree when mounting.
9820 if (repair && btrfs_super_log_root(info->super_copy)) {
9821 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9826 ret = zero_log_tree(root);
9828 fprintf(stderr, "fail to zero log tree\n");
9833 uuid_unparse(info->super_copy->fsid, uuidbuf);
9834 if (qgroup_report) {
9835 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
9837 ret = qgroup_verify_all(info);
9843 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
9844 subvolid, argv[optind], uuidbuf);
9845 ret = print_extent_state(info, subvolid);
9848 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
9850 if (!extent_buffer_uptodate(info->tree_root->node) ||
9851 !extent_buffer_uptodate(info->dev_root->node) ||
9852 !extent_buffer_uptodate(info->chunk_root->node)) {
9853 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9858 if (init_extent_tree || init_csum_tree) {
9859 struct btrfs_trans_handle *trans;
9861 trans = btrfs_start_transaction(info->extent_root, 0);
9862 if (IS_ERR(trans)) {
9863 fprintf(stderr, "Error starting transaction\n");
9864 ret = PTR_ERR(trans);
9868 if (init_extent_tree) {
9869 printf("Creating a new extent tree\n");
9870 ret = reinit_extent_tree(trans, info);
9875 if (init_csum_tree) {
9876 fprintf(stderr, "Reinit crc root\n");
9877 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
9879 fprintf(stderr, "crc root initialization failed\n");
9884 ret = fill_csum_tree(trans, info->csum_root,
9887 fprintf(stderr, "crc refilling failed\n");
9892 * Ok now we commit and run the normal fsck, which will add
9893 * extent entries for all of the items it finds.
9895 ret = btrfs_commit_transaction(trans, info->extent_root);
9899 if (!extent_buffer_uptodate(info->extent_root->node)) {
9900 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
9904 if (!extent_buffer_uptodate(info->csum_root->node)) {
9905 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
9910 if (!ctx.progress_enabled)
9911 fprintf(stderr, "checking extents\n");
9912 ret = check_chunks_and_extents(root);
9914 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
9916 ret = repair_root_items(info);
9920 fprintf(stderr, "Fixed %d roots.\n", ret);
9922 } else if (ret > 0) {
9924 "Found %d roots with an outdated root item.\n",
9927 "Please run a filesystem check with the option --repair to fix them.\n");
9932 if (!ctx.progress_enabled) {
9933 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
9934 fprintf(stderr, "checking free space tree\n");
9936 fprintf(stderr, "checking free space cache\n");
9938 ret = check_space_cache(root);
9943 * We used to have to have these hole extents in between our real
9944 * extents so if we don't have this flag set we need to make sure there
9945 * are no gaps in the file extents for inodes, otherwise we can just
9946 * ignore it when this happens.
9948 no_holes = btrfs_fs_incompat(root->fs_info,
9949 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
9950 if (!ctx.progress_enabled)
9951 fprintf(stderr, "checking fs roots\n");
9952 ret = check_fs_roots(root, &root_cache);
9956 fprintf(stderr, "checking csums\n");
9957 ret = check_csums(root);
9961 fprintf(stderr, "checking root refs\n");
9962 ret = check_root_refs(root, &root_cache);
9966 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
9967 struct extent_buffer *eb;
9969 eb = list_first_entry(&root->fs_info->recow_ebs,
9970 struct extent_buffer, recow);
9971 list_del_init(&eb->recow);
9972 ret = recow_extent_buffer(root, eb);
9977 while (!list_empty(&delete_items)) {
9978 struct bad_item *bad;
9980 bad = list_first_entry(&delete_items, struct bad_item, list);
9981 list_del_init(&bad->list);
9983 ret = delete_bad_item(root, bad);
9987 if (info->quota_enabled) {
9989 fprintf(stderr, "checking quota groups\n");
9990 err = qgroup_verify_all(info);
9994 err = repair_qgroups(info, &qgroups_repaired);
9999 if (!list_empty(&root->fs_info->recow_ebs)) {
10000 fprintf(stderr, "Transid errors in file system\n");
10004 /* Don't override original ret */
10005 if (!ret && qgroups_repaired)
10006 ret = qgroups_repaired;
10008 if (found_old_backref) { /*
10009 * there was a disk format change when mixed
10010 * backref was in testing tree. The old format
10011 * existed about one week.
10013 printf("\n * Found old mixed backref format. "
10014 "The old format is not supported! *"
10015 "\n * Please mount the FS in readonly mode, "
10016 "backup data and re-format the FS. *\n\n");
10019 printf("found %llu bytes used err is %d\n",
10020 (unsigned long long)bytes_used, ret);
10021 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
10022 printf("total tree bytes: %llu\n",
10023 (unsigned long long)total_btree_bytes);
10024 printf("total fs tree bytes: %llu\n",
10025 (unsigned long long)total_fs_tree_bytes);
10026 printf("total extent tree bytes: %llu\n",
10027 (unsigned long long)total_extent_tree_bytes);
10028 printf("btree space waste bytes: %llu\n",
10029 (unsigned long long)btree_space_waste);
10030 printf("file data blocks allocated: %llu\n referenced %llu\n",
10031 (unsigned long long)data_bytes_allocated,
10032 (unsigned long long)data_bytes_referenced);
10034 free_qgroup_counts();
10035 free_root_recs_tree(&root_cache);
10039 if (ctx.progress_enabled)
10040 task_deinit(ctx.info);