/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
/* Which progress task is currently running; indexes task_position_string. */
enum task_position {
	TASK_EXTENTS,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	TASK_NOTHING, /* have to be the last element */
};

/* Context shared with the progress-indicator thread. */
struct task_ctx {
	int progress_enabled;
	enum task_position tp;	/* phase currently being reported */
	struct task_info *info;	/* task-utils handle for the timer thread */
};
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
77 struct extent_backref {
79 unsigned int is_data:1;
80 unsigned int found_extent_tree:1;
81 unsigned int full_backref:1;
82 unsigned int found_ref:1;
83 unsigned int broken:1;
86 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
88 return rb_entry(node, struct extent_backref, node);
92 struct extent_backref node;
106 static inline struct data_backref* to_data_backref(struct extent_backref *back)
108 return container_of(back, struct data_backref, node);
111 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
113 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
114 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
115 struct data_backref *back1 = to_data_backref(ext1);
116 struct data_backref *back2 = to_data_backref(ext2);
118 WARN_ON(!ext1->is_data);
119 WARN_ON(!ext2->is_data);
121 /* parent and root are a union, so this covers both */
122 if (back1->parent > back2->parent)
124 if (back1->parent < back2->parent)
127 /* This is a full backref and the parents match. */
128 if (back1->node.full_backref)
131 if (back1->owner > back2->owner)
133 if (back1->owner < back2->owner)
136 if (back1->offset > back2->offset)
138 if (back1->offset < back2->offset)
141 if (back1->bytes > back2->bytes)
143 if (back1->bytes < back2->bytes)
146 if (back1->found_ref && back2->found_ref) {
147 if (back1->disk_bytenr > back2->disk_bytenr)
149 if (back1->disk_bytenr < back2->disk_bytenr)
152 if (back1->found_ref > back2->found_ref)
154 if (back1->found_ref < back2->found_ref)
162 * Much like data_backref, just removed the undetermined members
163 * and change it to use list_head.
164 * During extent scan, it is stored in root->orphan_data_extent.
165 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
167 struct orphan_data_extent {
168 struct list_head list;
176 struct tree_backref {
177 struct extent_backref node;
184 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
186 return container_of(back, struct tree_backref, node);
189 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
191 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
192 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
193 struct tree_backref *back1 = to_tree_backref(ext1);
194 struct tree_backref *back2 = to_tree_backref(ext2);
196 WARN_ON(ext1->is_data);
197 WARN_ON(ext2->is_data);
199 /* parent and root are a union, so this covers both */
200 if (back1->parent > back2->parent)
202 if (back1->parent < back2->parent)
208 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
210 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
211 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
213 if (ext1->is_data > ext2->is_data)
216 if (ext1->is_data < ext2->is_data)
219 if (ext1->full_backref > ext2->full_backref)
221 if (ext1->full_backref < ext2->full_backref)
225 return compare_data_backref(node1, node2);
227 return compare_tree_backref(node1, node2);
230 /* Explicit initialization for extent_record::flag_block_full_backref */
231 enum { FLAG_UNSET = 2 };
233 struct extent_record {
234 struct list_head backrefs;
235 struct list_head dups;
236 struct rb_root backref_tree;
237 struct list_head list;
238 struct cache_extent cache;
239 struct btrfs_disk_key parent_key;
244 u64 extent_item_refs;
246 u64 parent_generation;
250 unsigned int flag_block_full_backref:2;
251 unsigned int found_rec:1;
252 unsigned int content_checked:1;
253 unsigned int owner_ref_checked:1;
254 unsigned int is_root:1;
255 unsigned int metadata:1;
256 unsigned int bad_full_backref:1;
257 unsigned int crossing_stripes:1;
258 unsigned int wrong_chunk_type:1;
261 static inline struct extent_record* to_extent_record(struct list_head *entry)
263 return container_of(entry, struct extent_record, list);
266 struct inode_backref {
267 struct list_head list;
268 unsigned int found_dir_item:1;
269 unsigned int found_dir_index:1;
270 unsigned int found_inode_ref:1;
271 unsigned int filetype:8;
273 unsigned int ref_type;
280 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
282 return list_entry(entry, struct inode_backref, list);
285 struct root_item_record {
286 struct list_head list;
293 struct btrfs_key drop_key;
296 #define REF_ERR_NO_DIR_ITEM (1 << 0)
297 #define REF_ERR_NO_DIR_INDEX (1 << 1)
298 #define REF_ERR_NO_INODE_REF (1 << 2)
299 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
300 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
301 #define REF_ERR_DUP_INODE_REF (1 << 5)
302 #define REF_ERR_INDEX_UNMATCH (1 << 6)
303 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
304 #define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
305 #define REF_ERR_NO_ROOT_REF (1 << 9)
306 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
307 #define REF_ERR_DUP_ROOT_REF (1 << 11)
308 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
310 struct file_extent_hole {
316 struct inode_record {
317 struct list_head backrefs;
318 unsigned int checked:1;
319 unsigned int merging:1;
320 unsigned int found_inode_item:1;
321 unsigned int found_dir_item:1;
322 unsigned int found_file_extent:1;
323 unsigned int found_csum_item:1;
324 unsigned int some_csum_missing:1;
325 unsigned int nodatasum:1;
338 struct rb_root holes;
339 struct list_head orphan_extents;
344 #define I_ERR_NO_INODE_ITEM (1 << 0)
345 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
346 #define I_ERR_DUP_INODE_ITEM (1 << 2)
347 #define I_ERR_DUP_DIR_INDEX (1 << 3)
348 #define I_ERR_ODD_DIR_ITEM (1 << 4)
349 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
350 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
351 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
352 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
353 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
354 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
355 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
356 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
357 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
358 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
360 struct root_backref {
361 struct list_head list;
362 unsigned int found_dir_item:1;
363 unsigned int found_dir_index:1;
364 unsigned int found_back_ref:1;
365 unsigned int found_forward_ref:1;
366 unsigned int reachable:1;
375 static inline struct root_backref* to_root_backref(struct list_head *entry)
377 return list_entry(entry, struct root_backref, list);
381 struct list_head backrefs;
382 struct cache_extent cache;
383 unsigned int found_root_item:1;
389 struct cache_extent cache;
394 struct cache_extent cache;
395 struct cache_tree root_cache;
396 struct cache_tree inode_cache;
397 struct inode_record *current;
406 struct walk_control {
407 struct cache_tree shared;
408 struct shared_node *nodes[BTRFS_MAX_LEVEL];
414 struct btrfs_key key;
416 struct list_head list;
419 struct extent_entry {
424 struct list_head list;
427 struct root_item_info {
428 /* level of the root */
430 /* number of nodes at this level, must be 1 for a root */
434 struct cache_extent cache_extent;
438 * Error bit for low memory mode check.
440 * Currently no caller cares about it yet. Just internal use for error
443 #define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
444 #define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
446 static void *print_status_check(void *p)
448 struct task_ctx *priv = p;
449 const char work_indicator[] = { '.', 'o', 'O', 'o' };
451 static char *task_position_string[] = {
453 "checking free space cache",
457 task_period_start(priv->info, 1000 /* 1s */);
459 if (priv->tp == TASK_NOTHING)
463 printf("%s [%c]\r", task_position_string[priv->tp],
464 work_indicator[count % 4]);
467 task_period_wait(priv->info);
472 static int print_status_return(void *p)
480 /* Compatible function to allow reuse of old codes */
481 static u64 first_extent_gap(struct rb_root *holes)
483 struct file_extent_hole *hole;
485 if (RB_EMPTY_ROOT(holes))
488 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
492 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
494 struct file_extent_hole *hole1;
495 struct file_extent_hole *hole2;
497 hole1 = rb_entry(node1, struct file_extent_hole, node);
498 hole2 = rb_entry(node2, struct file_extent_hole, node);
500 if (hole1->start > hole2->start)
502 if (hole1->start < hole2->start)
504 /* Now hole1->start == hole2->start */
505 if (hole1->len >= hole2->len)
507 * Hole 1 will be merge center
508 * Same hole will be merged later
511 /* Hole 2 will be merge center */
516 * Add a hole to the record
518 * This will do hole merge for copy_file_extent_holes(),
519 * which will ensure there won't be continuous holes.
521 static int add_file_extent_hole(struct rb_root *holes,
524 struct file_extent_hole *hole;
525 struct file_extent_hole *prev = NULL;
526 struct file_extent_hole *next = NULL;
528 hole = malloc(sizeof(*hole));
533 /* Since compare will not return 0, no -EEXIST will happen */
534 rb_insert(holes, &hole->node, compare_hole);
536 /* simple merge with previous hole */
537 if (rb_prev(&hole->node))
538 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
540 if (prev && prev->start + prev->len >= hole->start) {
541 hole->len = hole->start + hole->len - prev->start;
542 hole->start = prev->start;
543 rb_erase(&prev->node, holes);
548 /* iterate merge with next holes */
550 if (!rb_next(&hole->node))
552 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
554 if (hole->start + hole->len >= next->start) {
555 if (hole->start + hole->len <= next->start + next->len)
556 hole->len = next->start + next->len -
558 rb_erase(&next->node, holes);
567 static int compare_hole_range(struct rb_node *node, void *data)
569 struct file_extent_hole *hole;
572 hole = (struct file_extent_hole *)data;
575 hole = rb_entry(node, struct file_extent_hole, node);
576 if (start < hole->start)
578 if (start >= hole->start && start < hole->start + hole->len)
584 * Delete a hole in the record
586 * This will do the hole split and is much restrict than add.
588 static int del_file_extent_hole(struct rb_root *holes,
591 struct file_extent_hole *hole;
592 struct file_extent_hole tmp;
597 struct rb_node *node;
604 node = rb_search(holes, &tmp, compare_hole_range, NULL);
607 hole = rb_entry(node, struct file_extent_hole, node);
608 if (start + len > hole->start + hole->len)
612 * Now there will be no overlap, delete the hole and re-add the
613 * split(s) if they exists.
615 if (start > hole->start) {
616 prev_start = hole->start;
617 prev_len = start - hole->start;
620 if (hole->start + hole->len > start + len) {
621 next_start = start + len;
622 next_len = hole->start + hole->len - start - len;
625 rb_erase(node, holes);
628 ret = add_file_extent_hole(holes, prev_start, prev_len);
633 ret = add_file_extent_hole(holes, next_start, next_len);
640 static int copy_file_extent_holes(struct rb_root *dst,
643 struct file_extent_hole *hole;
644 struct rb_node *node;
647 node = rb_first(src);
649 hole = rb_entry(node, struct file_extent_hole, node);
650 ret = add_file_extent_hole(dst, hole->start, hole->len);
653 node = rb_next(node);
658 static void free_file_extent_holes(struct rb_root *holes)
660 struct rb_node *node;
661 struct file_extent_hole *hole;
663 node = rb_first(holes);
665 hole = rb_entry(node, struct file_extent_hole, node);
666 rb_erase(node, holes);
668 node = rb_first(holes);
672 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
674 static void record_root_in_trans(struct btrfs_trans_handle *trans,
675 struct btrfs_root *root)
677 if (root->last_trans != trans->transid) {
678 root->track_dirty = 1;
679 root->last_trans = trans->transid;
680 root->commit_root = root->node;
681 extent_buffer_get(root->node);
685 static u8 imode_to_type(u32 imode)
688 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
689 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
690 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
691 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
692 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
693 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
694 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
695 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
698 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
702 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
704 struct device_record *rec1;
705 struct device_record *rec2;
707 rec1 = rb_entry(node1, struct device_record, node);
708 rec2 = rb_entry(node2, struct device_record, node);
709 if (rec1->devid > rec2->devid)
711 else if (rec1->devid < rec2->devid)
717 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
719 struct inode_record *rec;
720 struct inode_backref *backref;
721 struct inode_backref *orig;
722 struct inode_backref *tmp;
723 struct orphan_data_extent *src_orphan;
724 struct orphan_data_extent *dst_orphan;
728 rec = malloc(sizeof(*rec));
730 return ERR_PTR(-ENOMEM);
731 memcpy(rec, orig_rec, sizeof(*rec));
733 INIT_LIST_HEAD(&rec->backrefs);
734 INIT_LIST_HEAD(&rec->orphan_extents);
735 rec->holes = RB_ROOT;
737 list_for_each_entry(orig, &orig_rec->backrefs, list) {
738 size = sizeof(*orig) + orig->namelen + 1;
739 backref = malloc(size);
744 memcpy(backref, orig, size);
745 list_add_tail(&backref->list, &rec->backrefs);
747 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
748 dst_orphan = malloc(sizeof(*dst_orphan));
753 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
754 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
756 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
762 if (!list_empty(&rec->backrefs))
763 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
764 list_del(&orig->list);
768 if (!list_empty(&rec->orphan_extents))
769 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
770 list_del(&orig->list);
779 static void print_orphan_data_extents(struct list_head *orphan_extents,
782 struct orphan_data_extent *orphan;
784 if (list_empty(orphan_extents))
786 printf("The following data extent is lost in tree %llu:\n",
788 list_for_each_entry(orphan, orphan_extents, list) {
789 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
790 orphan->objectid, orphan->offset, orphan->disk_bytenr,
795 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
797 u64 root_objectid = root->root_key.objectid;
798 int errors = rec->errors;
802 /* reloc root errors, we print its corresponding fs root objectid*/
803 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
804 root_objectid = root->root_key.offset;
805 fprintf(stderr, "reloc");
807 fprintf(stderr, "root %llu inode %llu errors %x",
808 (unsigned long long) root_objectid,
809 (unsigned long long) rec->ino, rec->errors);
811 if (errors & I_ERR_NO_INODE_ITEM)
812 fprintf(stderr, ", no inode item");
813 if (errors & I_ERR_NO_ORPHAN_ITEM)
814 fprintf(stderr, ", no orphan item");
815 if (errors & I_ERR_DUP_INODE_ITEM)
816 fprintf(stderr, ", dup inode item");
817 if (errors & I_ERR_DUP_DIR_INDEX)
818 fprintf(stderr, ", dup dir index");
819 if (errors & I_ERR_ODD_DIR_ITEM)
820 fprintf(stderr, ", odd dir item");
821 if (errors & I_ERR_ODD_FILE_EXTENT)
822 fprintf(stderr, ", odd file extent");
823 if (errors & I_ERR_BAD_FILE_EXTENT)
824 fprintf(stderr, ", bad file extent");
825 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
826 fprintf(stderr, ", file extent overlap");
827 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
828 fprintf(stderr, ", file extent discount");
829 if (errors & I_ERR_DIR_ISIZE_WRONG)
830 fprintf(stderr, ", dir isize wrong");
831 if (errors & I_ERR_FILE_NBYTES_WRONG)
832 fprintf(stderr, ", nbytes wrong");
833 if (errors & I_ERR_ODD_CSUM_ITEM)
834 fprintf(stderr, ", odd csum item");
835 if (errors & I_ERR_SOME_CSUM_MISSING)
836 fprintf(stderr, ", some csum missing");
837 if (errors & I_ERR_LINK_COUNT_WRONG)
838 fprintf(stderr, ", link count wrong");
839 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
840 fprintf(stderr, ", orphan file extent");
841 fprintf(stderr, "\n");
842 /* Print the orphan extents if needed */
843 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
844 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
846 /* Print the holes if needed */
847 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
848 struct file_extent_hole *hole;
849 struct rb_node *node;
852 node = rb_first(&rec->holes);
853 fprintf(stderr, "Found file extent holes:\n");
856 hole = rb_entry(node, struct file_extent_hole, node);
857 fprintf(stderr, "\tstart: %llu, len: %llu\n",
858 hole->start, hole->len);
859 node = rb_next(node);
862 fprintf(stderr, "\tstart: 0, len: %llu\n",
863 round_up(rec->isize, root->sectorsize));
867 static void print_ref_error(int errors)
869 if (errors & REF_ERR_NO_DIR_ITEM)
870 fprintf(stderr, ", no dir item");
871 if (errors & REF_ERR_NO_DIR_INDEX)
872 fprintf(stderr, ", no dir index");
873 if (errors & REF_ERR_NO_INODE_REF)
874 fprintf(stderr, ", no inode ref");
875 if (errors & REF_ERR_DUP_DIR_ITEM)
876 fprintf(stderr, ", dup dir item");
877 if (errors & REF_ERR_DUP_DIR_INDEX)
878 fprintf(stderr, ", dup dir index");
879 if (errors & REF_ERR_DUP_INODE_REF)
880 fprintf(stderr, ", dup inode ref");
881 if (errors & REF_ERR_INDEX_UNMATCH)
882 fprintf(stderr, ", index mismatch");
883 if (errors & REF_ERR_FILETYPE_UNMATCH)
884 fprintf(stderr, ", filetype mismatch");
885 if (errors & REF_ERR_NAME_TOO_LONG)
886 fprintf(stderr, ", name too long");
887 if (errors & REF_ERR_NO_ROOT_REF)
888 fprintf(stderr, ", no root ref");
889 if (errors & REF_ERR_NO_ROOT_BACKREF)
890 fprintf(stderr, ", no root backref");
891 if (errors & REF_ERR_DUP_ROOT_REF)
892 fprintf(stderr, ", dup root ref");
893 if (errors & REF_ERR_DUP_ROOT_BACKREF)
894 fprintf(stderr, ", dup root backref");
895 fprintf(stderr, "\n");
898 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
901 struct ptr_node *node;
902 struct cache_extent *cache;
903 struct inode_record *rec = NULL;
906 cache = lookup_cache_extent(inode_cache, ino, 1);
908 node = container_of(cache, struct ptr_node, cache);
910 if (mod && rec->refs > 1) {
911 node->data = clone_inode_rec(rec);
912 if (IS_ERR(node->data))
918 rec = calloc(1, sizeof(*rec));
920 return ERR_PTR(-ENOMEM);
922 rec->extent_start = (u64)-1;
924 INIT_LIST_HEAD(&rec->backrefs);
925 INIT_LIST_HEAD(&rec->orphan_extents);
926 rec->holes = RB_ROOT;
928 node = malloc(sizeof(*node));
931 return ERR_PTR(-ENOMEM);
933 node->cache.start = ino;
934 node->cache.size = 1;
937 if (ino == BTRFS_FREE_INO_OBJECTID)
940 ret = insert_cache_extent(inode_cache, &node->cache);
942 return ERR_PTR(-EEXIST);
947 static void free_orphan_data_extents(struct list_head *orphan_extents)
949 struct orphan_data_extent *orphan;
951 while (!list_empty(orphan_extents)) {
952 orphan = list_entry(orphan_extents->next,
953 struct orphan_data_extent, list);
954 list_del(&orphan->list);
959 static void free_inode_rec(struct inode_record *rec)
961 struct inode_backref *backref;
966 while (!list_empty(&rec->backrefs)) {
967 backref = to_inode_backref(rec->backrefs.next);
968 list_del(&backref->list);
971 free_orphan_data_extents(&rec->orphan_extents);
972 free_file_extent_holes(&rec->holes);
976 static int can_free_inode_rec(struct inode_record *rec)
978 if (!rec->errors && rec->checked && rec->found_inode_item &&
979 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
984 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
985 struct inode_record *rec)
987 struct cache_extent *cache;
988 struct inode_backref *tmp, *backref;
989 struct ptr_node *node;
990 unsigned char filetype;
992 if (!rec->found_inode_item)
995 filetype = imode_to_type(rec->imode);
996 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
997 if (backref->found_dir_item && backref->found_dir_index) {
998 if (backref->filetype != filetype)
999 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1000 if (!backref->errors && backref->found_inode_ref &&
1001 rec->nlink == rec->found_link) {
1002 list_del(&backref->list);
1008 if (!rec->checked || rec->merging)
1011 if (S_ISDIR(rec->imode)) {
1012 if (rec->found_size != rec->isize)
1013 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1014 if (rec->found_file_extent)
1015 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1016 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1017 if (rec->found_dir_item)
1018 rec->errors |= I_ERR_ODD_DIR_ITEM;
1019 if (rec->found_size != rec->nbytes)
1020 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1021 if (rec->nlink > 0 && !no_holes &&
1022 (rec->extent_end < rec->isize ||
1023 first_extent_gap(&rec->holes) < rec->isize))
1024 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1027 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1028 if (rec->found_csum_item && rec->nodatasum)
1029 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1030 if (rec->some_csum_missing && !rec->nodatasum)
1031 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1034 BUG_ON(rec->refs != 1);
1035 if (can_free_inode_rec(rec)) {
1036 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1037 node = container_of(cache, struct ptr_node, cache);
1038 BUG_ON(node->data != rec);
1039 remove_cache_extent(inode_cache, &node->cache);
1041 free_inode_rec(rec);
1045 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1047 struct btrfs_path path;
1048 struct btrfs_key key;
1051 key.objectid = BTRFS_ORPHAN_OBJECTID;
1052 key.type = BTRFS_ORPHAN_ITEM_KEY;
1055 btrfs_init_path(&path);
1056 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1057 btrfs_release_path(&path);
1063 static int process_inode_item(struct extent_buffer *eb,
1064 int slot, struct btrfs_key *key,
1065 struct shared_node *active_node)
1067 struct inode_record *rec;
1068 struct btrfs_inode_item *item;
1070 rec = active_node->current;
1071 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1072 if (rec->found_inode_item) {
1073 rec->errors |= I_ERR_DUP_INODE_ITEM;
1076 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1077 rec->nlink = btrfs_inode_nlink(eb, item);
1078 rec->isize = btrfs_inode_size(eb, item);
1079 rec->nbytes = btrfs_inode_nbytes(eb, item);
1080 rec->imode = btrfs_inode_mode(eb, item);
1081 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1083 rec->found_inode_item = 1;
1084 if (rec->nlink == 0)
1085 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1086 maybe_free_inode_rec(&active_node->inode_cache, rec);
1090 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1092 int namelen, u64 dir)
1094 struct inode_backref *backref;
1096 list_for_each_entry(backref, &rec->backrefs, list) {
1097 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1099 if (backref->dir != dir || backref->namelen != namelen)
1101 if (memcmp(name, backref->name, namelen))
1106 backref = malloc(sizeof(*backref) + namelen + 1);
1109 memset(backref, 0, sizeof(*backref));
1111 backref->namelen = namelen;
1112 memcpy(backref->name, name, namelen);
1113 backref->name[namelen] = '\0';
1114 list_add_tail(&backref->list, &rec->backrefs);
1118 static int add_inode_backref(struct cache_tree *inode_cache,
1119 u64 ino, u64 dir, u64 index,
1120 const char *name, int namelen,
1121 int filetype, int itemtype, int errors)
1123 struct inode_record *rec;
1124 struct inode_backref *backref;
1126 rec = get_inode_rec(inode_cache, ino, 1);
1127 BUG_ON(IS_ERR(rec));
1128 backref = get_inode_backref(rec, name, namelen, dir);
1131 backref->errors |= errors;
1132 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1133 if (backref->found_dir_index)
1134 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1135 if (backref->found_inode_ref && backref->index != index)
1136 backref->errors |= REF_ERR_INDEX_UNMATCH;
1137 if (backref->found_dir_item && backref->filetype != filetype)
1138 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1140 backref->index = index;
1141 backref->filetype = filetype;
1142 backref->found_dir_index = 1;
1143 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1145 if (backref->found_dir_item)
1146 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1147 if (backref->found_dir_index && backref->filetype != filetype)
1148 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1150 backref->filetype = filetype;
1151 backref->found_dir_item = 1;
1152 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1153 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1154 if (backref->found_inode_ref)
1155 backref->errors |= REF_ERR_DUP_INODE_REF;
1156 if (backref->found_dir_index && backref->index != index)
1157 backref->errors |= REF_ERR_INDEX_UNMATCH;
1159 backref->index = index;
1161 backref->ref_type = itemtype;
1162 backref->found_inode_ref = 1;
1167 maybe_free_inode_rec(inode_cache, rec);
1171 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1172 struct cache_tree *dst_cache)
1174 struct inode_backref *backref;
1179 list_for_each_entry(backref, &src->backrefs, list) {
1180 if (backref->found_dir_index) {
1181 add_inode_backref(dst_cache, dst->ino, backref->dir,
1182 backref->index, backref->name,
1183 backref->namelen, backref->filetype,
1184 BTRFS_DIR_INDEX_KEY, backref->errors);
1186 if (backref->found_dir_item) {
1188 add_inode_backref(dst_cache, dst->ino,
1189 backref->dir, 0, backref->name,
1190 backref->namelen, backref->filetype,
1191 BTRFS_DIR_ITEM_KEY, backref->errors);
1193 if (backref->found_inode_ref) {
1194 add_inode_backref(dst_cache, dst->ino,
1195 backref->dir, backref->index,
1196 backref->name, backref->namelen, 0,
1197 backref->ref_type, backref->errors);
1201 if (src->found_dir_item)
1202 dst->found_dir_item = 1;
1203 if (src->found_file_extent)
1204 dst->found_file_extent = 1;
1205 if (src->found_csum_item)
1206 dst->found_csum_item = 1;
1207 if (src->some_csum_missing)
1208 dst->some_csum_missing = 1;
1209 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1210 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1215 BUG_ON(src->found_link < dir_count);
1216 dst->found_link += src->found_link - dir_count;
1217 dst->found_size += src->found_size;
1218 if (src->extent_start != (u64)-1) {
1219 if (dst->extent_start == (u64)-1) {
1220 dst->extent_start = src->extent_start;
1221 dst->extent_end = src->extent_end;
1223 if (dst->extent_end > src->extent_start)
1224 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1225 else if (dst->extent_end < src->extent_start) {
1226 ret = add_file_extent_hole(&dst->holes,
1228 src->extent_start - dst->extent_end);
1230 if (dst->extent_end < src->extent_end)
1231 dst->extent_end = src->extent_end;
1235 dst->errors |= src->errors;
1236 if (src->found_inode_item) {
1237 if (!dst->found_inode_item) {
1238 dst->nlink = src->nlink;
1239 dst->isize = src->isize;
1240 dst->nbytes = src->nbytes;
1241 dst->imode = src->imode;
1242 dst->nodatasum = src->nodatasum;
1243 dst->found_inode_item = 1;
1245 dst->errors |= I_ERR_DUP_INODE_ITEM;
1253 static int splice_shared_node(struct shared_node *src_node,
1254 struct shared_node *dst_node)
1256 struct cache_extent *cache;
1257 struct ptr_node *node, *ins;
1258 struct cache_tree *src, *dst;
1259 struct inode_record *rec, *conflict;
1260 u64 current_ino = 0;
1264 if (--src_node->refs == 0)
1266 if (src_node->current)
1267 current_ino = src_node->current->ino;
1269 src = &src_node->root_cache;
1270 dst = &dst_node->root_cache;
1272 cache = search_cache_extent(src, 0);
1274 node = container_of(cache, struct ptr_node, cache);
1276 cache = next_cache_extent(cache);
1279 remove_cache_extent(src, &node->cache);
1282 ins = malloc(sizeof(*ins));
1284 ins->cache.start = node->cache.start;
1285 ins->cache.size = node->cache.size;
1289 ret = insert_cache_extent(dst, &ins->cache);
1290 if (ret == -EEXIST) {
1291 conflict = get_inode_rec(dst, rec->ino, 1);
1292 BUG_ON(IS_ERR(conflict));
1293 merge_inode_recs(rec, conflict, dst);
1295 conflict->checked = 1;
1296 if (dst_node->current == conflict)
1297 dst_node->current = NULL;
1299 maybe_free_inode_rec(dst, conflict);
1300 free_inode_rec(rec);
1307 if (src == &src_node->root_cache) {
1308 src = &src_node->inode_cache;
1309 dst = &dst_node->inode_cache;
1313 if (current_ino > 0 && (!dst_node->current ||
1314 current_ino > dst_node->current->ino)) {
1315 if (dst_node->current) {
1316 dst_node->current->checked = 1;
1317 maybe_free_inode_rec(dst, dst_node->current);
1319 dst_node->current = get_inode_rec(dst, current_ino, 1);
1320 BUG_ON(IS_ERR(dst_node->current));
1325 static void free_inode_ptr(struct cache_extent *cache)
1327 struct ptr_node *node;
1328 struct inode_record *rec;
1330 node = container_of(cache, struct ptr_node, cache);
1332 free_inode_rec(rec);
1336 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1338 static struct shared_node *find_shared_node(struct cache_tree *shared,
1341 struct cache_extent *cache;
1342 struct shared_node *node;
1344 cache = lookup_cache_extent(shared, bytenr, 1);
1346 node = container_of(cache, struct shared_node, cache);
1352 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1355 struct shared_node *node;
1357 node = calloc(1, sizeof(*node));
1360 node->cache.start = bytenr;
1361 node->cache.size = 1;
1362 cache_tree_init(&node->root_cache);
1363 cache_tree_init(&node->inode_cache);
1366 ret = insert_cache_extent(shared, &node->cache);
1371 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1372 struct walk_control *wc, int level)
1374 struct shared_node *node;
1375 struct shared_node *dest;
1378 if (level == wc->active_node)
1381 BUG_ON(wc->active_node <= level);
1382 node = find_shared_node(&wc->shared, bytenr);
1384 ret = add_shared_node(&wc->shared, bytenr, refs);
1386 node = find_shared_node(&wc->shared, bytenr);
1387 wc->nodes[level] = node;
1388 wc->active_node = level;
1392 if (wc->root_level == wc->active_node &&
1393 btrfs_root_refs(&root->root_item) == 0) {
1394 if (--node->refs == 0) {
1395 free_inode_recs_tree(&node->root_cache);
1396 free_inode_recs_tree(&node->inode_cache);
1397 remove_cache_extent(&wc->shared, &node->cache);
1403 dest = wc->nodes[wc->active_node];
1404 splice_shared_node(node, dest);
1405 if (node->refs == 0) {
1406 remove_cache_extent(&wc->shared, &node->cache);
/*
 * Leave the shared node at @level: locate the nearest tracked ancestor
 * above it, make that the active node, and splice the records gathered
 * while walking the shared subtree into the destination node.
 */
1412 static int leave_shared_node(struct btrfs_root *root,
1413 struct walk_control *wc, int level)
1415 struct shared_node *node;
1416 struct shared_node *dest;
1419 if (level == wc->root_level)
/* Find the next populated level above @level on the walk path */
1422 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1426 BUG_ON(i >= BTRFS_MAX_LEVEL);
1428 node = wc->nodes[wc->active_node];
1429 wc->nodes[wc->active_node] = NULL;
1430 wc->active_node = i;
1432 dest = wc->nodes[wc->active_node];
1433 if (wc->active_node < wc->root_level ||
1434 btrfs_root_refs(&root->root_item) > 0) {
1435 BUG_ON(node->refs <= 1);
1436 splice_shared_node(node, dest);
1438 BUG_ON(node->refs < 2);
/* NOTE(review): the opening marker of this comment block is on an elided line. */
1447 * 1 - if the root with id child_root_id is a child of root parent_root_id
1448 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1449 * has other root(s) as parent(s)
1450 * 2 - if the root child_root_id doesn't have any parent roots
1452 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1455 struct btrfs_path path;
1456 struct btrfs_key key;
1457 struct extent_buffer *leaf;
1461 btrfs_init_path(&path);
/* Fast path: look for a direct ROOT_REF from parent to child */
1463 key.objectid = parent_root_id;
1464 key.type = BTRFS_ROOT_REF_KEY;
1465 key.offset = child_root_id;
1466 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1470 btrfs_release_path(&path);
/* Otherwise scan all ROOT_BACKREF items of the child root */
1474 key.objectid = child_root_id;
1475 key.type = BTRFS_ROOT_BACKREF_KEY;
1477 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1483 leaf = path.nodes[0];
1484 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1485 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1488 leaf = path.nodes[0];
1491 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1492 if (key.objectid != child_root_id ||
1493 key.type != BTRFS_ROOT_BACKREF_KEY)
1498 if (key.offset == parent_root_id) {
1499 btrfs_release_path(&path);
1506 btrfs_release_path(&path);
1509 return has_parent ? 0 : 2;
/*
 * Record every name packed into a DIR_ITEM / DIR_INDEX item as an inode
 * backref of the directory's current inode record.  Names whose
 * location points at a root item go into the root cache; any other
 * location type is reported and flagged as a multiple-objectid backref.
 */
1512 static int process_dir_item(struct btrfs_root *root,
1513 struct extent_buffer *eb,
1514 int slot, struct btrfs_key *key,
1515 struct shared_node *active_node)
1525 struct btrfs_dir_item *di;
1526 struct inode_record *rec;
1527 struct cache_tree *root_cache;
1528 struct cache_tree *inode_cache;
1529 struct btrfs_key location;
1530 char namebuf[BTRFS_NAME_LEN];
1532 root_cache = &active_node->root_cache;
1533 inode_cache = &active_node->inode_cache;
1534 rec = active_node->current;
1535 rec->found_dir_item = 1;
1537 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1538 total = btrfs_item_size_nr(eb, slot);
/* One item may pack several dir entries back to back */
1539 while (cur < total) {
1541 btrfs_dir_item_key_to_cpu(eb, di, &location);
1542 name_len = btrfs_dir_name_len(eb, di);
1543 data_len = btrfs_dir_data_len(eb, di);
1544 filetype = btrfs_dir_type(eb, di);
1546 rec->found_size += name_len;
1547 if (name_len <= BTRFS_NAME_LEN) {
/* Oversized name: clamp the copy and remember the error */
1551 len = BTRFS_NAME_LEN;
1552 error = REF_ERR_NAME_TOO_LONG;
1554 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1556 if (location.type == BTRFS_INODE_ITEM_KEY) {
1557 add_inode_backref(inode_cache, location.objectid,
1558 key->objectid, key->offset, namebuf,
1559 len, filetype, key->type, error);
1560 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1561 add_inode_backref(root_cache, location.objectid,
1562 key->objectid, key->offset,
1563 namebuf, len, filetype,
1566 fprintf(stderr, "invalid location in dir item %u\n",
1568 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1569 key->objectid, key->offset, namebuf,
1570 len, filetype, key->type, error);
/* Advance past this entry (header + name + data) */
1573 len = sizeof(*di) + name_len + data_len;
1574 di = (struct btrfs_dir_item *)((char *)di + len);
/* A DIR_INDEX key must contain exactly one entry */
1577 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1578 rec->errors |= I_ERR_DUP_DIR_INDEX;
/*
 * Walk every name packed into an INODE_REF item and add a backref for
 * each (parent dir = key->offset, index from the ref) to the current
 * shared node's inode cache.
 */
1583 static int process_inode_ref(struct extent_buffer *eb,
1584 int slot, struct btrfs_key *key,
1585 struct shared_node *active_node)
1593 struct cache_tree *inode_cache;
1594 struct btrfs_inode_ref *ref;
1595 char namebuf[BTRFS_NAME_LEN];
1597 inode_cache = &active_node->inode_cache;
1599 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1600 total = btrfs_item_size_nr(eb, slot);
1601 while (cur < total) {
1602 name_len = btrfs_inode_ref_name_len(eb, ref);
1603 index = btrfs_inode_ref_index(eb, ref);
1604 if (name_len <= BTRFS_NAME_LEN) {
/* Oversized name: clamp the copy and note the error on the backref */
1608 len = BTRFS_NAME_LEN;
1609 error = REF_ERR_NAME_TOO_LONG;
1611 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1612 add_inode_backref(inode_cache, key->objectid, key->offset,
1613 index, namebuf, len, 0, key->type, error);
1615 len = sizeof(*ref) + name_len;
1616 ref = (struct btrfs_inode_ref *)((char *)ref + len);
/*
 * Like process_inode_ref() but for INODE_EXTREF items, where the parent
 * directory inode is stored inside the extref itself rather than in the
 * item key's offset.
 */
1622 static int process_inode_extref(struct extent_buffer *eb,
1623 int slot, struct btrfs_key *key,
1624 struct shared_node *active_node)
1633 struct cache_tree *inode_cache;
1634 struct btrfs_inode_extref *extref;
1635 char namebuf[BTRFS_NAME_LEN];
1637 inode_cache = &active_node->inode_cache;
1639 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1640 total = btrfs_item_size_nr(eb, slot);
1641 while (cur < total) {
1642 name_len = btrfs_inode_extref_name_len(eb, extref);
1643 index = btrfs_inode_extref_index(eb, extref);
1644 parent = btrfs_inode_extref_parent(eb, extref);
1645 if (name_len <= BTRFS_NAME_LEN) {
/* Oversized name: clamp the copy and note the error on the backref */
1649 len = BTRFS_NAME_LEN;
1650 error = REF_ERR_NAME_TOO_LONG;
1652 read_extent_buffer(eb, namebuf,
1653 (unsigned long)(extref + 1), len);
1654 add_inode_backref(inode_cache, key->objectid, parent,
1655 index, namebuf, len, 0, key->type, error);
1657 len = sizeof(*extref) + name_len;
1658 extref = (struct btrfs_inode_extref *)((char *)extref + len);
/*
 * Count how many bytes of the disk range [start, start+len) are covered
 * by checksum items in the csum tree; the covered byte count appears to
 * be accumulated into *@found (the accumulation line itself is elided).
 */
1665 static int count_csum_range(struct btrfs_root *root, u64 start,
1666 u64 len, u64 *found)
1668 struct btrfs_key key;
1669 struct btrfs_path path;
1670 struct extent_buffer *leaf;
1675 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1677 btrfs_init_path(&path);
1679 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1681 key.type = BTRFS_EXTENT_CSUM_KEY;
1683 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
/* The previous csum item may still cover @start; step back if so */
1687 if (ret > 0 && path.slots[0] > 0) {
1688 leaf = path.nodes[0];
1689 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1690 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1691 key.type == BTRFS_EXTENT_CSUM_KEY)
1696 leaf = path.nodes[0];
1697 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1698 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1703 leaf = path.nodes[0];
1706 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1707 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1708 key.type != BTRFS_EXTENT_CSUM_KEY)
1711 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1712 if (key.offset >= start + len)
1715 if (key.offset > start)
/* One checksum per sector: item holds size/csum_size sector csums */
1718 size = btrfs_item_size_nr(leaf, path.slots[0]);
1719 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1720 if (csum_end > start) {
1721 size = min(csum_end - start, len);
1730 btrfs_release_path(&path);
/*
 * Validate one EXTENT_DATA item belonging to the current inode record:
 * track the covered file range (flagging overlaps and recording holes),
 * sanity-check the extent item fields, and verify checksum coverage for
 * the extent types that require (or must not have) csums.
 */
1736 static int process_file_extent(struct btrfs_root *root,
1737 struct extent_buffer *eb,
1738 int slot, struct btrfs_key *key,
1739 struct shared_node *active_node)
1741 struct inode_record *rec;
1742 struct btrfs_file_extent_item *fi;
1744 u64 disk_bytenr = 0;
1745 u64 extent_offset = 0;
1746 u64 mask = root->sectorsize - 1;
1750 rec = active_node->current;
1751 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1752 rec->found_file_extent = 1;
/* First extent for this inode: initialize the tracked range */
1754 if (rec->extent_start == (u64)-1) {
1755 rec->extent_start = key->offset;
1756 rec->extent_end = key->offset;
/* Overlap with the previous extent, or a hole before this one */
1759 if (rec->extent_end > key->offset)
1760 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1761 else if (rec->extent_end < key->offset) {
1762 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1763 key->offset - rec->extent_end);
1768 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1769 extent_type = btrfs_file_extent_type(eb, fi);
1771 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1772 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1774 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1775 rec->found_size += num_bytes;
/* Round inline length up to sector size for range tracking */
1776 num_bytes = (num_bytes + mask) & ~mask;
1777 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1778 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1779 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1780 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1781 extent_offset = btrfs_file_extent_offset(eb, fi);
/* num_bytes must be non-zero and sector aligned */
1782 if (num_bytes == 0 || (num_bytes & mask))
1783 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1784 if (num_bytes + extent_offset >
1785 btrfs_file_extent_ram_bytes(eb, fi))
1786 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* Prealloc extents must be plain: no compression/encryption/encoding */
1787 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1788 (btrfs_file_extent_compression(eb, fi) ||
1789 btrfs_file_extent_encryption(eb, fi) ||
1790 btrfs_file_extent_other_encoding(eb, fi)))
1791 rec->errors |= I_ERR_BAD_FILE_EXTENT;
/* disk_bytenr == 0 marks a hole extent: contributes no size */
1792 if (disk_bytenr > 0)
1793 rec->found_size += num_bytes;
1795 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1797 rec->extent_end = key->offset + num_bytes;
1800 * The data reloc tree will copy full extents into its inode and then
1801 * copy the corresponding csums. Because the extent it copied could be
1802 * a preallocated extent that hasn't been written to yet there may be no
1803 * csums to copy, ergo we won't have csums for our file extent. This is
1804 * ok so just don't bother checking csums if the inode belongs to the
1807 if (disk_bytenr > 0 &&
1808 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
/* Compressed extents carry csums for the on-disk byte count */
1810 if (btrfs_file_extent_compression(eb, fi))
1811 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1813 disk_bytenr += extent_offset;
1815 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1818 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1820 rec->found_csum_item = 1;
1821 if (found < num_bytes)
1822 rec->some_csum_missing = 1;
/* Checksums on a prealloc extent are unexpected */
1823 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1825 rec->errors |= I_ERR_ODD_CSUM_ITEM;
/*
 * Process every item of a file-tree leaf: keep active_node->current
 * pointed at the inode record the items belong to and dispatch per key
 * type to the item processors above.
 */
1831 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1832 struct walk_control *wc)
1834 struct btrfs_key key;
1838 struct cache_tree *inode_cache;
1839 struct shared_node *active_node;
/* Dead root at top level: nothing worth recording */
1841 if (wc->root_level == wc->active_node &&
1842 btrfs_root_refs(&root->root_item) == 0)
1845 active_node = wc->nodes[wc->active_node];
1846 inode_cache = &active_node->inode_cache;
1847 nritems = btrfs_header_nritems(eb);
1848 for (i = 0; i < nritems; i++) {
1849 btrfs_item_key_to_cpu(eb, &key, i);
/* Skip free-space and orphan bookkeeping items */
1851 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1853 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
/* Moving on to a new inode: close out the previous record */
1856 if (active_node->current == NULL ||
1857 active_node->current->ino < key.objectid) {
1858 if (active_node->current) {
1859 active_node->current->checked = 1;
1860 maybe_free_inode_rec(inode_cache,
1861 active_node->current);
1863 active_node->current = get_inode_rec(inode_cache,
1865 BUG_ON(IS_ERR(active_node->current));
1868 case BTRFS_DIR_ITEM_KEY:
1869 case BTRFS_DIR_INDEX_KEY:
1870 ret = process_dir_item(root, eb, i, &key, active_node);
1872 case BTRFS_INODE_REF_KEY:
1873 ret = process_inode_ref(eb, i, &key, active_node);
1875 case BTRFS_INODE_EXTREF_KEY:
1876 ret = process_inode_extref(eb, i, &key, active_node);
1878 case BTRFS_INODE_ITEM_KEY:
1879 ret = process_inode_item(eb, i, &key, active_node);
1881 case BTRFS_EXTENT_DATA_KEY:
1882 ret = process_file_extent(root, eb, i, &key,
/*
 * Issue readahead for all child blocks of @node from @slot to the end
 * so the subsequent descent does not stall on I/O.
 */
1892 static void reada_walk_down(struct btrfs_root *root,
1893 struct extent_buffer *node, int slot)
1902 level = btrfs_header_level(node);
1906 nritems = btrfs_header_nritems(node);
1907 blocksize = root->nodesize;
1908 for (i = slot; i < nritems; i++) {
1909 bytenr = btrfs_node_blockptr(node, i);
1910 ptr_gen = btrfs_node_ptr_generation(node, i);
1911 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1916 * Check the child node/leaf by the following condition:
1917 * 1. the first item key of the node/leaf should be the same with the one
1919 * 2. block in parent node should match the child node/leaf.
1920 * 3. generation of parent node and child's header should be consistent.
1922 * Or the child node/leaf pointed by the key in parent is not valid.
1924 * We hope to check leaf owner too, but since subvol may share leaves,
1925 * which makes leaf owner check not so strong, key check should be
1926 * sufficient enough for that case.
1928 static int check_child_node(struct btrfs_root *root,
1929 struct extent_buffer *parent, int slot,
1930 struct extent_buffer *child)
1932 struct btrfs_key parent_key;
1933 struct btrfs_key child_key;
1936 btrfs_node_key_to_cpu(parent, &parent_key, slot);
/* Slot-0 key of a leaf is an item key; of an internal node, a node key */
1937 if (btrfs_header_level(child) == 0)
1938 btrfs_item_key_to_cpu(child, &child_key, 0);
1940 btrfs_node_key_to_cpu(child, &child_key, 0);
1942 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1945 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1946 parent_key.objectid, parent_key.type, parent_key.offset,
1947 child_key.objectid, child_key.type, child_key.offset);
1949 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1951 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1952 btrfs_node_blockptr(parent, slot),
1953 btrfs_header_bytenr(child));
1955 if (btrfs_node_ptr_generation(parent, slot) !=
1956 btrfs_header_generation(child)) {
1958 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1959 btrfs_header_generation(child),
1960 btrfs_node_ptr_generation(parent, slot));
/*
 * Per-level memoization of the last looked-up block address and its
 * extent refcount, used by walk_down_tree() to avoid repeating
 * extent-tree lookups for the same block.
 * NOTE(review): the struct's opening declaration line is elided in
 * this excerpt.
 */
1966 u64 bytenr[BTRFS_MAX_LEVEL];
1967 u64 refs[BTRFS_MAX_LEVEL];
/*
 * Descend from the current path position one child at a time:
 * leaves are handed to process_one_leaf(); shared blocks (refs > 1) are
 * registered via enter_shared_node() and may be skipped.  Block
 * refcounts are memoized per level in @nrefs.
 */
1970 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1971 struct walk_control *wc, int *level,
1972 struct node_refs *nrefs)
1974 enum btrfs_tree_block_status status;
1977 struct extent_buffer *next;
1978 struct extent_buffer *cur;
1983 WARN_ON(*level < 0);
1984 WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* Reuse the cached refcount when revisiting the same block */
1986 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1987 refs = nrefs->refs[*level];
1990 ret = btrfs_lookup_extent_info(NULL, root,
1991 path->nodes[*level]->start,
1992 *level, 1, &refs, NULL);
1997 nrefs->bytenr[*level] = path->nodes[*level]->start;
1998 nrefs->refs[*level] = refs;
2002 ret = enter_shared_node(root, path->nodes[*level]->start,
2010 while (*level >= 0) {
2011 WARN_ON(*level < 0);
2012 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2013 cur = path->nodes[*level];
2015 if (btrfs_header_level(cur) != *level)
2018 if (path->slots[*level] >= btrfs_header_nritems(cur))
2021 ret = process_one_leaf(root, cur, wc);
2026 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2027 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2028 blocksize = root->nodesize;
/* Memoized refcount lookup for the child block */
2030 if (bytenr == nrefs->bytenr[*level - 1]) {
2031 refs = nrefs->refs[*level - 1];
2033 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2034 *level - 1, 1, &refs, NULL);
2038 nrefs->bytenr[*level - 1] = bytenr;
2039 nrefs->refs[*level - 1] = refs;
/* Shared child subtree: register it and move to the next slot */
2044 ret = enter_shared_node(root, bytenr, refs,
2047 path->slots[*level]++;
2052 next = btrfs_find_tree_block(root, bytenr, blocksize);
2053 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2054 free_extent_buffer(next);
2055 reada_walk_down(root, cur, path->slots[*level]);
2056 next = read_tree_block(root, bytenr, blocksize,
/* Unreadable child: record a corrupt-extent entry for later repair */
2058 if (!extent_buffer_uptodate(next)) {
2059 struct btrfs_key node_key;
2061 btrfs_node_key_to_cpu(path->nodes[*level],
2063 path->slots[*level]);
2064 btrfs_add_corrupt_extent_record(root->fs_info,
2066 path->nodes[*level]->start,
2067 root->nodesize, *level);
2073 ret = check_child_node(root, cur, path->slots[*level], next);
2079 if (btrfs_is_leaf(next))
2080 status = btrfs_check_leaf(root, NULL, next);
2082 status = btrfs_check_node(root, NULL, next);
2083 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2084 free_extent_buffer(next);
/* Step down into the validated child */
2089 *level = *level - 1;
2090 free_extent_buffer(path->nodes[*level]);
2091 path->nodes[*level] = next;
2092 path->slots[*level] = 0;
2095 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
/*
 * Pop exhausted levels off the path: find the first ancestor that still
 * has unvisited slots, releasing each finished level on the way up and
 * leaving its shared node if it was the active one.
 */
2099 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2100 struct walk_control *wc, int *level)
2103 struct extent_buffer *leaf;
2105 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2106 leaf = path->nodes[i];
2107 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2112 free_extent_buffer(path->nodes[*level]);
2113 path->nodes[*level] = NULL;
2114 BUG_ON(*level > wc->active_node);
2115 if (*level == wc->active_node)
2116 leave_shared_node(root, wc, *level);
/*
 * Sanity-check the root directory inode record: it must have a clean
 * inode item, nlink 1 with no counted links, and exactly the implicit
 * ".." inode-ref backref at index 0 with no dir item/index found.
 */
2123 static int check_root_dir(struct inode_record *rec)
2125 struct inode_backref *backref;
2128 if (!rec->found_inode_item || rec->errors)
2130 if (rec->nlink != 1 || rec->found_link != 0)
2132 if (list_empty(&rec->backrefs))
2134 backref = to_inode_backref(rec->backrefs.next);
2135 if (!backref->found_inode_ref)
2137 if (backref->index != 0 || backref->namelen != 2 ||
2138 memcmp(backref->name, "..", 2))
2140 if (backref->found_dir_index || backref->found_dir_item)
/*
 * Rewrite the on-disk isize of a directory inode to the size summed
 * from its dir entries (rec->found_size) and clear the error flag.
 */
2147 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2148 struct btrfs_root *root, struct btrfs_path *path,
2149 struct inode_record *rec)
2151 struct btrfs_inode_item *ei;
2152 struct btrfs_key key;
2155 key.objectid = rec->ino;
2156 key.type = BTRFS_INODE_ITEM_KEY;
/* offset (u64)-1: search lands past the inode item; the step back to
 * the previous slot presumably happens on an elided line -- confirm. */
2157 key.offset = (u64)-1;
2159 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2163 if (!path->slots[0]) {
2170 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2171 if (key.objectid != rec->ino) {
2176 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2177 struct btrfs_inode_item);
2178 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2179 btrfs_mark_buffer_dirty(path->nodes[0]);
2180 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2181 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2182 root->root_key.objectid);
2184 btrfs_release_path(path);
/*
 * Insert the missing ORPHAN_ITEM for rec->ino and clear the
 * corresponding error flag.
 */
2188 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2189 struct btrfs_root *root,
2190 struct btrfs_path *path,
2191 struct inode_record *rec)
2195 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2196 btrfs_release_path(path);
2198 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
/*
 * Reset the inode's nbytes field to the byte count actually found in
 * its file extents (rec->found_size) and clear the error flag.
 */
2202 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2203 struct btrfs_root *root,
2204 struct btrfs_path *path,
2205 struct inode_record *rec)
2207 struct btrfs_inode_item *ei;
2208 struct btrfs_key key;
2211 key.objectid = rec->ino;
2212 key.type = BTRFS_INODE_ITEM_KEY;
2215 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2222 /* Since ret == 0, no need to check anything */
2223 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2224 struct btrfs_inode_item);
2225 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2226 btrfs_mark_buffer_dirty(path->nodes[0]);
2227 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2228 printf("reset nbytes for ino %llu root %llu\n",
2229 rec->ino, root->root_key.objectid);
2231 btrfs_release_path(path);
/*
 * Re-create a missing DIR_INDEX item for @backref in its parent
 * directory, then update the parent directory record's accounted size
 * and isize error state.
 */
2235 static int add_missing_dir_index(struct btrfs_root *root,
2236 struct cache_tree *inode_cache,
2237 struct inode_record *rec,
2238 struct inode_backref *backref)
2240 struct btrfs_path *path;
2241 struct btrfs_trans_handle *trans;
2242 struct btrfs_dir_item *dir_item;
2243 struct extent_buffer *leaf;
2244 struct btrfs_key key;
2245 struct btrfs_disk_key disk_key;
2246 struct inode_record *dir_rec;
2247 unsigned long name_ptr;
/* Item payload is the dir_item header followed by the name bytes */
2248 u32 data_size = sizeof(*dir_item) + backref->namelen;
2251 path = btrfs_alloc_path();
2255 trans = btrfs_start_transaction(root, 1);
2256 if (IS_ERR(trans)) {
2257 btrfs_free_path(path);
2258 return PTR_ERR(trans);
2261 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2262 (unsigned long long)rec->ino);
2263 key.objectid = backref->dir;
2264 key.type = BTRFS_DIR_INDEX_KEY;
2265 key.offset = backref->index;
2267 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2270 leaf = path->nodes[0];
2271 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
/* Embedded location key points the entry at the child inode item */
2273 disk_key.objectid = cpu_to_le64(rec->ino);
2274 disk_key.type = BTRFS_INODE_ITEM_KEY;
2275 disk_key.offset = 0;
2277 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2278 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2279 btrfs_set_dir_data_len(leaf, dir_item, 0);
2280 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2281 name_ptr = (unsigned long)(dir_item + 1);
2282 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2283 btrfs_mark_buffer_dirty(leaf);
2284 btrfs_free_path(path);
2285 btrfs_commit_transaction(trans, root);
2287 backref->found_dir_index = 1;
2288 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2289 BUG_ON(IS_ERR(dir_rec));
/* The new name now counts toward the parent dir's isize */
2292 dir_rec->found_size += backref->namelen;
2293 if (dir_rec->found_size == dir_rec->isize &&
2294 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2295 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2296 if (dir_rec->found_size != dir_rec->isize)
2297 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
/*
 * Remove a bad DIR_INDEX entry identified by @backref: look it up by
 * name/index and delete either the whole item or just the one name
 * (both paths are visible below; the branch condition is elided).
 */
2302 static int delete_dir_index(struct btrfs_root *root,
2303 struct cache_tree *inode_cache,
2304 struct inode_record *rec,
2305 struct inode_backref *backref)
2307 struct btrfs_trans_handle *trans;
2308 struct btrfs_dir_item *di;
2309 struct btrfs_path *path;
2312 path = btrfs_alloc_path();
2316 trans = btrfs_start_transaction(root, 1);
2317 if (IS_ERR(trans)) {
2318 btrfs_free_path(path);
2319 return PTR_ERR(trans);
2323 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2324 (unsigned long long)backref->dir,
2325 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2326 (unsigned long long)root->objectid);
2328 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2329 backref->name, backref->namelen,
2330 backref->index, -1);
/* Lookup failure path: release resources and bail out */
2333 btrfs_free_path(path);
2334 btrfs_commit_transaction(trans, root);
2341 ret = btrfs_del_item(trans, root, path);
2343 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2345 btrfs_free_path(path);
2346 btrfs_commit_transaction(trans, root);
/*
 * Rebuild a missing inode item from what the fsck discovered:
 * directory if dir items were seen (size from dir entries), regular
 * file otherwise (size from the extent end), with permissive 0755 mode
 * and current timestamps.
 */
2350 static int create_inode_item(struct btrfs_root *root,
2351 struct inode_record *rec,
2352 struct inode_backref *backref, int root_dir)
2354 struct btrfs_trans_handle *trans;
2355 struct btrfs_inode_item inode_item;
2356 time_t now = time(NULL);
2359 trans = btrfs_start_transaction(root, 1);
2360 if (IS_ERR(trans)) {
2361 ret = PTR_ERR(trans);
2365 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2366 "be incomplete, please check permissions and content after "
2367 "the fsck completes.\n", (unsigned long long)root->objectid,
2368 (unsigned long long)rec->ino);
2370 memset(&inode_item, 0, sizeof(inode_item));
2371 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
/* nlink: 1 on one branch, discovered link count on the other
 * (the branch condition line is elided) */
2373 btrfs_set_stack_inode_nlink(&inode_item, 1);
2375 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2376 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2377 if (rec->found_dir_item) {
2378 if (rec->found_file_extent)
2379 fprintf(stderr, "root %llu inode %llu has both a dir "
2380 "item and extents, unsure if it is a dir or a "
2381 "regular file so setting it as a directory\n",
2382 (unsigned long long)root->objectid,
2383 (unsigned long long)rec->ino);
2384 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2385 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2386 } else if (!rec->found_dir_item) {
2387 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2388 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
/* Fresh timestamps; otime (creation) deliberately left at zero */
2390 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2391 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2392 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2393 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2394 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2395 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2396 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2397 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2399 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2401 btrfs_commit_transaction(trans, root);
/*
 * Walk all backrefs of @rec and repair the inconsistent ones: delete
 * bogus dir indexes, re-add missing DIR_INDEX and dir index/item pairs,
 * and recreate the inode item when consistent backrefs prove the inode
 * existed.  Returns the repair count or a negative error (last line).
 */
2405 static int repair_inode_backrefs(struct btrfs_root *root,
2406 struct inode_record *rec,
2407 struct cache_tree *inode_cache,
2410 struct inode_backref *tmp, *backref;
2411 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2415 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2416 if (!delete && rec->ino == root_dirid) {
2417 if (!rec->found_inode_item) {
2418 ret = create_inode_item(root, rec, backref, 1);
2425 /* Index 0 for root dir's are special, don't mess with it */
2426 if (rec->ino == root_dirid && backref->index == 0)
/* Orphaned or index-mismatched dir index entry: remove it */
2430 ((backref->found_dir_index && !backref->found_inode_ref) ||
2431 (backref->found_dir_index && backref->found_inode_ref &&
2432 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2433 ret = delete_dir_index(root, inode_cache, rec, backref);
2437 list_del(&backref->list);
2441 if (!delete && !backref->found_dir_index &&
2442 backref->found_dir_item && backref->found_inode_ref) {
2443 ret = add_missing_dir_index(root, inode_cache, rec,
/* NOTE(review): 'found_dir_index' is tested twice in the condition
 * below -- the second occurrence is redundant (left as-is here). */
2448 if (backref->found_dir_item &&
2449 backref->found_dir_index &&
2450 backref->found_dir_index) {
2451 if (!backref->errors &&
2452 backref->found_inode_ref) {
2453 list_del(&backref->list);
/* Only an inode ref exists: re-insert the dir index/item pair */
2459 if (!delete && (!backref->found_dir_index &&
2460 !backref->found_dir_item &&
2461 backref->found_inode_ref)) {
2462 struct btrfs_trans_handle *trans;
2463 struct btrfs_key location;
2465 ret = check_dir_conflict(root, backref->name,
2471 * let nlink fixing routine to handle it,
2472 * which can do it better.
2477 location.objectid = rec->ino;
2478 location.type = BTRFS_INODE_ITEM_KEY;
2479 location.offset = 0;
2481 trans = btrfs_start_transaction(root, 1);
2482 if (IS_ERR(trans)) {
2483 ret = PTR_ERR(trans);
2486 fprintf(stderr, "adding missing dir index/item pair "
2488 (unsigned long long)rec->ino);
2489 ret = btrfs_insert_dir_item(trans, root, backref->name,
2491 backref->dir, &location,
2492 imode_to_type(rec->imode),
2495 btrfs_commit_transaction(trans, root);
/* Fully consistent backrefs but no inode item: recreate it */
2499 if (!delete && (backref->found_inode_ref &&
2500 backref->found_dir_index &&
2501 backref->found_dir_item &&
2502 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2503 !rec->found_inode_item)) {
2504 ret = create_inode_item(root, rec, backref, 0);
2511 return ret ? ret : repaired;
2515 * To determine the file type for nlink/inode_item repair
2517 * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2518 * Return -ENOENT if file type is not found.
2520 static int find_file_type(struct inode_record *rec, u8 *type)
2522 struct inode_backref *backref;
2524 /* For inode item recovered case */
2525 if (rec->found_inode_item) {
2526 *type = imode_to_type(rec->imode);
/* Otherwise trust the filetype recorded by any dir entry backref */
2530 list_for_each_entry(backref, &rec->backrefs, list) {
2531 if (backref->found_dir_index || backref->found_dir_item) {
2532 *type = backref->filetype;
2540 * To determine the file name for nlink repair
2542 * Return 0 if file name is found, set name and namelen.
2543 * Return -ENOENT if file name is not found.
2545 static int find_file_name(struct inode_record *rec,
2546 char *name, int *namelen)
2548 struct inode_backref *backref;
2550 list_for_each_entry(backref, &rec->backrefs, list) {
2551 if (backref->found_dir_index || backref->found_dir_item ||
2552 backref->found_inode_ref) {
/* First backref carrying a name wins */
2553 memcpy(name, backref->name, backref->namelen);
2554 *namelen = backref->namelen;
2561 /* Reset the nlink of the inode to the correct one */
2562 static int reset_nlink(struct btrfs_trans_handle *trans,
2563 struct btrfs_root *root,
2564 struct btrfs_path *path,
2565 struct inode_record *rec)
2567 struct inode_backref *backref;
2568 struct inode_backref *tmp;
2569 struct btrfs_key key;
2570 struct btrfs_inode_item *inode_item;
2573 /* We don't believe this either, reset it and iterate backref */
2574 rec->found_link = 0;
2576 /* Remove all backref including the valid ones */
2577 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2578 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2579 backref->index, backref->name,
2580 backref->namelen, 0);
2584 /* remove invalid backref, so it won't be added back */
2585 if (!(backref->found_dir_index &&
2586 backref->found_dir_item &&
2587 backref->found_inode_ref)) {
2588 list_del(&backref->list);
2595 /* Set nlink to 0 */
2596 key.objectid = rec->ino;
2597 key.type = BTRFS_INODE_ITEM_KEY;
2599 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2606 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2607 struct btrfs_inode_item);
2608 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2609 btrfs_mark_buffer_dirty(path->nodes[0]);
2610 btrfs_release_path(path);
2613 * Add back valid inode_ref/dir_item/dir_index,
2614 * add_link() will handle the nlink inc, so new nlink must be correct
2616 list_for_each_entry(backref, &rec->backrefs, list) {
2617 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2618 backref->name, backref->namelen,
2619 backref->filetype, &backref->index, 1);
2624 btrfs_release_path(path);
/*
 * Fix a wrong nlink count: recover the file's name/type, reset all
 * links via reset_nlink(), and if no valid link remains, re-attach the
 * inode under a "lost+found" directory (appending ".INO" suffixes on
 * name collisions).  Clears I_ERR_LINK_COUNT_WRONG in all cases.
 */
2628 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2629 struct btrfs_root *root,
2630 struct btrfs_path *path,
2631 struct inode_record *rec)
2633 char *dir_name = "lost+found";
2634 char namebuf[BTRFS_NAME_LEN] = {0};
2639 int name_recovered = 0;
2640 int type_recovered = 0;
2644 * Get file name and type first before these invalid inode ref
2645 * are deleted by remove_all_invalid_backref()
2647 name_recovered = !find_file_name(rec, namebuf, &namelen);
2648 type_recovered = !find_file_type(rec, &type);
2650 if (!name_recovered) {
2651 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2652 rec->ino, rec->ino);
2653 namelen = count_digits(rec->ino);
2654 sprintf(namebuf, "%llu", rec->ino);
2657 if (!type_recovered) {
2658 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2660 type = BTRFS_FT_REG_FILE;
2664 ret = reset_nlink(trans, root, path, rec);
2667 "Failed to reset nlink for inode %llu: %s\n",
2668 rec->ino, strerror(-ret));
2672 if (rec->found_link == 0) {
2673 lost_found_ino = root->highest_inode;
2674 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2679 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2680 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2683 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2684 dir_name, strerror(-ret));
2687 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2688 namebuf, namelen, type, NULL, 1);
2690 * Add ".INO" suffix several times to handle case where
2691 * "FILENAME.INO" is already taken by another file.
2693 while (ret == -EEXIST) {
2695 * Conflicting file name, add ".INO" as suffix * +1 for '.'
2697 if (namelen + count_digits(rec->ino) + 1 >
2702 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2704 namelen += count_digits(rec->ino) + 1;
2705 ret = btrfs_add_link(trans, root, rec->ino,
2706 lost_found_ino, namebuf,
2707 namelen, type, NULL, 1);
2711 "Failed to link the inode %llu to %s dir: %s\n",
2712 rec->ino, dir_name, strerror(-ret));
2716 * Just increase the found_link, don't actually add the
2717 * backref. This will make things easier and this inode
2718 * record will be freed after the repair is done.
2719 * So fsck will not report problem about this inode.
2722 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2723 namelen, namebuf, dir_name);
2725 printf("Fixed the nlink of inode %llu\n", rec->ino);
2728 * Clear the flag anyway, or we will loop forever for the same inode
2729 * as it will not be removed from the bad inode list and the dead loop
2732 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2733 btrfs_release_path(path);
2738 * Check if there is any normal(reg or prealloc) file extent for given
2740 * This is used to determine the file type when neither its dir_index/item or
2741 * inode_item exists.
2743 * This will *NOT* report error, if any error happens, just consider it does
2744 * not have any normal file extent.
2746 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2748 struct btrfs_path *path;
2749 struct btrfs_key key;
2750 struct btrfs_key found_key;
2751 struct btrfs_file_extent_item *fi;
2755 path = btrfs_alloc_path();
2759 key.type = BTRFS_EXTENT_DATA_KEY;
2762 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
/* Search may land past the last item of a leaf; advance to the next */
2767 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2768 ret = btrfs_next_leaf(root, path);
2775 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2777 if (found_key.objectid != ino ||
2778 found_key.type != BTRFS_EXTENT_DATA_KEY)
2780 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2781 struct btrfs_file_extent_item);
/* Anything other than an inline extent counts as "normal" here */
2782 type = btrfs_file_extent_type(path->nodes[0], fi);
2783 if (type != BTRFS_FILE_EXTENT_INLINE) {
2789 btrfs_free_path(path);
/*
 * Map a BTRFS_FT_* directory-entry file type to the matching S_IF* mode bits.
 * NOTE(review): no bounds check on @type is visible in this excerpt; an
 * out-of-range value would index past the table — confirm callers only pass
 * valid BTRFS_FT_* values.
 */
2793 static u32 btrfs_type_to_imode(u8 type)
2795 static u32 imode_by_btrfs_type[] = {
2796 [BTRFS_FT_REG_FILE] = S_IFREG,
2797 [BTRFS_FT_DIR] = S_IFDIR,
2798 [BTRFS_FT_CHRDEV] = S_IFCHR,
2799 [BTRFS_FT_BLKDEV] = S_IFBLK,
2800 [BTRFS_FT_FIFO] = S_IFIFO,
2801 [BTRFS_FT_SOCK] = S_IFSOCK,
2802 [BTRFS_FT_SYMLINK] = S_IFLNK,
2805 return imode_by_btrfs_type[(type)];
/*
 * Rebuild a missing inode item for @rec by guessing the file type from the
 * evidence collected in the inode record, then creating a fresh inode item.
 * On success the record's I_ERR_NO_INODE_ITEM flag is cleared and
 * I_ERR_LINK_COUNT_WRONG is forced on so the nlink repair pass runs next.
 */
2808 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2809 struct btrfs_root *root,
2810 struct btrfs_path *path,
2811 struct inode_record *rec)
2815 int type_recovered = 0;
2818 printf("Trying to rebuild inode:%llu\n", rec->ino);
/* First try the recorded dir_index/dir_item data to recover the type. */
2820 type_recovered = !find_file_type(rec, &filetype);
2823 * Try to determine inode type if type not found.
2825 * For found regular file extent, it must be FILE.
2826 * For found dir_item/index, it must be DIR.
2828 * For undetermined one, use FILE as fallback.
2831 * 1. If found backref(inode_index/item is already handled) to it,
2833 * Need new inode-inode ref structure to allow search for that.
2835 if (!type_recovered) {
2836 if (rec->found_file_extent &&
2837 find_normal_file_extent(root, rec->ino)) {
2839 filetype = BTRFS_FT_REG_FILE;
2840 } else if (rec->found_dir_item) {
2842 filetype = BTRFS_FT_DIR;
2843 } else if (!list_empty(&rec->orphan_extents)) {
/* Orphan data extents imply file data, so treat it as a regular file. */
2845 filetype = BTRFS_FT_REG_FILE;
2847 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2850 filetype = BTRFS_FT_REG_FILE;
2854 ret = btrfs_new_inode(trans, root, rec->ino,
2855 mode | btrfs_type_to_imode(filetype));
2860 * Here inode rebuild is done, we only rebuild the inode item,
2861 * don't repair the nlink(like move to lost+found).
2862 * That is the job of nlink repair.
2864 * We just fill the record and return
2866 rec->found_dir_item = 1;
2867 rec->imode = mode | btrfs_type_to_imode(filetype);
2869 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2870 /* Ensure the inode_nlinks repair function will be called */
2871 rec->errors |= I_ERR_LINK_COUNT_WRONG;
/*
 * Re-attach orphan data extents recorded in @rec->orphan_extents to the
 * inode: conflicting extents are freed, non-conflicting ones get a new file
 * extent item, and the record's size/hole bookkeeping is updated so related
 * error flags can be cleared.
 */
2876 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2877 struct btrfs_root *root,
2878 struct btrfs_path *path,
2879 struct inode_record *rec)
2881 struct orphan_data_extent *orphan;
2882 struct orphan_data_extent *tmp;
2885 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2887 * Check for conflicting file extents
2889 * Here we don't know whether the extents is compressed or not,
2890 * so we can only assume it not compressed nor data offset,
2891 * and use its disk_len as extent length.
2893 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2894 orphan->offset, orphan->disk_len, 0);
2895 btrfs_release_path(path);
/* Conflict: something already maps this file range, drop the orphan's
 * extent allocation instead of inserting a second mapping. */
2900 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2901 orphan->disk_bytenr, orphan->disk_len);
2902 ret = btrfs_free_extent(trans,
2903 root->fs_info->extent_root,
2904 orphan->disk_bytenr, orphan->disk_len,
2905 0, root->objectid, orphan->objectid,
/* No conflict: insert a regular file extent pointing at the orphan data. */
2910 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2911 orphan->offset, orphan->disk_bytenr,
2912 orphan->disk_len, orphan->disk_len);
2916 /* Update file size info */
2917 rec->found_size += orphan->disk_len;
2918 if (rec->found_size == rec->nbytes)
2919 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2921 /* Update the file extent hole info too */
2922 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2926 if (RB_EMPTY_ROOT(&rec->holes))
2927 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2929 list_del(&orphan->list);
/* All orphan extents handled; the orphan-extent error no longer applies. */
2932 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
/*
 * Fill in every recorded file-extent hole of @rec by punching explicit hole
 * extents, clearing I_ERR_FILE_EXTENT_DISCOUNT when the hole tree drains.
 * A file that lost all its extents gets one hole covering [0, isize).
 */
2937 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2938 struct btrfs_root *root,
2939 struct btrfs_path *path,
2940 struct inode_record *rec)
2942 struct rb_node *node;
2943 struct file_extent_hole *hole;
2947 node = rb_first(&rec->holes);
2951 hole = rb_entry(node, struct file_extent_hole, node);
2952 ret = btrfs_punch_hole(trans, root, rec->ino,
2953 hole->start, hole->len);
/* Remove the hole from the record only after it was punched on disk. */
2956 ret = del_file_extent_hole(&rec->holes, hole->start,
2960 if (RB_EMPTY_ROOT(&rec->holes))
2961 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2962 node = rb_first(&rec->holes);
2964 /* special case for a file losing all its file extent */
2966 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2967 round_up(rec->isize, root->sectorsize));
2971 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2972 rec->ino, root->objectid);
/*
 * Dispatch inode repairs for @rec inside one transaction, in dependency
 * order: rebuild a missing inode item first, then orphan extents, holes,
 * isize, orphan item, nlinks and nbytes.  Each later step only runs while
 * ret is still 0.  Returns 0 on success or a negative errno.
 */
2977 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2979 struct btrfs_trans_handle *trans;
2980 struct btrfs_path *path;
/* Nothing repairable recorded — bail out before starting a transaction. */
2983 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2984 I_ERR_NO_ORPHAN_ITEM |
2985 I_ERR_LINK_COUNT_WRONG |
2986 I_ERR_NO_INODE_ITEM |
2987 I_ERR_FILE_EXTENT_ORPHAN |
2988 I_ERR_FILE_EXTENT_DISCOUNT|
2989 I_ERR_FILE_NBYTES_WRONG)))
2992 path = btrfs_alloc_path();
2997 * For nlink repair, it may create a dir and add link, so
2998 * 2 for parent(256)'s dir_index and dir_item
2999 * 2 for lost+found dir's inode_item and inode_ref
3000 * 1 for the new inode_ref of the file
3001 * 2 for lost+found dir's dir_index and dir_item for the file
3003 trans = btrfs_start_transaction(root, 7);
3004 if (IS_ERR(trans)) {
3005 btrfs_free_path(path);
3006 return PTR_ERR(trans);
/* Repairs chain on !ret so the first failure skips the remaining steps. */
3009 if (rec->errors & I_ERR_NO_INODE_ITEM)
3010 ret = repair_inode_no_item(trans, root, path, rec);
3011 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3012 ret = repair_inode_orphan_extent(trans, root, path, rec);
3013 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3014 ret = repair_inode_discount_extent(trans, root, path, rec);
3015 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3016 ret = repair_inode_isize(trans, root, path, rec);
3017 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3018 ret = repair_inode_orphan_item(trans, root, path, rec);
3019 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3020 ret = repair_inode_nlinks(trans, root, path, rec);
3021 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3022 ret = repair_inode_nbytes(trans, root, path, rec);
/* Commit is unconditional — partial repairs are still persisted.
 * NOTE(review): commit return value is ignored here; confirm intentional. */
3023 btrfs_commit_transaction(trans, root);
3024 btrfs_free_path(path);
/*
 * Verify (and, in repair mode, fix) every inode record collected for @root.
 * Handles the root directory specially (recreating it if missing), repairs
 * backrefs in multiple stages, then walks all remaining records reporting
 * or repairing per-inode errors.  Returns -1 if unresolved errors remain,
 * 0 otherwise.
 * NOTE(review): this excerpt has gaps (non-contiguous original line
 * numbers); some loop/branch structure is not visible.
 */
3028 static int check_inode_recs(struct btrfs_root *root,
3029 struct cache_tree *inode_cache)
3031 struct cache_extent *cache;
3032 struct ptr_node *node;
3033 struct inode_record *rec;
3034 struct inode_backref *backref;
3039 u64 root_dirid = btrfs_root_dirid(&root->root_item);
/* A dead root (zero refs) should have no inode records at all. */
3041 if (btrfs_root_refs(&root->root_item) == 0) {
3042 if (!cache_tree_empty(inode_cache))
3043 fprintf(stderr, "warning line %d\n", __LINE__);
3048 * We need to record the highest inode number for later 'lost+found'
3050 * We must select an ino not used/referred by any existing inode, or
3051 * 'lost+found' ino may be a missing ino in a corrupted leaf,
3052 * this may cause 'lost+found' dir has wrong nlinks.
3054 cache = last_cache_extent(inode_cache);
3056 node = container_of(cache, struct ptr_node, cache);
3058 if (rec->ino > root->highest_inode)
3059 root->highest_inode = rec->ino;
3063 * We need to repair backrefs first because we could change some of the
3064 * errors in the inode recs.
3066 * We also need to go through and delete invalid backrefs first and then
3067 * add the correct ones second. We do this because we may get EEXIST
3068 * when adding back the correct index because we hadn't yet deleted the
3071 * For example, if we were missing a dir index then the directories
3072 * isize would be wrong, so if we fixed the isize to what we thought it
3073 * would be and then fixed the backref we'd still have a invalid fs, so
3074 * we need to add back the dir index and then check to see if the isize
3079 if (stage == 3 && !err)
3082 cache = search_cache_extent(inode_cache, 0);
3083 while (repair && cache) {
3084 node = container_of(cache, struct ptr_node, cache);
3086 cache = next_cache_extent(cache);
3088 /* Need to free everything up and rescan */
3090 remove_cache_extent(inode_cache, &node->cache);
3092 free_inode_rec(rec);
3096 if (list_empty(&rec->backrefs))
3099 ret = repair_inode_backrefs(root, rec, inode_cache,
/* Root directory gets dedicated validation before the generic loop. */
3113 rec = get_inode_rec(inode_cache, root_dirid, 0);
3114 BUG_ON(IS_ERR(rec));
3116 ret = check_root_dir(rec);
3118 fprintf(stderr, "root %llu root dir %llu error\n",
3119 (unsigned long long)root->root_key.objectid,
3120 (unsigned long long)root_dirid);
3121 print_inode_error(root, rec);
3126 struct btrfs_trans_handle *trans;
3128 trans = btrfs_start_transaction(root, 1);
3129 if (IS_ERR(trans)) {
3130 err = PTR_ERR(trans);
3135 "root %llu missing its root dir, recreating\n",
3136 (unsigned long long)root->objectid);
3138 ret = btrfs_make_root_dir(trans, root, root_dirid);
3141 btrfs_commit_transaction(trans, root);
3145 fprintf(stderr, "root %llu root dir %llu not found\n",
3146 (unsigned long long)root->root_key.objectid,
3147 (unsigned long long)root_dirid);
/* Generic pass: drain the cache, repairing or reporting each record. */
3151 cache = search_cache_extent(inode_cache, 0);
3154 node = container_of(cache, struct ptr_node, cache);
3156 remove_cache_extent(inode_cache, &node->cache);
/* Root dir and orphan objectid were handled above / are expected. */
3158 if (rec->ino == root_dirid ||
3159 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3160 free_inode_rec(rec);
3164 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3165 ret = check_orphan_item(root, rec->ino);
3167 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3168 if (can_free_inode_rec(rec)) {
3169 free_inode_rec(rec);
3174 if (!rec->found_inode_item)
3175 rec->errors |= I_ERR_NO_INODE_ITEM;
3176 if (rec->found_link != rec->nlink)
3177 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3179 ret = try_repair_inode(root, rec);
3180 if (ret == 0 && can_free_inode_rec(rec)) {
3181 free_inode_rec(rec);
3187 if (!(repair && ret == 0))
3189 print_inode_error(root, rec);
/* Annotate each backref with what was missing before printing it. */
3190 list_for_each_entry(backref, &rec->backrefs, list) {
3191 if (!backref->found_dir_item)
3192 backref->errors |= REF_ERR_NO_DIR_ITEM;
3193 if (!backref->found_dir_index)
3194 backref->errors |= REF_ERR_NO_DIR_INDEX;
3195 if (!backref->found_inode_ref)
3196 backref->errors |= REF_ERR_NO_INODE_REF;
3197 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3198 " namelen %u name %s filetype %d errors %x",
3199 (unsigned long long)backref->dir,
3200 (unsigned long long)backref->index,
3201 backref->namelen, backref->name,
3202 backref->filetype, backref->errors);
3203 print_ref_error(backref->errors);
3205 free_inode_rec(rec);
3207 return (error > 0) ? -1 : 0;
/*
 * Look up the root_record for @objectid in @root_cache, allocating and
 * inserting a fresh one if absent.  Returns the record or an ERR_PTR.
 * NOTE(review): on insert_cache_extent() failure this returns
 * ERR_PTR(-EEXIST) without freeing the just-allocated rec — possible leak;
 * the free may live in a line not shown in this excerpt, confirm upstream.
 */
3210 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3213 struct cache_extent *cache;
3214 struct root_record *rec = NULL;
/* size 1 lookup: root records are keyed by bare objectid. */
3217 cache = lookup_cache_extent(root_cache, objectid, 1);
3219 rec = container_of(cache, struct root_record, cache);
3221 rec = calloc(1, sizeof(*rec));
3223 return ERR_PTR(-ENOMEM);
3224 rec->objectid = objectid;
3225 INIT_LIST_HEAD(&rec->backrefs);
3226 rec->cache.start = objectid;
3227 rec->cache.size = 1;
3229 ret = insert_cache_extent(root_cache, &rec->cache);
3231 return ERR_PTR(-EEXIST);
/*
 * Find an existing root_backref on @rec matching (ref_root, dir, name), or
 * allocate a new one with the name stored inline after the struct.
 * NOTE(review): index is only assigned on the allocation path visible here;
 * matching deliberately ignores @index so DIR_ITEM/DIR_INDEX/ROOT_REF
 * variants of the same link coalesce into one backref.
 */
3236 static struct root_backref *get_root_backref(struct root_record *rec,
3237 u64 ref_root, u64 dir, u64 index,
3238 const char *name, int namelen)
3240 struct root_backref *backref;
3242 list_for_each_entry(backref, &rec->backrefs, list) {
3243 if (backref->ref_root != ref_root || backref->dir != dir ||
3244 backref->namelen != namelen)
3246 if (memcmp(name, backref->name, namelen))
/* +1 byte for the NUL terminator appended below. */
3251 backref = calloc(1, sizeof(*backref) + namelen + 1);
3254 backref->ref_root = ref_root;
3256 backref->index = index;
3257 backref->namelen = namelen;
3258 memcpy(backref->name, name, namelen);
3259 backref->name[namelen] = '\0';
3260 list_add_tail(&backref->list, &rec->backrefs);
/*
 * cache_tree destructor callback: free a root_record and all of its
 * attached root_backrefs.  Used below via FREE_EXTENT_CACHE_BASED_TREE.
 */
3264 static void free_root_record(struct cache_extent *cache)
3266 struct root_record *rec;
3267 struct root_backref *backref;
3269 rec = container_of(cache, struct root_record, cache);
3270 while (!list_empty(&rec->backrefs)) {
3271 backref = to_root_backref(rec->backrefs.next);
3272 list_del(&backref->list);
/* Generates free_root_recs_tree() which drains a cache_tree of root recs. */
3279 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
/*
 * Record one piece of evidence (@item_type: DIR_ITEM, DIR_INDEX, ROOT_REF
 * or ROOT_BACKREF key) that root @ref_root links to root @root_id under
 * (@dir, @index, @name).  Flags mismatches between the different sources as
 * backref errors; a backref becomes "reachable" once both a forward root
 * ref and a dir item have been seen.
 */
3281 static int add_root_backref(struct cache_tree *root_cache,
3282 u64 root_id, u64 ref_root, u64 dir, u64 index,
3283 const char *name, int namelen,
3284 int item_type, int errors)
3286 struct root_record *rec;
3287 struct root_backref *backref;
3289 rec = get_root_rec(root_cache, root_id);
3290 BUG_ON(IS_ERR(rec));
3291 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3294 backref->errors |= errors;
/* DIR_ITEM carries no index, so only cross-check @index for other types. */
3296 if (item_type != BTRFS_DIR_ITEM_KEY) {
3297 if (backref->found_dir_index || backref->found_back_ref ||
3298 backref->found_forward_ref) {
3299 if (backref->index != index)
3300 backref->errors |= REF_ERR_INDEX_UNMATCH;
3302 backref->index = index;
3306 if (item_type == BTRFS_DIR_ITEM_KEY) {
3307 if (backref->found_forward_ref)
3309 backref->found_dir_item = 1;
3310 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3311 backref->found_dir_index = 1;
3312 } else if (item_type == BTRFS_ROOT_REF_KEY) {
/* Two forward refs for the same link is itself an error. */
3313 if (backref->found_forward_ref)
3314 backref->errors |= REF_ERR_DUP_ROOT_REF;
3315 else if (backref->found_dir_item)
3317 backref->found_forward_ref = 1;
3318 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3319 if (backref->found_back_ref)
3320 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3321 backref->found_back_ref = 1;
3326 if (backref->found_forward_ref && backref->found_dir_item)
3327 backref->reachable = 1;
/*
 * Migrate per-subvolume inode records from @src_cache into the global root
 * cache: records that turn out to be child-root links become root backrefs
 * in @dst_cache.  Reloc-tree records are simply discarded.
 */
3331 static int merge_root_recs(struct btrfs_root *root,
3332 struct cache_tree *src_cache,
3333 struct cache_tree *dst_cache)
3335 struct cache_extent *cache;
3336 struct ptr_node *node;
3337 struct inode_record *rec;
3338 struct inode_backref *backref;
/* Relocation trees carry no subvolume links worth merging. */
3341 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3342 free_inode_recs_tree(src_cache);
3347 cache = search_cache_extent(src_cache, 0);
3350 node = container_of(cache, struct ptr_node, cache);
3352 remove_cache_extent(src_cache, &node->cache);
/* Is rec->ino actually a child subvolume id of this root? */
3355 ret = is_child_root(root, root->objectid, rec->ino);
3361 list_for_each_entry(backref, &rec->backrefs, list) {
3362 BUG_ON(backref->found_inode_ref);
3363 if (backref->found_dir_item)
3364 add_root_backref(dst_cache, rec->ino,
3365 root->root_key.objectid, backref->dir,
3366 backref->index, backref->name,
3367 backref->namelen, BTRFS_DIR_ITEM_KEY,
3369 if (backref->found_dir_index)
3370 add_root_backref(dst_cache, rec->ino,
3371 root->root_key.objectid, backref->dir,
3372 backref->index, backref->name,
3373 backref->namelen, BTRFS_DIR_INDEX_KEY,
3377 free_inode_rec(rec);
/*
 * Verify reachability of all subvolume roots.  Iteratively clears
 * "reachable" on backrefs whose referencing root is itself unreachable,
 * then reports roots with no remaining refs (unless they are orphans or
 * have no root item) and any backref inconsistencies.  Returns 1 if errors
 * were found, 0 otherwise.
 */
3384 static int check_root_refs(struct btrfs_root *root,
3385 struct cache_tree *root_cache)
3387 struct root_record *rec;
3388 struct root_record *ref_root;
3389 struct root_backref *backref;
3390 struct cache_extent *cache;
/* The top-level FS tree is reachable by definition. */
3396 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3397 BUG_ON(IS_ERR(rec));
3400 /* fixme: this can not detect circular references */
3403 cache = search_cache_extent(root_cache, 0);
3407 rec = container_of(cache, struct root_record, cache);
3408 cache = next_cache_extent(cache);
3410 if (rec->found_ref == 0)
3413 list_for_each_entry(backref, &rec->backrefs, list) {
3414 if (!backref->reachable)
3417 ref_root = get_root_rec(root_cache,
3419 BUG_ON(IS_ERR(ref_root));
3420 if (ref_root->found_ref > 0)
/* Referencing root is unreachable, so this link is too; retry loop. */
3423 backref->reachable = 0;
3425 if (rec->found_ref == 0)
/* Reporting pass over the (now stabilized) reachability state. */
3431 cache = search_cache_extent(root_cache, 0);
3435 rec = container_of(cache, struct root_record, cache);
3436 cache = next_cache_extent(cache);
3438 if (rec->found_ref == 0 &&
3439 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3440 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
/* An orphan item legitimately explains an unreferenced subvolume. */
3441 ret = check_orphan_item(root->fs_info->tree_root,
3447 * If we don't have a root item then we likely just have
3448 * a dir item in a snapshot for this root but no actual
3449 * ref key or anything so it's meaningless.
3451 if (!rec->found_root_item)
3454 fprintf(stderr, "fs tree %llu not referenced\n",
3455 (unsigned long long)rec->objectid);
3459 if (rec->found_ref > 0 && !rec->found_root_item)
3461 list_for_each_entry(backref, &rec->backrefs, list) {
3462 if (!backref->found_dir_item)
3463 backref->errors |= REF_ERR_NO_DIR_ITEM;
3464 if (!backref->found_dir_index)
3465 backref->errors |= REF_ERR_NO_DIR_INDEX;
3466 if (!backref->found_back_ref)
3467 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3468 if (!backref->found_forward_ref)
3469 backref->errors |= REF_ERR_NO_ROOT_REF;
3470 if (backref->reachable && backref->errors)
3477 fprintf(stderr, "fs tree %llu refs %u %s\n",
3478 (unsigned long long)rec->objectid, rec->found_ref,
3479 rec->found_root_item ? "" : "not found");
3481 list_for_each_entry(backref, &rec->backrefs, list) {
3482 if (!backref->reachable)
3484 if (!backref->errors && rec->found_root_item)
3486 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3487 " index %llu namelen %u name %s errors %x\n",
3488 (unsigned long long)backref->ref_root,
3489 (unsigned long long)backref->dir,
3490 (unsigned long long)backref->index,
3491 backref->namelen, backref->name,
3493 print_ref_error(backref->errors);
3496 return errors > 0 ? 1 : 0;
/*
 * Decode a ROOT_REF/ROOT_BACKREF item at @slot of @eb and feed it into the
 * root cache.  For ROOT_REF the key is (referrer, type, referred-root); for
 * ROOT_BACKREF the objectid/offset roles are swapped, hence the two
 * add_root_backref() argument orders below.  Over-long names are truncated
 * to BTRFS_NAME_LEN and flagged with REF_ERR_NAME_TOO_LONG.
 */
3499 static int process_root_ref(struct extent_buffer *eb, int slot,
3500 struct btrfs_key *key,
3501 struct cache_tree *root_cache)
3507 struct btrfs_root_ref *ref;
3508 char namebuf[BTRFS_NAME_LEN];
3511 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3513 dirid = btrfs_root_ref_dirid(eb, ref);
3514 index = btrfs_root_ref_sequence(eb, ref);
3515 name_len = btrfs_root_ref_name_len(eb, ref);
3517 if (name_len <= BTRFS_NAME_LEN) {
3521 len = BTRFS_NAME_LEN;
3522 error = REF_ERR_NAME_TOO_LONG;
/* The name is stored immediately after the btrfs_root_ref struct. */
3524 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3526 if (key->type == BTRFS_ROOT_REF_KEY) {
3527 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3528 index, namebuf, len, key->type, error);
3530 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3531 index, namebuf, len, key->type, error);
/* cache_tree destructor callback for btrfs_corrupt_block entries. */
3536 static void free_corrupt_block(struct cache_extent *cache)
3538 struct btrfs_corrupt_block *corrupt;
3540 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
/* Generates free_corrupt_blocks_tree() used by check_fs_root() below. */
3544 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3547 * Repair the btree of the given root.
3549 * The fix is to remove the node key in corrupt_blocks cache_tree.
3550 * and rebalance the tree.
3551 * After the fix, the btree should be writeable.
/*
 * Two phases: (1) for each corrupt block, delete its parent pointer and
 * free the underlying extent without rebalancing; (2) re-run search_slot
 * with ins_len=-1 for each key purely to trigger tree balancing.
 */
3553 static int repair_btree(struct btrfs_root *root,
3554 struct cache_tree *corrupt_blocks)
3556 struct btrfs_trans_handle *trans;
3557 struct btrfs_path *path;
3558 struct btrfs_corrupt_block *corrupt;
3559 struct cache_extent *cache;
3560 struct btrfs_key key;
3565 if (cache_tree_empty(corrupt_blocks))
3568 path = btrfs_alloc_path();
3572 trans = btrfs_start_transaction(root, 1);
3573 if (IS_ERR(trans)) {
3574 ret = PTR_ERR(trans);
3575 fprintf(stderr, "Error starting transaction: %s\n",
3579 cache = first_cache_extent(corrupt_blocks);
3581 corrupt = container_of(cache, struct btrfs_corrupt_block,
/* Stop the search one level above the corrupt block so we can unlink it. */
3583 level = corrupt->level;
3584 path->lowest_level = level;
3585 key.objectid = corrupt->key.objectid;
3586 key.type = corrupt->key.type;
3587 key.offset = corrupt->key.offset;
3590 * Here we don't want to do any tree balance, since it may
3591 * cause a balance with corrupted brother leaf/node,
3592 * so ins_len set to 0 here.
3593 * Balance will be done after all corrupt node/leaf is deleted.
3595 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3598 offset = btrfs_node_blockptr(path->nodes[level],
3599 path->slots[level]);
3601 /* Remove the ptr */
3602 ret = btrfs_del_ptr(trans, root, path, level,
3603 path->slots[level]);
3607 * Remove the corresponding extent
3608 * return value is not concerned.
3610 btrfs_release_path(path);
3611 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3612 0, root->root_key.objectid,
3614 cache = next_cache_extent(cache);
3617 /* Balance the btree using btrfs_search_slot() */
3618 cache = first_cache_extent(corrupt_blocks);
3620 corrupt = container_of(cache, struct btrfs_corrupt_block,
3622 memcpy(&key, &corrupt->key, sizeof(key));
3623 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3626 /* return will always >0 since it won't find the item */
3628 btrfs_release_path(path);
3629 cache = next_cache_extent(cache);
3632 btrfs_commit_transaction(trans, root);
3634 btrfs_free_path(path);
/*
 * Fully check one fs/subvolume tree: validate the root node, walk the whole
 * tree (resuming from drop_progress for partially-dropped snapshots),
 * collect per-inode records, then merge root links and check inode records.
 * Corrupted tree blocks found along the way are optionally repaired via
 * repair_btree().
 * NOTE(review): excerpt has gaps; some error handling is not visible.
 */
3638 static int check_fs_root(struct btrfs_root *root,
3639 struct cache_tree *root_cache,
3640 struct walk_control *wc)
3646 struct btrfs_path path;
3647 struct shared_node root_node;
3648 struct root_record *rec;
3649 struct btrfs_root_item *root_item = &root->root_item;
3650 struct cache_tree corrupt_blocks;
3651 struct orphan_data_extent *orphan;
3652 struct orphan_data_extent *tmp;
3653 enum btrfs_tree_block_status status;
3654 struct node_refs nrefs;
3657 * Reuse the corrupt_block cache tree to record corrupted tree block
3659 * Unlike the usage in extent tree check, here we do it in a per
3660 * fs/subvol tree base.
3662 cache_tree_init(&corrupt_blocks);
3663 root->fs_info->corrupt_blocks = &corrupt_blocks;
/* Reloc trees get no root_record; everything else is tracked. */
3665 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3666 rec = get_root_rec(root_cache, root->root_key.objectid);
3667 BUG_ON(IS_ERR(rec));
3668 if (btrfs_root_refs(root_item) > 0)
3669 rec->found_root_item = 1;
3672 btrfs_init_path(&path);
3673 memset(&root_node, 0, sizeof(root_node));
3674 cache_tree_init(&root_node.root_cache);
3675 cache_tree_init(&root_node.inode_cache);
3676 memset(&nrefs, 0, sizeof(nrefs));
3678 /* Move the orphan extent record to corresponding inode_record */
3679 list_for_each_entry_safe(orphan, tmp,
3680 &root->orphan_data_extents, list) {
3681 struct inode_record *inode;
3683 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3685 BUG_ON(IS_ERR(inode));
3686 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3687 list_move(&orphan->list, &inode->orphan_extents);
3690 level = btrfs_header_level(root->node);
3691 memset(wc->nodes, 0, sizeof(wc->nodes));
3692 wc->nodes[level] = &root_node;
3693 wc->active_node = level;
3694 wc->root_level = level;
3696 /* We may not have checked the root block, lets do that now */
3697 if (btrfs_is_leaf(root->node))
3698 status = btrfs_check_leaf(root, NULL, root->node);
3700 status = btrfs_check_node(root, NULL, root->node);
3701 if (status != BTRFS_TREE_BLOCK_CLEAN)
/* A live root starts at the top; a half-dropped snapshot resumes from the
 * recorded drop_progress key at drop_level. */
3704 if (btrfs_root_refs(root_item) > 0 ||
3705 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3706 path.nodes[level] = root->node;
3707 extent_buffer_get(root->node);
3708 path.slots[level] = 0;
3710 struct btrfs_key key;
3711 struct btrfs_disk_key found_key;
3713 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3714 level = root_item->drop_level;
3715 path.lowest_level = level;
3716 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3719 btrfs_node_key(path.nodes[level], &found_key,
3721 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3722 sizeof(found_key)));
/* Main walk: descend then ascend repeatedly until the tree is consumed. */
3726 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3732 wret = walk_up_tree(root, &path, wc, &level);
3739 btrfs_release_path(&path);
3741 if (!cache_tree_empty(&corrupt_blocks)) {
3742 struct cache_extent *cache;
3743 struct btrfs_corrupt_block *corrupt;
3745 printf("The following tree block(s) is corrupted in tree %llu:\n",
3746 root->root_key.objectid);
3747 cache = first_cache_extent(&corrupt_blocks);
3749 corrupt = container_of(cache,
3750 struct btrfs_corrupt_block,
3752 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3753 cache->start, corrupt->level,
3754 corrupt->key.objectid, corrupt->key.type,
3755 corrupt->key.offset);
3756 cache = next_cache_extent(cache);
3759 printf("Try to repair the btree for root %llu\n",
3760 root->root_key.objectid);
3761 ret = repair_btree(root, &corrupt_blocks);
3763 fprintf(stderr, "Failed to repair btree: %s\n",
3766 printf("Btree for root %llu is fixed\n",
3767 root->root_key.objectid);
3771 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3775 if (root_node.current) {
3776 root_node.current->checked = 1;
3777 maybe_free_inode_rec(&root_node.inode_cache,
3781 err = check_inode_recs(root, &root_node.inode_cache);
/* Always detach the per-root corrupt-block tree before returning. */
3785 free_corrupt_blocks_tree(&corrupt_blocks);
3786 root->fs_info->corrupt_blocks = NULL;
3787 free_orphan_data_extents(&root->orphan_data_extents);
/*
 * Return nonzero if @objectid names a tree whose contents are fs data:
 * the two relocation trees are included explicitly, everything else defers
 * to is_fstree().
 */
3791 static int fs_root_objectid(u64 objectid)
3793 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3794 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3796 return is_fstree(objectid);
/*
 * Iterate every ROOT_ITEM in the tree of tree roots and run check_fs_root()
 * on each fs/subvolume tree; ROOT_REF/ROOT_BACKREF items feed the root
 * cache via process_root_ref().  If the tree root node changes underneath
 * us (a repair committed), or a check returns -EAGAIN, the whole scan
 * restarts with a cleared root cache.
 */
3799 static int check_fs_roots(struct btrfs_root *root,
3800 struct cache_tree *root_cache)
3802 struct btrfs_path path;
3803 struct btrfs_key key;
3804 struct walk_control wc;
3805 struct extent_buffer *leaf, *tree_node;
3806 struct btrfs_root *tmp_root;
3807 struct btrfs_root *tree_root = root->fs_info->tree_root;
3811 if (ctx.progress_enabled) {
3812 ctx.tp = TASK_FS_ROOTS;
3813 task_start(ctx.info);
3817 * Just in case we made any changes to the extent tree that weren't
3818 * reflected into the free space cache yet.
3821 reset_cached_block_groups(root->fs_info);
3822 memset(&wc, 0, sizeof(wc));
3823 cache_tree_init(&wc.shared);
3824 btrfs_init_path(&path);
3829 key.type = BTRFS_ROOT_ITEM_KEY;
3830 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
/* Snapshot the root node pointer so we can detect concurrent commits. */
3835 tree_node = tree_root->node;
3837 if (tree_node != tree_root->node) {
3838 free_root_recs_tree(root_cache);
3839 btrfs_release_path(&path);
3842 leaf = path.nodes[0];
3843 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3844 ret = btrfs_next_leaf(tree_root, &path);
3850 leaf = path.nodes[0];
3852 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3853 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3854 fs_root_objectid(key.objectid)) {
/* Reloc trees are read uncached (and freed below); normal subvol roots go
 * through the cached read with offset forced to the latest generation. */
3855 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3856 tmp_root = btrfs_read_fs_root_no_cache(
3857 root->fs_info, &key);
3859 key.offset = (u64)-1;
3860 tmp_root = btrfs_read_fs_root(
3861 root->fs_info, &key);
3863 if (IS_ERR(tmp_root)) {
3867 ret = check_fs_root(tmp_root, root_cache, &wc);
3868 if (ret == -EAGAIN) {
3869 free_root_recs_tree(root_cache);
3870 btrfs_release_path(&path);
3875 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3876 btrfs_free_fs_root(tmp_root);
3877 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3878 key.type == BTRFS_ROOT_BACKREF_KEY) {
3879 process_root_ref(leaf, path.slots[0], &key,
3886 btrfs_release_path(&path);
3888 free_extent_cache_tree(&wc.shared);
3889 if (!cache_tree_empty(&wc.shared))
3890 fprintf(stderr, "warning line %d\n", __LINE__);
3892 task_stop(ctx.info);
/*
 * Cross-check every backref of @rec against the extent tree: each backref
 * must have been seen in the extent tree, tree backrefs must be referenced
 * back, data backrefs must match the extent's bytenr/length, and the summed
 * reference count must equal rec->refs.  Returns nonzero if anything is
 * inconsistent; with @print_errs set, each mismatch is reported on stderr.
 */
3897 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3900 struct extent_backref *back;
3901 struct tree_backref *tback;
3902 struct data_backref *dback;
3906 for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3907 back = rb_node_to_extent_backref(n);
3908 if (!back->found_extent_tree) {
3912 if (back->is_data) {
3913 dback = to_data_backref(back);
3914 fprintf(stderr, "Backref %llu %s %llu"
3915 " owner %llu offset %llu num_refs %lu"
3916 " not found in extent tree\n",
3917 (unsigned long long)rec->start,
3918 back->full_backref ?
3920 back->full_backref ?
3921 (unsigned long long)dback->parent:
3922 (unsigned long long)dback->root,
3923 (unsigned long long)dback->owner,
3924 (unsigned long long)dback->offset,
3925 (unsigned long)dback->num_refs);
3927 tback = to_tree_backref(back);
3928 fprintf(stderr, "Backref %llu parent %llu"
3929 " root %llu not found in extent tree\n",
3930 (unsigned long long)rec->start,
3931 (unsigned long long)tback->parent,
3932 (unsigned long long)tback->root);
/* Tree backref present in extent tree but never seen while walking. */
3935 if (!back->is_data && !back->found_ref) {
3939 tback = to_tree_backref(back);
3940 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3941 (unsigned long long)rec->start,
3942 back->full_backref ? "parent" : "root",
3943 back->full_backref ?
3944 (unsigned long long)tback->parent :
3945 (unsigned long long)tback->root, back);
3947 if (back->is_data) {
3948 dback = to_data_backref(back);
/* Local count: refs actually found must match the extent item's count. */
3949 if (dback->found_ref != dback->num_refs) {
3953 fprintf(stderr, "Incorrect local backref count"
3954 " on %llu %s %llu owner %llu"
3955 " offset %llu found %u wanted %u back %p\n",
3956 (unsigned long long)rec->start,
3957 back->full_backref ?
3959 back->full_backref ?
3960 (unsigned long long)dback->parent:
3961 (unsigned long long)dback->root,
3962 (unsigned long long)dback->owner,
3963 (unsigned long long)dback->offset,
3964 dback->found_ref, dback->num_refs, back);
3966 if (dback->disk_bytenr != rec->start) {
3970 fprintf(stderr, "Backref disk bytenr does not"
3971 " match extent record, bytenr=%llu, "
3972 "ref bytenr=%llu\n",
3973 (unsigned long long)rec->start,
3974 (unsigned long long)dback->disk_bytenr);
3977 if (dback->bytes != rec->nr) {
3981 fprintf(stderr, "Backref bytes do not match "
3982 "extent backref, bytenr=%llu, ref "
3983 "bytes=%llu, backref bytes=%llu\n",
3984 (unsigned long long)rec->start,
3985 (unsigned long long)rec->nr,
3986 (unsigned long long)dback->bytes);
/* Accumulate the global ref count: tree refs count 1, data refs count
 * their found_ref.  (The tree-ref increment is in a line not shown.) */
3989 if (!back->is_data) {
3992 dback = to_data_backref(back);
3993 found += dback->found_ref;
3996 if (found != rec->refs) {
4000 fprintf(stderr, "Incorrect global backref count "
4001 "on %llu found %llu wanted %llu\n",
4002 (unsigned long long)rec->start,
4003 (unsigned long long)found,
4004 (unsigned long long)rec->refs);
/* rb_free_nodes() callback: free one extent_backref embedded in @node. */
4010 static void __free_one_backref(struct rb_node *node)
4012 struct extent_backref *back = rb_node_to_extent_backref(node);
/* Release every backref attached to an extent record. */
4017 static void free_all_extent_backrefs(struct extent_record *rec)
4019 rb_free_nodes(&rec->backref_tree, __free_one_backref);
/*
 * Drain @extent_cache completely, freeing each extent_record and its
 * backrefs.  @fs_info is unused in the lines visible here.
 */
4022 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4023 struct cache_tree *extent_cache)
4025 struct cache_extent *cache;
4026 struct extent_record *rec;
4029 cache = first_cache_extent(extent_cache);
4032 rec = container_of(cache, struct extent_record, cache);
4033 remove_cache_extent(extent_cache, cache);
4034 free_all_extent_backrefs(rec);
/*
 * Free @rec early if it is fully verified: contents and owner checked,
 * ref counts consistent, no duplicates, all backpointers resolved, and no
 * structural problems (bad full backref, stripe crossing, wrong chunk
 * type).  Otherwise it stays cached for later error reporting.
 */
4039 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4040 struct extent_record *rec)
4042 if (rec->content_checked && rec->owner_ref_checked &&
4043 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4044 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4045 !rec->bad_full_backref && !rec->crossing_stripes &&
4046 !rec->wrong_chunk_type) {
4047 remove_cache_extent(extent_cache, &rec->cache);
4048 free_all_extent_backrefs(rec);
4049 list_del_init(&rec->list);
/*
 * Verify that tree block @buf is really owned by the root recorded in its
 * header.  Fast path: a matching non-full tree backref on @rec.  Slow path:
 * read the owner's fs tree and confirm the parent node at level+1 actually
 * points at buf->start.  Returns 0 when ownership checks out, 1 otherwise.
 */
4055 static int check_owner_ref(struct btrfs_root *root,
4056 struct extent_record *rec,
4057 struct extent_buffer *buf)
4059 struct extent_backref *node, *tmp;
4060 struct tree_backref *back;
4061 struct btrfs_root *ref_root;
4062 struct btrfs_key key;
4063 struct btrfs_path path;
4064 struct extent_buffer *parent;
4069 rbtree_postorder_for_each_entry_safe(node, tmp,
4070 &rec->backref_tree, node) {
4073 if (!node->found_ref)
4075 if (node->full_backref)
4077 back = to_tree_backref(node);
4078 if (btrfs_header_owner(buf) == back->root)
4081 BUG_ON(rec->is_root);
4083 /* try to find the block by search corresponding fs tree */
4084 key.objectid = btrfs_header_owner(buf);
4085 key.type = BTRFS_ROOT_ITEM_KEY;
4086 key.offset = (u64)-1;
4088 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4089 if (IS_ERR(ref_root))
4092 level = btrfs_header_level(buf);
/* Use the block's first key as the search target into the owner tree. */
4094 btrfs_item_key_to_cpu(buf, &key, 0);
4096 btrfs_node_key_to_cpu(buf, &key, 0);
4098 btrfs_init_path(&path);
4099 path.lowest_level = level + 1;
4100 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4104 parent = path.nodes[level + 1];
4105 if (parent && buf->start == btrfs_node_blockptr(parent,
4106 path.slots[level + 1]))
4109 btrfs_release_path(&path);
/* Inverted sense: 0 means the owner ref was confirmed. */
4110 return found ? 0 : 1;
/*
 * Return nonzero if @rec has a non-full tree backref rooted in the extent
 * tree, i.e. the block belongs to the extent tree itself.
 */
4113 static int is_extent_tree_record(struct extent_record *rec)
4115 struct extent_backref *ref, *tmp;
4116 struct tree_backref *back;
4119 rbtree_postorder_for_each_entry_safe(ref, tmp,
4120 &rec->backref_tree, node) {
4123 back = to_tree_backref(ref);
/* Full backrefs identify a parent block, not a root — skip them. */
4124 if (ref->full_backref)
4126 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
/*
 * Record an I/O failure at [start, start+len): if the range maps to a
 * known extent-tree block, register it as a corrupt extent record so a
 * later repair pass can deal with it.
 */
4133 static int record_bad_block_io(struct btrfs_fs_info *info,
4134 struct cache_tree *extent_cache,
4137 struct extent_record *rec;
4138 struct cache_extent *cache;
4139 struct btrfs_key key;
4141 cache = lookup_cache_extent(extent_cache, start, len);
4145 rec = container_of(cache, struct extent_record, cache);
/* Only extent-tree blocks are tracked as corrupt extent records. */
4146 if (!is_extent_tree_record(rec))
4149 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4150 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
/*
 * Swap the entries at @slot and @slot+1 in @buf to repair bad key order.
 * For interior nodes the key pointers are exchanged in place; for leaves
 * the item data, offsets/sizes and keys are all swapped.  When slot 0 is
 * touched, the parent's low key is fixed up to stay consistent.
 */
4153 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4154 struct extent_buffer *buf, int slot)
4156 if (btrfs_header_level(buf)) {
4157 struct btrfs_key_ptr ptr1, ptr2;
/* Node case: exchange the two btrfs_key_ptr entries wholesale. */
4159 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4160 sizeof(struct btrfs_key_ptr));
4161 read_extent_buffer(buf, &ptr2,
4162 btrfs_node_key_ptr_offset(slot + 1),
4163 sizeof(struct btrfs_key_ptr));
4164 write_extent_buffer(buf, &ptr1,
4165 btrfs_node_key_ptr_offset(slot + 1),
4166 sizeof(struct btrfs_key_ptr));
4167 write_extent_buffer(buf, &ptr2,
4168 btrfs_node_key_ptr_offset(slot),
4169 sizeof(struct btrfs_key_ptr));
4171 struct btrfs_disk_key key;
/* Slot 0 changed: propagate the new first key to the ancestors. */
4172 btrfs_node_key(buf, &key, 0);
4173 btrfs_fixup_low_keys(root, path, &key,
4174 btrfs_header_level(buf) + 1);
4177 struct btrfs_item *item1, *item2;
4178 struct btrfs_key k1, k2;
4179 char *item1_data, *item2_data;
4180 u32 item1_offset, item2_offset, item1_size, item2_size;
/* Leaf case: swap payloads as well as item headers. */
4182 item1 = btrfs_item_nr(slot);
4183 item2 = btrfs_item_nr(slot + 1);
4184 btrfs_item_key_to_cpu(buf, &k1, slot);
4185 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4186 item1_offset = btrfs_item_offset(buf, item1);
4187 item2_offset = btrfs_item_offset(buf, item2);
4188 item1_size = btrfs_item_size(buf, item1);
4189 item2_size = btrfs_item_size(buf, item2);
4191 item1_data = malloc(item1_size);
4194 item2_data = malloc(item2_size);
/* Copy each item's data into the other's slot in the leaf data area. */
4200 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4201 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4203 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4204 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
/* Exchange the item headers so offset/size follow the moved data. */
4208 btrfs_set_item_offset(buf, item1, item2_offset);
4209 btrfs_set_item_offset(buf, item2, item1_offset);
4210 btrfs_set_item_size(buf, item1, item2_size);
4211 btrfs_set_item_size(buf, item2, item1_size);
/* Finally swap the keys via the path-aware helper (fixes low keys). */
4213 path->slots[0] = slot;
4214 btrfs_set_item_key_unsafe(root, path, &k2);
4215 path->slots[0] = slot + 1;
4216 btrfs_set_item_key_unsafe(root, path, &k1);
/*
 * Walk adjacent key pairs in the block at path->lowest_level and swap
 * any pair that is out of order, then mark the buffer dirty.  Handles
 * both node keys and leaf item keys.
 */
4221 static int fix_key_order(struct btrfs_trans_handle *trans,
4222 struct btrfs_root *root,
4223 struct btrfs_path *path)
4225 struct extent_buffer *buf;
4226 struct btrfs_key k1, k2;
4228 int level = path->lowest_level;
4231 buf = path->nodes[level];
4232 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
/* Read the pair in node or leaf form depending on the level. */
4234 btrfs_node_key_to_cpu(buf, &k1, i);
4235 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4237 btrfs_item_key_to_cpu(buf, &k1, i);
4238 btrfs_item_key_to_cpu(buf, &k2, i + 1);
/* Already ordered (k1 < k2): nothing to fix for this pair. */
4240 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4242 ret = swap_values(root, path, buf, i);
4245 btrfs_mark_buffer_dirty(buf);
/*
 * Remove a corrupt item at @slot from leaf @buf by shifting the item
 * headers down over it and decrementing nritems.  Only item types whose
 * loss a later check pass can detect and repair are allowed to go.
 * NOTE(review): the item's data bytes are left in place; only the
 * header array is compacted — fix_item_offset deals with the gap.
 */
4251 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4252 struct btrfs_root *root,
4253 struct btrfs_path *path,
4254 struct extent_buffer *buf, int slot)
4256 struct btrfs_key key;
4257 int nritems = btrfs_header_nritems(buf);
4259 btrfs_item_key_to_cpu(buf, &key, slot);
4261 /* These are all the keys we can deal with missing. */
4262 if (key.type != BTRFS_DIR_INDEX_KEY &&
4263 key.type != BTRFS_EXTENT_ITEM_KEY &&
4264 key.type != BTRFS_METADATA_ITEM_KEY &&
4265 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4266 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4269 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4270 (unsigned long long)key.objectid, key.type,
4271 (unsigned long long)key.offset, slot, buf->start);
/* Shift the following item headers one slot down over the victim. */
4272 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4273 btrfs_item_nr_offset(slot + 1),
4274 sizeof(struct btrfs_item) *
4275 (nritems - slot - 1));
4276 btrfs_set_header_nritems(buf, nritems - 1);
4278 struct btrfs_disk_key disk_key;
/* First item removed: re-anchor the parent's low key. */
4280 btrfs_item_key(buf, &disk_key, 0);
4281 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4283 btrfs_mark_buffer_dirty(buf);
/*
 * Repair leaf item offsets so the data area is densely packed: item 0
 * must end at BTRFS_LEAF_DATA_SIZE and each item must end where its
 * predecessor's data starts.  Items that overlap or run off the end are
 * deleted when possible; recoverable gaps are closed by shifting data.
 */
4287 static int fix_item_offset(struct btrfs_trans_handle *trans,
4288 struct btrfs_root *root,
4289 struct btrfs_path *path)
4291 struct extent_buffer *buf;
4295 /* We should only get this for leaves */
4296 BUG_ON(path->lowest_level);
4297 buf = path->nodes[0];
4299 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4300 unsigned int shift = 0, offset;
/* First item: its data must end exactly at the end of the leaf. */
4302 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4303 BTRFS_LEAF_DATA_SIZE(root)) {
4304 if (btrfs_item_end_nr(buf, i) >
4305 BTRFS_LEAF_DATA_SIZE(root)) {
4306 ret = delete_bogus_item(trans, root, path,
4310 fprintf(stderr, "item is off the end of the "
4311 "leaf, can't fix\n");
4315 shift = BTRFS_LEAF_DATA_SIZE(root) -
4316 btrfs_item_end_nr(buf, i);
/* Later items: each must butt up against the previous item's data. */
4317 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4318 btrfs_item_offset_nr(buf, i - 1)) {
4319 if (btrfs_item_end_nr(buf, i) >
4320 btrfs_item_offset_nr(buf, i - 1)) {
4321 ret = delete_bogus_item(trans, root, path,
4325 fprintf(stderr, "items overlap, can't fix\n");
4329 shift = btrfs_item_offset_nr(buf, i - 1) -
4330 btrfs_item_end_nr(buf, i);
4335 printf("Shifting item nr %d by %u bytes in block %llu\n",
4336 i, shift, (unsigned long long)buf->start);
4337 offset = btrfs_item_offset_nr(buf, i);
/* Move the item's data up by @shift and record the new offset. */
4338 memmove_extent_buffer(buf,
4339 btrfs_leaf_data(buf) + offset + shift,
4340 btrfs_leaf_data(buf) + offset,
4341 btrfs_item_size_nr(buf, i));
4342 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4344 btrfs_mark_buffer_dirty(buf);
4348 * We may have moved things, in which case we want to exit so we don't
4349 * write those changes out. Once we have proper abort functionality in
4350 * progs this can be changed to something nicer.
4357 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4358 * then just return -EIO.
/*
 * Only bad key order and invalid item offsets are repairable.  For each
 * root that references @buf (found via backref walking), a transaction
 * is started, the block is COWed into that root via a write search, and
 * the appropriate fixer is run before committing.
 */
4360 static int try_to_fix_bad_block(struct btrfs_root *root,
4361 struct extent_buffer *buf,
4362 enum btrfs_tree_block_status status)
4364 struct btrfs_trans_handle *trans;
4365 struct ulist *roots;
4366 struct ulist_node *node;
4367 struct btrfs_root *search_root;
4368 struct btrfs_path *path;
4369 struct ulist_iterator iter;
4370 struct btrfs_key root_key, key;
/* Anything other than these two failure modes is not fixable here. */
4373 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4374 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4377 path = btrfs_alloc_path();
4381 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4384 btrfs_free_path(path);
/* Repair the block once per referencing root. */
4388 ULIST_ITER_INIT(&iter);
4389 while ((node = ulist_next(roots, &iter))) {
4390 root_key.objectid = node->val;
4391 root_key.type = BTRFS_ROOT_ITEM_KEY;
4392 root_key.offset = (u64)-1;
4394 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4401 trans = btrfs_start_transaction(search_root, 0);
4402 if (IS_ERR(trans)) {
4403 ret = PTR_ERR(trans);
/* Search down to buf's own level with checks disabled so the
 * (known-bad) block can be reached and COWed for writing. */
4407 path->lowest_level = btrfs_header_level(buf);
4408 path->skip_check_block = 1;
4409 if (path->lowest_level)
4410 btrfs_node_key_to_cpu(buf, &key, 0);
4412 btrfs_item_key_to_cpu(buf, &key, 0);
4413 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4416 btrfs_commit_transaction(trans, search_root);
4419 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4420 ret = fix_key_order(trans, search_root, path);
4421 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4422 ret = fix_item_offset(trans, search_root, path);
4424 btrfs_commit_transaction(trans, search_root);
4427 btrfs_release_path(path);
4428 btrfs_commit_transaction(trans, search_root);
4431 btrfs_free_path(path);
/*
 * Validate a tree block against its cached extent record: fill in the
 * record's generation/first-key/level, run the standard leaf/node sanity
 * checks (attempting an in-place repair on failure), then verify the
 * owner backref unless the block uses full backrefs.
 */
4435 static int check_block(struct btrfs_root *root,
4436 struct cache_tree *extent_cache,
4437 struct extent_buffer *buf, u64 flags)
4439 struct extent_record *rec;
4440 struct cache_extent *cache;
4441 struct btrfs_key key;
4442 enum btrfs_tree_block_status status;
4446 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4449 rec = container_of(cache, struct extent_record, cache);
4450 rec->generation = btrfs_header_generation(buf);
4452 level = btrfs_header_level(buf);
4453 if (btrfs_header_nritems(buf) > 0) {
/* Remember the block's first key and level for later reporting. */
4456 btrfs_item_key_to_cpu(buf, &key, 0);
4458 btrfs_node_key_to_cpu(buf, &key, 0);
4460 rec->info_objectid = key.objectid;
4462 rec->info_level = level;
4464 if (btrfs_is_leaf(buf))
4465 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4467 status = btrfs_check_node(root, &rec->parent_key, buf);
4469 if (status != BTRFS_TREE_BLOCK_CLEAN) {
/* Try the limited in-place repairs before declaring the block bad. */
4471 status = try_to_fix_bad_block(root, buf, status);
4472 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4474 fprintf(stderr, "bad block %llu\n",
4475 (unsigned long long)buf->start);
4478 * Signal to callers we need to start the scan over
4479 * again since we'll have cowed blocks.
4484 rec->content_checked = 1;
/* Full-backref blocks carry no owner root, so owner check is moot. */
4485 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4486 rec->owner_ref_checked = 1;
4488 ret = check_owner_ref(root, rec, buf);
4490 rec->owner_ref_checked = 1;
4494 maybe_free_extent_rec(extent_cache, rec);
/*
 * Look up an existing tree backref on @rec keyed either by @parent
 * (full backref) or by @root, using the backref rb-tree.  Returns NULL
 * if no matching backref is recorded.
 */
4499 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4500 u64 parent, u64 root)
4502 struct rb_node *node;
4503 struct tree_backref *back = NULL;
4504 struct tree_backref match = {
/* A nonzero parent selects the full-backref (parent-keyed) form. */
4511 match.parent = parent;
4512 match.node.full_backref = 1;
4517 node = rb_search(&rec->backref_tree, &match.node.node,
4518 (rb_compare_keys)compare_extent_backref, NULL);
4520 back = to_tree_backref(rb_node_to_extent_backref(node));
/*
 * Allocate a fresh tree backref for @rec, keyed by @parent (full
 * backref) when parent is set, otherwise by @root, and insert it into
 * the record's backref rb-tree.
 */
4525 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4526 u64 parent, u64 root)
4528 struct tree_backref *ref = malloc(sizeof(*ref));
4532 memset(&ref->node, 0, sizeof(ref->node));
4534 ref->parent = parent;
4535 ref->node.full_backref = 1;
4538 ref->node.full_backref = 0;
4540 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
/*
 * Look up an existing data backref on @rec matching the given identity:
 * parent-keyed (full backref) when @parent is set, else root/owner/
 * offset-keyed, with found_ref and disk_bytenr/bytes taking part in the
 * comparison.  Returns NULL when nothing matches.
 */
4545 static struct data_backref *find_data_backref(struct extent_record *rec,
4546 u64 parent, u64 root,
4547 u64 owner, u64 offset,
4549 u64 disk_bytenr, u64 bytes)
4551 struct rb_node *node;
4552 struct data_backref *back = NULL;
4553 struct data_backref match = {
4560 .found_ref = found_ref,
4561 .disk_bytenr = disk_bytenr,
4565 match.parent = parent;
4566 match.node.full_backref = 1;
4571 node = rb_search(&rec->backref_tree, &match.node.node,
4572 (rb_compare_keys)compare_extent_backref, NULL);
4574 back = to_data_backref(rb_node_to_extent_backref(node));
/*
 * Allocate a fresh data backref for @rec (parent-keyed or root/owner/
 * offset-keyed), insert it into the backref rb-tree, and widen the
 * record's max_size when this ref claims a larger extent.
 */
4579 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4580 u64 parent, u64 root,
4581 u64 owner, u64 offset,
4584 struct data_backref *ref = malloc(sizeof(*ref));
4588 memset(&ref->node, 0, sizeof(ref->node));
4589 ref->node.is_data = 1;
/* Shared (parent-keyed) refs use the full-backref form. */
4592 ref->parent = parent;
4595 ref->node.full_backref = 1;
4599 ref->offset = offset;
4600 ref->node.full_backref = 0;
4602 ref->bytes = max_size;
4605 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4606 if (max_size > rec->max_size)
4607 rec->max_size = max_size;
4611 /* Check if the type of extent matches with its chunk */
/*
 * Sets rec->wrong_chunk_type when the extent's kind (data vs metadata,
 * and SYSTEM vs METADATA for tree blocks, inferred from the first
 * backref's root) disagrees with the flags of the block group that
 * contains it.
 */
4612 static void check_extent_type(struct extent_record *rec)
4614 struct btrfs_block_group_cache *bg_cache;
4616 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4620 /* data extent, check chunk directly*/
4621 if (!rec->metadata) {
4622 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4623 rec->wrong_chunk_type = 1;
4627 /* metadata extent, check the obvious case first */
4628 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4629 BTRFS_BLOCK_GROUP_METADATA))) {
4630 rec->wrong_chunk_type = 1;
4635 * Check SYSTEM extent, as it's also marked as metadata, we can only
4636 * make sure it's a SYSTEM extent by its backref
4638 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4639 struct extent_backref *node;
4640 struct tree_backref *tback;
4643 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4644 if (node->is_data) {
4645 /* tree block shouldn't have data backref */
4646 rec->wrong_chunk_type = 1;
4649 tback = container_of(node, struct tree_backref, node);
/* Chunk-tree-owned blocks must live in a SYSTEM block group. */
4651 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4652 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4654 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4655 if (!(bg_cache->flags & bg_type))
4656 rec->wrong_chunk_type = 1;
4661 * Allocate a new extent record, fill default values from @tmpl and insert int
4662 * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
4663 * the cache, otherwise it fails.
4665 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4666 struct extent_record *tmpl)
4668 struct extent_record *rec;
4671 rec = malloc(sizeof(*rec));
4674 rec->start = tmpl->start;
4675 rec->max_size = tmpl->max_size;
/* Effective size is the larger of the claimed nr and max_size. */
4676 rec->nr = max(tmpl->nr, tmpl->max_size);
4677 rec->found_rec = tmpl->found_rec;
4678 rec->content_checked = tmpl->content_checked;
4679 rec->owner_ref_checked = tmpl->owner_ref_checked;
4680 rec->num_duplicates = 0;
4681 rec->metadata = tmpl->metadata;
4682 rec->flag_block_full_backref = FLAG_UNSET;
4683 rec->bad_full_backref = 0;
4684 rec->crossing_stripes = 0;
4685 rec->wrong_chunk_type = 0;
4686 rec->is_root = tmpl->is_root;
4687 rec->refs = tmpl->refs;
4688 rec->extent_item_refs = tmpl->extent_item_refs;
4689 rec->parent_generation = tmpl->parent_generation;
4690 INIT_LIST_HEAD(&rec->backrefs);
4691 INIT_LIST_HEAD(&rec->dups);
4692 INIT_LIST_HEAD(&rec->list);
4693 rec->backref_tree = RB_ROOT;
4694 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4695 rec->cache.start = tmpl->start;
4696 rec->cache.size = tmpl->nr;
4697 ret = insert_cache_extent(extent_cache, &rec->cache);
4699 bytes_used += rec->nr;
/* Metadata may not straddle a stripe boundary (kernel scrub limit). */
4702 rec->crossing_stripes = check_crossing_stripes(rec->start,
4703 global_info->tree_root->nodesize);
4704 check_extent_type(rec);
4709 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4711 * - refs - if found, increase refs
4712 * - is_root - if found, set
4713 * - content_checked - if found, set
4714 * - owner_ref_checked - if found, set
4716 * If not found, create a new one, initialize and insert.
4718 static int add_extent_rec(struct cache_tree *extent_cache,
4719 struct extent_record *tmpl)
4721 struct extent_record *rec;
4722 struct cache_extent *cache;
4726 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4728 rec = container_of(cache, struct extent_record, cache);
4732 rec->nr = max(tmpl->nr, tmpl->max_size);
4735 * We need to make sure to reset nr to whatever the extent
4736 * record says was the real size, this way we can compare it to
4739 if (tmpl->found_rec) {
/* A second EXTENT_ITEM overlapping this record means duplicates. */
4740 if (tmpl->start != rec->start || rec->found_rec) {
4741 struct extent_record *tmp;
4744 if (list_empty(&rec->list))
4745 list_add_tail(&rec->list,
4746 &duplicate_extents);
4749 * We have to do this song and dance in case we
4750 * find an extent record that falls inside of
4751 * our current extent record but does not have
4752 * the same objectid.
4754 tmp = malloc(sizeof(*tmp));
4757 tmp->start = tmpl->start;
4758 tmp->max_size = tmpl->max_size;
4761 tmp->metadata = tmpl->metadata;
4762 tmp->extent_item_refs = tmpl->extent_item_refs;
4763 INIT_LIST_HEAD(&tmp->list);
4764 list_add_tail(&tmp->list, &rec->dups);
4765 rec->num_duplicates++;
4772 if (tmpl->extent_item_refs && !dup) {
/* Conflicting refcounts from two extent items: report, keep the new. */
4773 if (rec->extent_item_refs) {
4774 fprintf(stderr, "block %llu rec "
4775 "extent_item_refs %llu, passed %llu\n",
4776 (unsigned long long)tmpl->start,
4777 (unsigned long long)
4778 rec->extent_item_refs,
4779 (unsigned long long)tmpl->extent_item_refs);
4781 rec->extent_item_refs = tmpl->extent_item_refs;
4785 if (tmpl->content_checked)
4786 rec->content_checked = 1;
4787 if (tmpl->owner_ref_checked)
4788 rec->owner_ref_checked = 1;
4789 memcpy(&rec->parent_key, &tmpl->parent_key,
4790 sizeof(tmpl->parent_key));
4791 if (tmpl->parent_generation)
4792 rec->parent_generation = tmpl->parent_generation;
4793 if (rec->max_size < tmpl->max_size)
4794 rec->max_size = tmpl->max_size;
4797 * A metadata extent can't cross stripe_len boundary, otherwise
4798 * kernel scrub won't be able to handle it.
4799 * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
4803 rec->crossing_stripes = check_crossing_stripes(
4804 rec->start, global_info->tree_root->nodesize);
4805 check_extent_type(rec);
4806 maybe_free_extent_rec(extent_cache, rec);
/* Not found: fall back to a fresh insertion. */
4810 ret = add_extent_rec_nolookup(extent_cache, tmpl);
/*
 * Record a tree-block backref for @bytenr: create a placeholder extent
 * record if none exists, then find or allocate the matching tree
 * backref and mark it as seen either in the tree itself (@found_ref)
 * or in the extent tree.  Duplicate sightings are reported.
 */
4815 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4816 u64 parent, u64 root, int found_ref)
4818 struct extent_record *rec;
4819 struct tree_backref *back;
4820 struct cache_extent *cache;
4822 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4824 struct extent_record tmpl;
/* No record yet: insert a minimal placeholder, then re-look it up. */
4826 memset(&tmpl, 0, sizeof(tmpl));
4827 tmpl.start = bytenr;
4831 add_extent_rec_nolookup(extent_cache, &tmpl);
4833 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4838 rec = container_of(cache, struct extent_record, cache);
4839 if (rec->start != bytenr) {
4843 back = find_tree_backref(rec, parent, root);
4845 back = alloc_tree_backref(rec, parent, root);
4850 if (back->node.found_ref) {
4851 fprintf(stderr, "Extent back ref already exists "
4852 "for %llu parent %llu root %llu \n",
4853 (unsigned long long)bytenr,
4854 (unsigned long long)parent,
4855 (unsigned long long)root);
4857 back->node.found_ref = 1;
4859 if (back->node.found_extent_tree) {
4860 fprintf(stderr, "Extent back ref already exists "
4861 "for %llu parent %llu root %llu \n",
4862 (unsigned long long)bytenr,
4863 (unsigned long long)parent,
4864 (unsigned long long)root);
4866 back->node.found_extent_tree = 1;
4868 check_extent_type(rec);
4869 maybe_free_extent_rec(extent_cache, rec);
/*
 * Record a data-extent backref for @bytenr, creating a placeholder
 * extent record on first sight.  @found_ref distinguishes refs found in
 * file trees (where @max_size is the real extent size) from refs found
 * in the extent tree (which carry @num_refs).
 */
4873 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4874 u64 parent, u64 root, u64 owner, u64 offset,
4875 u32 num_refs, int found_ref, u64 max_size)
4877 struct extent_record *rec;
4878 struct data_backref *back;
4879 struct cache_extent *cache;
4881 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4883 struct extent_record tmpl;
/* No record yet: insert a minimal placeholder, then re-look it up. */
4885 memset(&tmpl, 0, sizeof(tmpl));
4886 tmpl.start = bytenr;
4888 tmpl.max_size = max_size;
4890 add_extent_rec_nolookup(extent_cache, &tmpl);
4892 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4897 rec = container_of(cache, struct extent_record, cache);
4898 if (rec->max_size < max_size)
4899 rec->max_size = max_size;
4902 * If found_ref is set then max_size is the real size and must match the
4903 * existing refs. So if we have already found a ref then we need to
4904 * make sure that this ref matches the existing one, otherwise we need
4905 * to add a new backref so we can notice that the backrefs don't match
4906 * and we need to figure out who is telling the truth. This is to
4907 * account for that awful fsync bug I introduced where we'd end up with
4908 * a btrfs_file_extent_item that would have its length include multiple
4909 * prealloc extents or point inside of a prealloc extent.
4911 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4914 back = alloc_data_backref(rec, parent, root, owner, offset,
/* File-tree sighting: sizes of repeat sightings must agree. */
4920 BUG_ON(num_refs != 1);
4921 if (back->node.found_ref)
4922 BUG_ON(back->bytes != max_size);
4923 back->node.found_ref = 1;
4924 back->found_ref += 1;
4925 back->bytes = max_size;
4926 back->disk_bytenr = bytenr;
/* Data extents need no content/owner verification pass. */
4928 rec->content_checked = 1;
4929 rec->owner_ref_checked = 1;
4931 if (back->node.found_extent_tree) {
4932 fprintf(stderr, "Extent back ref already exists "
4933 "for %llu parent %llu root %llu "
4934 "owner %llu offset %llu num_refs %lu\n",
4935 (unsigned long long)bytenr,
4936 (unsigned long long)parent,
4937 (unsigned long long)root,
4938 (unsigned long long)owner,
4939 (unsigned long long)offset,
4940 (unsigned long)num_refs);
4942 back->num_refs = num_refs;
4943 back->node.found_extent_tree = 1;
4945 maybe_free_extent_rec(extent_cache, rec);
/*
 * Queue [bytenr, bytenr+size) for scanning unless it is already in the
 * @seen tree; the @seen insert failing is the already-visited signal.
 */
4949 static int add_pending(struct cache_tree *pending,
4950 struct cache_tree *seen, u64 bytenr, u32 size)
4953 ret = add_cache_extent(seen, bytenr, size);
4956 add_cache_extent(pending, bytenr, size);
/*
 * Choose the next batch of blocks to process, filling @bits (up to
 * @bits_nr entries).  Priority: readahead requests, then tree nodes
 * near @last (to keep reads roughly sequential), then plain pending
 * blocks; small gaps (<= 32K) between pending runs are bridged.
 */
4960 static int pick_next_pending(struct cache_tree *pending,
4961 struct cache_tree *reada,
4962 struct cache_tree *nodes,
4963 u64 last, struct block_info *bits, int bits_nr,
4966 unsigned long node_start = last;
4967 struct cache_extent *cache;
/* Outstanding readahead takes precedence over everything else. */
4970 cache = search_cache_extent(reada, 0);
4972 bits[0].start = cache->start;
4973 bits[0].size = cache->size;
/* Back up 32K so we pick nodes just before the last position too. */
4978 if (node_start > 32768)
4979 node_start -= 32768;
4981 cache = search_cache_extent(nodes, node_start);
4983 cache = search_cache_extent(nodes, 0);
4986 cache = search_cache_extent(pending, 0);
4991 bits[ret].start = cache->start;
4992 bits[ret].size = cache->size;
4993 cache = next_cache_extent(cache);
4995 } while (cache && ret < bits_nr);
5001 bits[ret].start = cache->start;
5002 bits[ret].size = cache->size;
5003 cache = next_cache_extent(cache);
5005 } while (cache && ret < bits_nr);
/* Spare capacity: pull in nearby pending extents after the batch. */
5007 if (bits_nr - ret > 8) {
5008 u64 lookup = bits[0].start + bits[0].size;
5009 struct cache_extent *next;
5010 next = search_cache_extent(pending, lookup);
5012 if (next->start - lookup > 32768)
5014 bits[ret].start = next->start;
5015 bits[ret].size = next->size;
5016 lookup = next->start + next->size;
5020 next = next_cache_extent(next);
/* Destructor for a chunk_record held in a cache tree: unlink, then free. */
5028 static void free_chunk_record(struct cache_extent *cache)
5030 struct chunk_record *rec;
5032 rec = container_of(cache, struct chunk_record, cache);
5033 list_del_init(&rec->list);
5034 list_del_init(&rec->dextents);
/* Free every chunk record in @chunk_cache and empty the tree. */
5038 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5040 cache_tree_free_extents(chunk_cache, free_chunk_record);
/* Destructor for a device_record stored in the device rb-tree. */
5043 static void free_device_record(struct rb_node *node)
5045 struct device_record *rec;
5047 rec = container_of(node, struct device_record, node);
/* NOTE(review): enclosing function header elided by sampling; this
 * line tears down the whole device cache using the destructor above. */
5051 FREE_RB_BASED_TREE(device_cache, free_device_record);
/*
 * Insert a block-group record into @tree's cache and, on success,
 * append it to the tree's block_groups list.
 */
5053 int insert_block_group_record(struct block_group_tree *tree,
5054 struct block_group_record *bg_rec)
5058 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5062 list_add_tail(&bg_rec->list, &tree->block_groups);
/* Destructor for a block_group_record held in a cache tree. */
5066 static void free_block_group_record(struct cache_extent *cache)
5068 struct block_group_record *rec;
5070 rec = container_of(cache, struct block_group_record, cache);
5071 list_del_init(&rec->list);
/* Free every block-group record in @tree and empty its cache. */
5075 void free_block_group_tree(struct block_group_tree *tree)
5077 cache_tree_free_extents(&tree->tree, free_block_group_record);
/*
 * Insert a device-extent record into @tree and link it onto both orphan
 * lists (no chunk / no device) until its owners are matched up.
 */
5080 int insert_device_extent_record(struct device_extent_tree *tree,
5081 struct device_extent_record *de_rec)
5086 * Device extent is a bit different from the other extents, because
5087 * the extents which belong to the different devices may have the
5088 * same start and size, so we need use the special extent cache
5089 * search/insert functions.
5091 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5095 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5096 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
/* Destructor for a device_extent_record: drop any orphan-list links first. */
5100 static void free_device_extent_record(struct cache_extent *cache)
5102 struct device_extent_record *rec;
5104 rec = container_of(cache, struct device_extent_record, cache);
5105 if (!list_empty(&rec->chunk_list))
5106 list_del_init(&rec->chunk_list);
5107 if (!list_empty(&rec->device_list))
5108 list_del_init(&rec->device_list);
/* Free every device-extent record in @tree and empty its cache. */
5112 void free_device_extent_tree(struct device_extent_tree *tree)
5114 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5117 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Translate a v0-format extent ref item into the modern backref
 * bookkeeping: owners below FIRST_FREE_OBJECTID are tree roots (tree
 * backref), anything else is file data (data backref).
 */
5118 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5119 struct extent_buffer *leaf, int slot)
5121 struct btrfs_extent_ref_v0 *ref0;
5122 struct btrfs_key key;
5124 btrfs_item_key_to_cpu(leaf, &key, slot);
5125 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5126 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5127 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5129 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5130 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
/*
 * Build an in-memory chunk_record from the CHUNK_ITEM at @slot,
 * including its per-stripe devid/offset/uuid table.  The allocation is
 * sized for @num_stripes via btrfs_chunk_record_size().
 */
5136 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5137 struct btrfs_key *key,
5140 struct btrfs_chunk *ptr;
5141 struct chunk_record *rec;
5144 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5145 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5147 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5149 fprintf(stderr, "memory allocation failed\n");
5153 INIT_LIST_HEAD(&rec->list);
5154 INIT_LIST_HEAD(&rec->dextents);
/* Chunk items are keyed by logical start (key->offset). */
5157 rec->cache.start = key->offset;
5158 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5160 rec->generation = btrfs_header_generation(leaf);
5162 rec->objectid = key->objectid;
5163 rec->type = key->type;
5164 rec->offset = key->offset;
5166 rec->length = rec->cache.size;
5167 rec->owner = btrfs_chunk_owner(leaf, ptr);
5168 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5169 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5170 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5171 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5172 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5173 rec->num_stripes = num_stripes;
5174 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5176 for (i = 0; i < rec->num_stripes; ++i) {
5177 rec->stripes[i].devid =
5178 btrfs_stripe_devid_nr(leaf, ptr, i);
5179 rec->stripes[i].offset =
5180 btrfs_stripe_offset_nr(leaf, ptr, i);
5181 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5182 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
/*
 * Parse a CHUNK_ITEM into a chunk_record and add it to @chunk_cache;
 * an insert failure means an overlapping chunk already exists.
 */
5189 static int process_chunk_item(struct cache_tree *chunk_cache,
5190 struct btrfs_key *key, struct extent_buffer *eb,
5193 struct chunk_record *rec;
5196 rec = btrfs_new_chunk_record(eb, key, slot);
5197 ret = insert_cache_extent(chunk_cache, &rec->cache);
5199 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5200 rec->offset, rec->length);
/*
 * Parse a DEV_ITEM into a device_record and insert it into the device
 * rb-tree keyed by devid; an insert failure means a duplicate device.
 */
5207 static int process_device_item(struct rb_root *dev_cache,
5208 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5210 struct btrfs_dev_item *ptr;
5211 struct device_record *rec;
5214 ptr = btrfs_item_ptr(eb,
5215 slot, struct btrfs_dev_item);
5217 rec = malloc(sizeof(*rec));
5219 fprintf(stderr, "memory allocation failed\n");
5223 rec->devid = key->offset;
5224 rec->generation = btrfs_header_generation(eb);
5226 rec->objectid = key->objectid;
5227 rec->type = key->type;
5228 rec->offset = key->offset;
/* Overwrite with the authoritative devid from the item body. */
5230 rec->devid = btrfs_device_id(eb, ptr);
5231 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5232 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5234 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5236 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
/*
 * Build an in-memory block_group_record from the BLOCK_GROUP_ITEM at
 * @slot; the cache range is [objectid, objectid+offset).
 */
5243 struct block_group_record *
5244 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5247 struct btrfs_block_group_item *ptr;
5248 struct block_group_record *rec;
5250 rec = calloc(1, sizeof(*rec));
5252 fprintf(stderr, "memory allocation failed\n");
5256 rec->cache.start = key->objectid;
5257 rec->cache.size = key->offset;
5259 rec->generation = btrfs_header_generation(leaf);
5261 rec->objectid = key->objectid;
5262 rec->type = key->type;
5263 rec->offset = key->offset;
5265 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5266 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5268 INIT_LIST_HEAD(&rec->list);
/*
 * Parse a BLOCK_GROUP_ITEM and add it to @block_group_cache; an insert
 * failure means an overlapping block group already exists.
 */
5273 static int process_block_group_item(struct block_group_tree *block_group_cache,
5274 struct btrfs_key *key,
5275 struct extent_buffer *eb, int slot)
5277 struct block_group_record *rec;
5280 rec = btrfs_new_block_group_record(eb, key, slot);
5281 ret = insert_block_group_record(block_group_cache, rec);
5283 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5284 rec->objectid, rec->offset);
/*
 * Build an in-memory device_extent_record from the DEV_EXTENT item at
 * @slot, keyed by (devid = objectid, physical start = offset).
 */
5291 struct device_extent_record *
5292 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5293 struct btrfs_key *key, int slot)
5295 struct device_extent_record *rec;
5296 struct btrfs_dev_extent *ptr;
5298 rec = calloc(1, sizeof(*rec));
5300 fprintf(stderr, "memory allocation failed\n");
5304 rec->cache.objectid = key->objectid;
5305 rec->cache.start = key->offset;
5307 rec->generation = btrfs_header_generation(leaf);
5309 rec->objectid = key->objectid;
5310 rec->type = key->type;
5311 rec->offset = key->offset;
5313 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5314 rec->chunk_objecteid =
5315 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5317 btrfs_dev_extent_chunk_offset(leaf, ptr);
5318 rec->length = btrfs_dev_extent_length(leaf, ptr);
5319 rec->cache.size = rec->length;
5321 INIT_LIST_HEAD(&rec->chunk_list);
5322 INIT_LIST_HEAD(&rec->device_list);
/*
 * Parse a DEV_EXTENT item and add it to @dev_extent_cache; an insert
 * failure means this device extent was already recorded.
 */
5328 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5329 struct btrfs_key *key, struct extent_buffer *eb,
5332 struct device_extent_record *rec;
5335 rec = btrfs_new_device_extent_record(eb, key, slot);
5336 ret = insert_device_extent_record(dev_extent_cache, rec);
5339 "Device extent[%llu, %llu, %llu] existed.\n",
5340 rec->objectid, rec->offset, rec->length);
/*
 * Parse an EXTENT_ITEM / METADATA_ITEM: record the extent itself (with
 * its refcount and metadata flag) and then walk the inline refs,
 * registering each as a tree or data backref.  Handles the pre-ref
 * v0 item layout under BTRFS_COMPAT_EXTENT_TREE_V0.
 */
5347 static int process_extent_item(struct btrfs_root *root,
5348 struct cache_tree *extent_cache,
5349 struct extent_buffer *eb, int slot)
5351 struct btrfs_extent_item *ei;
5352 struct btrfs_extent_inline_ref *iref;
5353 struct btrfs_extent_data_ref *dref;
5354 struct btrfs_shared_data_ref *sref;
5355 struct btrfs_key key;
5356 struct extent_record tmpl;
5360 u32 item_size = btrfs_item_size_nr(eb, slot);
5366 btrfs_item_key_to_cpu(eb, &key, slot);
/* METADATA_ITEM keys store the level in offset; size is one node. */
5368 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5370 num_bytes = root->nodesize;
5372 num_bytes = key.offset;
5375 if (item_size < sizeof(*ei)) {
5376 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5377 struct btrfs_extent_item_v0 *ei0;
5378 BUG_ON(item_size != sizeof(*ei0));
5379 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5380 refs = btrfs_extent_refs_v0(eb, ei0);
/* v0 items have no inline refs; record the extent and return. */
5384 memset(&tmpl, 0, sizeof(tmpl));
5385 tmpl.start = key.objectid;
5386 tmpl.nr = num_bytes;
5387 tmpl.extent_item_refs = refs;
5388 tmpl.metadata = metadata;
5390 tmpl.max_size = num_bytes;
5392 return add_extent_rec(extent_cache, &tmpl);
5395 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5396 refs = btrfs_extent_refs(eb, ei);
5397 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5402 memset(&tmpl, 0, sizeof(tmpl));
5403 tmpl.start = key.objectid;
5404 tmpl.nr = num_bytes;
5405 tmpl.extent_item_refs = refs;
5406 tmpl.metadata = metadata;
5408 tmpl.max_size = num_bytes;
5409 add_extent_rec(extent_cache, &tmpl);
/* Walk the inline refs following the extent item body. */
5411 ptr = (unsigned long)(ei + 1);
5412 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5413 key.type == BTRFS_EXTENT_ITEM_KEY)
5414 ptr += sizeof(struct btrfs_tree_block_info);
5416 end = (unsigned long)ei + item_size;
5418 iref = (struct btrfs_extent_inline_ref *)ptr;
5419 type = btrfs_extent_inline_ref_type(eb, iref);
5420 offset = btrfs_extent_inline_ref_offset(eb, iref);
5422 case BTRFS_TREE_BLOCK_REF_KEY:
5423 add_tree_backref(extent_cache, key.objectid,
5426 case BTRFS_SHARED_BLOCK_REF_KEY:
5427 add_tree_backref(extent_cache, key.objectid,
5430 case BTRFS_EXTENT_DATA_REF_KEY:
5431 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5432 add_data_backref(extent_cache, key.objectid, 0,
5433 btrfs_extent_data_ref_root(eb, dref),
5434 btrfs_extent_data_ref_objectid(eb,
5436 btrfs_extent_data_ref_offset(eb, dref),
5437 btrfs_extent_data_ref_count(eb, dref),
5440 case BTRFS_SHARED_DATA_REF_KEY:
5441 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5442 add_data_backref(extent_cache, key.objectid, offset,
5444 btrfs_shared_data_ref_count(eb, sref),
5448 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5449 key.objectid, key.type, num_bytes);
5452 ptr += btrfs_extent_inline_ref_size(type);
/*
 * Verify that [offset, offset+bytes) of @cache is represented by a free
 * space entry, after carving out any superblock mirror that overlaps
 * the range (supers are never free space).  Recurses when a super lands
 * in the middle of the range.  Consumes matching free-space entries.
 */
5459 static int check_cache_range(struct btrfs_root *root,
5460 struct btrfs_block_group_cache *cache,
5461 u64 offset, u64 bytes)
5463 struct btrfs_free_space *entry;
/* Map each super mirror into this block group and clip it out. */
5469 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5470 bytenr = btrfs_sb_offset(i);
5471 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5472 cache->key.objectid, bytenr, 0,
5473 &logical, &nr, &stripe_len);
/* No overlap with the checked range: ignore this mirror. */
5478 if (logical[nr] + stripe_len <= offset)
5480 if (offset + bytes <= logical[nr])
5482 if (logical[nr] == offset) {
5483 if (stripe_len >= bytes) {
5487 bytes -= stripe_len;
5488 offset += stripe_len;
5489 } else if (logical[nr] < offset) {
5490 if (logical[nr] + stripe_len >=
5495 bytes = (offset + bytes) -
5496 (logical[nr] + stripe_len);
5497 offset = logical[nr] + stripe_len;
5500 * Could be tricky, the super may land in the
5501 * middle of the area we're checking. First
5502 * check the easiest case, it's at the end.
5504 if (logical[nr] + stripe_len >=
5506 bytes = logical[nr] - offset;
5510 /* Check the left side */
5511 ret = check_cache_range(root, cache,
5513 logical[nr] - offset);
5519 /* Now we continue with the right side */
5520 bytes = (offset + bytes) -
5521 (logical[nr] + stripe_len);
5522 offset = logical[nr] + stripe_len;
/* The remaining range must match one free-space entry exactly. */
5529 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5531 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5532 offset, offset+bytes);
5536 if (entry->offset != offset) {
5537 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5542 if (entry->bytes != bytes) {
5543 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5544 bytes, entry->bytes, offset);
/* Consume the entry so duplicate coverage is detected later. */
5548 unlink_free_space(cache->free_space_ctl, entry);
/*
 * Cross-check one block group's free-space cache against the extent tree.
 *
 * Walks all EXTENT_ITEM / METADATA_ITEM keys inside the block group's
 * range; every gap between consecutive allocated extents must be covered
 * by a free-space entry (checked via check_cache_range, which unlinks
 * matching entries).  Any entries still present at the end indicate a
 * corrupt cache.
 *
 * NOTE(review): line-numbered excerpt with interior lines missing.
 */
5553 static int verify_space_cache(struct btrfs_root *root,
5554 struct btrfs_block_group_cache *cache)
5556 struct btrfs_path *path;
5557 struct extent_buffer *leaf;
5558 struct btrfs_key key;
5562 path = btrfs_alloc_path();
/* All lookups below are against the extent tree, not the passed root. */
5566 root = root->fs_info->extent_root;
/* Never scan below the first superblock; that space is always reserved. */
5568 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5570 key.objectid = last;
5572 key.type = BTRFS_EXTENT_ITEM_KEY;
5574 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5579 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5580 ret = btrfs_next_leaf(root, path);
5588 leaf = path->nodes[0];
5589 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Stop once we leave this block group's byte range. */
5590 if (key.objectid >= cache->key.offset + cache->key.objectid)
5592 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5593 key.type != BTRFS_METADATA_ITEM_KEY) {
5598 if (last == key.objectid) {
/* METADATA_ITEM offsets are tree levels, so size is nodesize instead. */
5599 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5600 last = key.objectid + key.offset;
5602 last = key.objectid + root->nodesize;
/* Gap between 'last' and this extent must be free space. */
5607 ret = check_cache_range(root, cache, last,
5608 key.objectid - last);
5611 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5612 last = key.objectid + key.offset;
5614 last = key.objectid + root->nodesize;
/* Tail of the block group after the final extent must also be free. */
5618 if (last < cache->key.objectid + cache->key.offset)
5619 ret = check_cache_range(root, cache, last,
5620 cache->key.objectid +
5621 cache->key.offset - last);
5624 btrfs_free_path(path);
/* check_cache_range unlinked matched entries; leftovers mean trouble. */
5627 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5628 fprintf(stderr, "There are still entries left in the space "
/*
 * Validate the free-space cache (v1 file-based or v2 free-space-tree)
 * for every block group in the filesystem.
 *
 * Returns 0 when all caches verify, -EINVAL when any error was seen.
 * A cache_generation/generation mismatch in the superblock just means
 * the cache will be invalidated at next mount, so only a notice is
 * printed for that case.
 *
 * NOTE(review): line-numbered excerpt with interior lines missing.
 */
5636 static int check_space_cache(struct btrfs_root *root)
5638 struct btrfs_block_group_cache *cache;
/* Start past the primary superblock; everything before it is reserved. */
5639 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5643 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5644 btrfs_super_generation(root->fs_info->super_copy) !=
5645 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5646 printf("cache and super generation don't match, space cache "
5647 "will be invalidated\n");
5651 if (ctx.progress_enabled) {
5652 ctx.tp = TASK_FREE_SPACE;
5653 task_start(ctx.info);
/* Iterate block groups in address order. */
5657 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5661 start = cache->key.objectid + cache->key.offset;
5662 if (!cache->free_space_ctl) {
5663 if (btrfs_init_free_space_ctl(cache,
5664 root->sectorsize)) {
5669 btrfs_remove_free_space_cache(cache);
/* free-space-tree (v2): super stripes must be excluded before loading. */
5672 if (btrfs_fs_compat_ro(root->fs_info,
5673 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5674 ret = exclude_super_stripes(root, cache);
5676 fprintf(stderr, "could not exclude super stripes: %s\n",
5681 ret = load_free_space_tree(root->fs_info, cache);
5682 free_excluded_extents(root, cache);
5684 fprintf(stderr, "could not load free space tree: %s\n",
/* v1 file-based cache path. */
5691 ret = load_free_space_cache(root->fs_info, cache);
5696 ret = verify_space_cache(root, cache);
5698 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5699 cache->key.objectid);
5704 task_stop(ctx.info);
5706 return error ? -EINVAL : 0;
/*
 * Read @num_bytes of data starting at @bytenr and verify each sector's
 * checksum against the on-disk csums stored in @eb at @leaf_offset.
 *
 * On a mismatch the next mirror is tried (mirror count comes from
 * btrfs_num_copies).  The whole extent is buffered in one malloc'd
 * region and read in as few read_extent_data() calls as possible.
 *
 * NOTE(review): line-numbered excerpt with interior lines missing
 * (mirror-retry control flow and cleanup are not fully visible).
 */
5709 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5710 u64 num_bytes, unsigned long leaf_offset,
5711 struct extent_buffer *eb) {
5714 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5716 unsigned long csum_offset;
5720 u64 data_checked = 0;
/* Data extents must be sector aligned; anything else is corruption. */
5726 if (num_bytes % root->sectorsize)
5729 data = malloc(num_bytes);
5733 while (offset < num_bytes) {
5736 read_len = num_bytes - offset;
5737 /* read as much space once a time */
5738 ret = read_extent_data(root, data + offset,
5739 bytenr + offset, &read_len, mirror);
5743 /* verify every 4k data's checksum */
5744 while (data_checked < read_len) {
5746 tmp = offset + data_checked;
5748 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5749 csum, root->sectorsize);
5750 btrfs_csum_final(csum, (char *)&csum);
/* Locate the stored csum for this sector inside the csum item. */
5752 csum_offset = leaf_offset +
5753 tmp / root->sectorsize * csum_size;
5754 read_extent_buffer(eb, (char *)&csum_expected,
5755 csum_offset, csum_size);
5756 /* try another mirror */
5757 if (csum != csum_expected) {
5758 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5759 mirror, bytenr + tmp,
5760 csum, csum_expected);
5761 num_copies = btrfs_num_copies(
5762 &root->fs_info->mapping_tree,
5764 if (mirror < num_copies - 1) {
5769 data_checked += root->sectorsize;
/*
 * Verify that the byte range [bytenr, bytenr + num_bytes) is fully
 * covered by EXTENT_ITEMs in the extent tree.
 *
 * Used by check_csums() to confirm that every checksummed range has a
 * matching extent record.  The range is progressively shrunk as covering
 * extents are found; a non-empty remainder at the end is reported as an
 * error.  A covering extent that splits the range in two triggers a
 * recursive check of the right-hand piece.
 *
 * NOTE(review): line-numbered excerpt with interior lines missing.
 */
5778 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5781 struct btrfs_path *path;
5782 struct extent_buffer *leaf;
5783 struct btrfs_key key;
5786 path = btrfs_alloc_path();
5788 fprintf(stderr, "Error allocating path\n");
/* Search with offset (u64)-1 so we land just after any item at bytenr. */
5792 key.objectid = bytenr;
5793 key.type = BTRFS_EXTENT_ITEM_KEY;
5794 key.offset = (u64)-1;
5797 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5800 fprintf(stderr, "Error looking up extent record %d\n", ret);
5801 btrfs_free_path(path);
5804 if (path->slots[0] > 0) {
5807 ret = btrfs_prev_leaf(root, path);
5810 } else if (ret > 0) {
5817 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5820 * Block group items come before extent items if they have the same
5821 * bytenr, so walk back one more just in case. Dear future traveller,
5822 * first congrats on mastering time travel. Now if it's not too much
5823 * trouble could you go back to 2006 and tell Chris to make the
5824 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5825 * EXTENT_ITEM_KEY please?
5827 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5828 if (path->slots[0] > 0) {
5831 ret = btrfs_prev_leaf(root, path);
5834 } else if (ret > 0) {
5839 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
/* Main scan: walk forward consuming extents that overlap our range. */
5843 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5844 ret = btrfs_next_leaf(root, path);
5846 fprintf(stderr, "Error going to next leaf "
5848 btrfs_free_path(path);
5854 leaf = path->nodes[0];
5855 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5856 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Extent ends before our range starts — not relevant, keep walking. */
5860 if (key.objectid + key.offset < bytenr) {
5864 if (key.objectid > bytenr + num_bytes)
5867 if (key.objectid == bytenr) {
5868 if (key.offset >= num_bytes) {
/* Extent covers a prefix of the range; shrink from the left. */
5872 num_bytes -= key.offset;
5873 bytenr += key.offset;
5874 } else if (key.objectid < bytenr) {
5875 if (key.objectid + key.offset >= bytenr + num_bytes) {
5879 num_bytes = (bytenr + num_bytes) -
5880 (key.objectid + key.offset);
5881 bytenr = key.objectid + key.offset;
5883 if (key.objectid + key.offset < bytenr + num_bytes) {
5884 u64 new_start = key.objectid + key.offset;
5885 u64 new_bytes = bytenr + num_bytes - new_start;
5888 * Weird case, the extent is in the middle of
5889 * our range, we'll have to search one side
5890 * and then the other. Not sure if this happens
5891 * in real life, but no harm in coding it up
5892 * anyway just in case.
5894 btrfs_release_path(path);
5895 ret = check_extent_exists(root, new_start,
5898 fprintf(stderr, "Right section didn't "
5902 num_bytes = key.objectid - bytenr;
5905 num_bytes = key.objectid - bytenr;
/* Anything left uncovered means csums exist with no backing extent. */
5912 if (num_bytes && !ret) {
5913 fprintf(stderr, "There are no extents for csum range "
5914 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5918 btrfs_free_path(path);
/*
 * Walk the entire checksum tree.  For each run of contiguous csum items:
 *   - optionally (when check_data_csum is set) re-read the data and
 *     verify every sector's checksum via check_extent_csums(),
 *   - always verify that the accumulated csum range is backed by real
 *     extent records via check_extent_exists().
 *
 * Contiguity is tracked with (offset, num_bytes); a gap in key.offset
 * flushes the accumulated range to check_extent_exists().
 *
 * NOTE(review): line-numbered excerpt with interior lines missing.
 */
5922 static int check_csums(struct btrfs_root *root)
5924 struct btrfs_path *path;
5925 struct extent_buffer *leaf;
5926 struct btrfs_key key;
5927 u64 offset = 0, num_bytes = 0;
5928 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5932 unsigned long leaf_offset;
5934 root = root->fs_info->csum_root;
5935 if (!extent_buffer_uptodate(root->node)) {
5936 fprintf(stderr, "No valid csum tree found\n");
5940 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5941 key.type = BTRFS_EXTENT_CSUM_KEY;
5944 path = btrfs_alloc_path();
5948 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5950 fprintf(stderr, "Error searching csum tree %d\n", ret);
5951 btrfs_free_path(path);
/* ret > 0 means we landed past the key; step back one slot. */
5955 if (ret > 0 && path->slots[0])
5960 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5961 ret = btrfs_next_leaf(root, path);
5963 fprintf(stderr, "Error going to next leaf "
5970 leaf = path->nodes[0];
5972 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5973 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
/* Each csum_size bytes in the item covers one sector of data. */
5978 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5979 csum_size) * root->sectorsize;
5980 if (!check_data_csum)
5981 goto skip_csum_check;
5982 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5983 ret = check_extent_csums(root, key.offset, data_len,
5989 offset = key.offset;
/* Gap in csum coverage: flush the accumulated contiguous range. */
5990 } else if (key.offset != offset + num_bytes) {
5991 ret = check_extent_exists(root, offset, num_bytes);
5993 fprintf(stderr, "Csum exists for %Lu-%Lu but "
5994 "there is no extent record\n",
5995 offset, offset+num_bytes);
5998 offset = key.offset;
6001 num_bytes += data_len;
6005 btrfs_free_path(path);
/*
 * Lexicographic btrfs key comparison: return whether @key sorts before
 * @drop_key (objectid, then type, then offset).  Used while walking a
 * partially-dropped snapshot to skip keys below the drop progress point.
 *
 * NOTE(review): the 'return 1' lines are missing from this excerpt.
 */
6009 static int is_dropped_key(struct btrfs_key *key,
6010 struct btrfs_key *drop_key) {
6011 if (key->objectid < drop_key->objectid)
6013 else if (key->objectid == drop_key->objectid) {
6014 if (key->type < drop_key->type)
6016 else if (key->type == drop_key->type) {
6017 if (key->offset < drop_key->offset)
6025 * Here are the rules for FULL_BACKREF.
6027 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6028 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6030 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6031 * if it happened after the relocation occurred since we'll have dropped the
6032 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6033 * have no real way to know for sure.
6035 * We process the blocks one root at a time, and we start from the lowest root
6036 * objectid and go to the highest. So we can just lookup the owner backref for
6037 * the record and if we don't find it then we know it doesn't exist and we have
6040 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6041 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6042 * be set or not and then we can check later once we've gathered all the refs.
/*
 * Decide whether @buf should carry BTRFS_BLOCK_FLAG_FULL_BACKREF per the
 * rules above, writing the result into *flags and recording a
 * bad_full_backref mismatch on the extent record when the previously
 * observed flag state disagrees.
 *
 * NOTE(review): line-numbered excerpt with interior lines missing.
 */
6044 static int calc_extent_flag(struct btrfs_root *root,
6045 struct cache_tree *extent_cache,
6046 struct extent_buffer *buf,
6047 struct root_item_record *ri,
6050 struct extent_record *rec;
6051 struct cache_extent *cache;
6052 struct tree_backref *tback;
6055 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6056 /* we have added this extent before */
6058 rec = container_of(cache, struct extent_record, cache);
6061 * Except file/reloc tree, we can not have
6064 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
/* A root node can't have FULL_BACKREF — skip to the normal case. */
6069 if (buf->start == ri->bytenr)
/* Rule 1: RELOC header flag forces FULL_BACKREF. */
6072 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
/* Rule 2: owner mismatch with no owner backref means FULL_BACKREF. */
6075 owner = btrfs_header_owner(buf);
6076 if (owner == ri->objectid)
6079 tback = find_tree_backref(rec, 0, owner);
/* Record disagreement with any previously seen flag state. */
6084 if (rec->flag_block_full_backref != FLAG_UNSET &&
6085 rec->flag_block_full_backref != 0)
6086 rec->bad_full_backref = 1;
6089 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6090 if (rec->flag_block_full_backref != FLAG_UNSET &&
6091 rec->flag_block_full_backref != 1)
6092 rec->bad_full_backref = 1;
/*
 * Process the next pending tree block in the whole-filesystem scan.
 *
 * Picks a block via pick_next_pending() (possibly batching readahead),
 * reads it, resolves its FULL_BACKREF state, sanity-checks it with
 * check_block(), then:
 *   - for leaves: dispatches every item by key.type into the various
 *     caches (extents, chunks, devices, block groups, dev extents,
 *     backrefs) and accounts data extent usage;
 *   - for nodes: records an extent record + tree backref for each child
 *     pointer and queues the children (level 1 children go to 'pending'
 *     for ordered processing, higher levels to 'nodes').
 * Global byte counters (total_btree_bytes etc.) are updated at the end.
 *
 * NOTE(review): line-numbered excerpt; many interior lines (error
 * handling, braces, some assignments) are missing from view.
 */
6096 static int run_next_block(struct btrfs_root *root,
6097 struct block_info *bits,
6100 struct cache_tree *pending,
6101 struct cache_tree *seen,
6102 struct cache_tree *reada,
6103 struct cache_tree *nodes,
6104 struct cache_tree *extent_cache,
6105 struct cache_tree *chunk_cache,
6106 struct rb_root *dev_cache,
6107 struct block_group_tree *block_group_cache,
6108 struct device_extent_tree *dev_extent_cache,
6109 struct root_item_record *ri)
6111 struct extent_buffer *buf;
6112 struct extent_record *rec = NULL;
6123 struct btrfs_key key;
6124 struct cache_extent *cache;
6127 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6128 bits_nr, &reada_bits);
/* Batch readahead for the blocks we are about to process. */
6133 for(i = 0; i < nritems; i++) {
6134 ret = add_cache_extent(reada, bits[i].start,
6139 /* fixme, get the parent transid */
6140 readahead_tree_block(root, bits[i].start,
6144 *last = bits[0].start;
6145 bytenr = bits[0].start;
6146 size = bits[0].size;
/* Drop this block from every tracking tree before processing it. */
6148 cache = lookup_cache_extent(pending, bytenr, size);
6150 remove_cache_extent(pending, cache);
6153 cache = lookup_cache_extent(reada, bytenr, size);
6155 remove_cache_extent(reada, cache);
6158 cache = lookup_cache_extent(nodes, bytenr, size);
6160 remove_cache_extent(nodes, cache);
6163 cache = lookup_cache_extent(extent_cache, bytenr, size);
6165 rec = container_of(cache, struct extent_record, cache);
6166 gen = rec->parent_generation;
6169 /* fixme, get the real parent transid */
6170 buf = read_tree_block(root, bytenr, size, gen);
6171 if (!extent_buffer_uptodate(buf)) {
6172 record_bad_block_io(root->fs_info,
6173 extent_cache, bytenr, size);
6177 nritems = btrfs_header_nritems(buf);
/* Get the block's flags from the extent tree, or compute them when
   rebuilding the extent tree (init_extent_tree). */
6180 if (!init_extent_tree) {
6181 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6182 btrfs_header_level(buf), 1, NULL,
6185 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6187 fprintf(stderr, "Couldn't calc extent flags\n");
6188 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6193 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6195 fprintf(stderr, "Couldn't calc extent flags\n");
6196 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6200 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6202 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6203 ri->objectid == btrfs_header_owner(buf)) {
6205 * Ok we got to this block from it's original owner and
6206 * we have FULL_BACKREF set. Relocation can leave
6207 * converted blocks over so this is altogether possible,
6208 * however it's not possible if the generation > the
6209 * last snapshot, so check for this case.
6211 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6212 btrfs_header_generation(buf) > ri->last_snapshot) {
6213 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6214 rec->bad_full_backref = 1;
6219 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6220 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6221 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6222 rec->bad_full_backref = 1;
6226 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6227 rec->flag_block_full_backref = 1;
6231 rec->flag_block_full_backref = 0;
6233 owner = btrfs_header_owner(buf);
6236 ret = check_block(root, extent_cache, buf, flags);
/* Leaf: dispatch every item by key type. */
6240 if (btrfs_is_leaf(buf)) {
6241 btree_space_waste += btrfs_leaf_free_space(root, buf);
6242 for (i = 0; i < nritems; i++) {
6243 struct btrfs_file_extent_item *fi;
6244 btrfs_item_key_to_cpu(buf, &key, i);
6245 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6246 process_extent_item(root, extent_cache, buf,
6250 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6251 process_extent_item(root, extent_cache, buf,
6255 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6257 btrfs_item_size_nr(buf, i);
6260 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6261 process_chunk_item(chunk_cache, &key, buf, i);
6264 if (key.type == BTRFS_DEV_ITEM_KEY) {
6265 process_device_item(dev_cache, &key, buf, i);
6268 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6269 process_block_group_item(block_group_cache,
6273 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6274 process_device_extent_item(dev_extent_cache,
6279 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6280 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6281 process_extent_ref_v0(extent_cache, buf, i);
6288 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6289 add_tree_backref(extent_cache, key.objectid, 0,
6293 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6294 add_tree_backref(extent_cache, key.objectid,
6298 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6299 struct btrfs_extent_data_ref *ref;
6300 ref = btrfs_item_ptr(buf, i,
6301 struct btrfs_extent_data_ref);
6302 add_data_backref(extent_cache,
6304 btrfs_extent_data_ref_root(buf, ref),
6305 btrfs_extent_data_ref_objectid(buf,
6307 btrfs_extent_data_ref_offset(buf, ref),
6308 btrfs_extent_data_ref_count(buf, ref),
6309 0, root->sectorsize);
6312 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6313 struct btrfs_shared_data_ref *ref;
6314 ref = btrfs_item_ptr(buf, i,
6315 struct btrfs_shared_data_ref);
6316 add_data_backref(extent_cache,
6317 key.objectid, key.offset, 0, 0, 0,
6318 btrfs_shared_data_ref_count(buf, ref),
6319 0, root->sectorsize);
/* Orphan items are queued on the global delete_items list. */
6322 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6323 struct bad_item *bad;
6325 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6329 bad = malloc(sizeof(struct bad_item));
6332 INIT_LIST_HEAD(&bad->list);
6333 memcpy(&bad->key, &key,
6334 sizeof(struct btrfs_key));
6335 bad->root_id = owner;
6336 list_add_tail(&bad->list, &delete_items);
/* Everything below handles regular file extent items. */
6339 if (key.type != BTRFS_EXTENT_DATA_KEY)
6341 fi = btrfs_item_ptr(buf, i,
6342 struct btrfs_file_extent_item);
6343 if (btrfs_file_extent_type(buf, fi) ==
6344 BTRFS_FILE_EXTENT_INLINE)
/* disk_bytenr == 0 is a hole extent — nothing allocated. */
6346 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6349 data_bytes_allocated +=
6350 btrfs_file_extent_disk_num_bytes(buf, fi);
6351 if (data_bytes_allocated < root->sectorsize) {
6354 data_bytes_referenced +=
6355 btrfs_file_extent_num_bytes(buf, fi);
6356 add_data_backref(extent_cache,
6357 btrfs_file_extent_disk_bytenr(buf, fi),
6358 parent, owner, key.objectid, key.offset -
6359 btrfs_file_extent_offset(buf, fi), 1, 1,
6360 btrfs_file_extent_disk_num_bytes(buf, fi));
/* Node: record and queue every child block pointer. */
6364 struct btrfs_key first_key;
6366 first_key.objectid = 0;
6369 btrfs_item_key_to_cpu(buf, &first_key, 0);
6370 level = btrfs_header_level(buf);
6371 for (i = 0; i < nritems; i++) {
6372 struct extent_record tmpl;
6374 ptr = btrfs_node_blockptr(buf, i);
6375 size = root->nodesize;
6376 btrfs_node_key_to_cpu(buf, &key, i);
/* Skip children already dropped by an interrupted snapshot delete. */
6378 if ((level == ri->drop_level)
6379 && is_dropped_key(&key, &ri->drop_key)) {
6384 memset(&tmpl, 0, sizeof(tmpl));
6385 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6386 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6391 tmpl.max_size = size;
6392 ret = add_extent_rec(extent_cache, &tmpl);
6395 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6398 add_pending(nodes, seen, ptr, size);
6400 add_pending(pending, seen, ptr, size);
6403 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6404 nritems) * sizeof(struct btrfs_key_ptr);
/* Global accounting for the final report. */
6406 total_btree_bytes += buf->len;
6407 if (fs_root_objectid(btrfs_header_owner(buf)))
6408 total_fs_tree_bytes += buf->len;
6409 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6410 total_extent_tree_bytes += buf->len;
6411 if (!found_old_backref &&
6412 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6413 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6414 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6415 found_old_backref = 1;
6417 free_extent_buffer(buf);
/*
 * Seed the block scan with a tree root: queue the root block for
 * processing (nodes for level > 0, pending for leaves), create its
 * extent record, and add the appropriate tree backref.  Reloc roots
 * and pre-MIXED_BACKREF_REV blocks get a full (parent-based) backref,
 * everything else an owner-based backref.
 *
 * NOTE(review): line-numbered excerpt with interior lines missing.
 */
6421 static int add_root_to_pending(struct extent_buffer *buf,
6422 struct cache_tree *extent_cache,
6423 struct cache_tree *pending,
6424 struct cache_tree *seen,
6425 struct cache_tree *nodes,
6428 struct extent_record tmpl;
6430 if (btrfs_header_level(buf) > 0)
6431 add_pending(nodes, seen, buf->start, buf->len);
6433 add_pending(pending, seen, buf->start, buf->len);
6435 memset(&tmpl, 0, sizeof(tmpl));
6436 tmpl.start = buf->start;
6441 tmpl.max_size = buf->len;
6442 add_extent_rec(extent_cache, &tmpl);
/* Old backref scheme or reloc tree: parent == the block itself. */
6444 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6445 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6446 add_tree_backref(extent_cache, buf->start, buf->start,
6449 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6453 /* as we fix the tree, we might be deleting blocks that
6454 * we're tracking for repair. This hook makes sure we
6455 * remove any backrefs for blocks as we are fixing them.
6456 * Decrements ref counts on the in-memory extent record and its
6457 * matching data/tree backref, erasing the backref node once both
6458 * its extent-tree and real-ref counts reach zero.
6457 static int free_extent_hook(struct btrfs_trans_handle *trans,
6458 struct btrfs_root *root,
6459 u64 bytenr, u64 num_bytes, u64 parent,
6460 u64 root_objectid, u64 owner, u64 offset,
6463 struct extent_record *rec;
6464 struct cache_extent *cache;
6466 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
/* Owners >= FIRST_FREE_OBJECTID are fs-tree inodes, i.e. data refs. */
6468 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6469 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6473 rec = container_of(cache, struct extent_record, cache);
6475 struct data_backref *back;
6476 back = find_data_backref(rec, parent, root_objectid, owner,
6477 offset, 1, bytenr, num_bytes);
6480 if (back->node.found_ref) {
6481 back->found_ref -= refs_to_drop;
6483 rec->refs -= refs_to_drop;
6485 if (back->node.found_extent_tree) {
6486 back->num_refs -= refs_to_drop;
6487 if (rec->extent_item_refs)
6488 rec->extent_item_refs -= refs_to_drop;
6490 if (back->found_ref == 0)
6491 back->node.found_ref = 0;
6492 if (back->num_refs == 0)
6493 back->node.found_extent_tree = 0;
6495 if (!back->node.found_extent_tree && back->node.found_ref) {
6496 rb_erase(&back->node.node, &rec->backref_tree);
/* Tree-block (metadata) backref path. */
6500 struct tree_backref *back;
6501 back = find_tree_backref(rec, parent, root_objectid);
6504 if (back->node.found_ref) {
6507 back->node.found_ref = 0;
6509 if (back->node.found_extent_tree) {
6510 if (rec->extent_item_refs)
6511 rec->extent_item_refs--;
6512 back->node.found_extent_tree = 0;
6514 if (!back->node.found_extent_tree && back->node.found_ref) {
6515 rb_erase(&back->node.node, &rec->backref_tree);
/* Free the record entirely if nothing references it any more. */
6519 maybe_free_extent_rec(extent_cache, rec);
/*
 * Repair helper: delete every extent-tree item (extent/metadata items
 * and all backref item types) whose objectid equals @bytenr, walking
 * backwards from (bytenr, max) so each deletion restarts cleanly.
 * Block-group accounting is updated for each EXTENT/METADATA item
 * removed.
 *
 * NOTE(review): line-numbered excerpt with interior lines missing.
 */
6524 static int delete_extent_records(struct btrfs_trans_handle *trans,
6525 struct btrfs_root *root,
6526 struct btrfs_path *path,
6527 u64 bytenr, u64 new_len)
6529 struct btrfs_key key;
6530 struct btrfs_key found_key;
6531 struct extent_buffer *leaf;
6536 key.objectid = bytenr;
6538 key.offset = (u64)-1;
6541 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6548 if (path->slots[0] == 0)
6554 leaf = path->nodes[0];
6555 slot = path->slots[0];
6557 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6558 if (found_key.objectid != bytenr)
/* Only delete item types that belong to the extent record itself. */
6561 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6562 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6563 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6564 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6565 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6566 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6567 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
/* Other type found: step the search key backwards and retry. */
6568 btrfs_release_path(path);
6569 if (found_key.type == 0) {
6570 if (found_key.offset == 0)
6572 key.offset = found_key.offset - 1;
6573 key.type = found_key.type;
6575 key.type = found_key.type - 1;
6576 key.offset = (u64)-1;
6580 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6581 found_key.objectid, found_key.type, found_key.offset);
6583 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6586 btrfs_release_path(path);
/* Keep block-group used-bytes consistent with the deleted extent. */
6588 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6589 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6590 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6591 found_key.offset : root->nodesize;
6593 ret = btrfs_update_block_group(trans, root, bytenr,
6600 btrfs_release_path(path);
6605 * for a single backref, this will allocate a new extent
6606 * and add the backref to it.
/*
 * Repair helper: when @allocated is false, (re)insert the EXTENT_ITEM
 * for @rec into the extent tree (including the tree_block_info for
 * metadata), update block-group accounting, then add the backref(s)
 * described by @back via btrfs_inc_extent_ref().  Data backrefs are
 * added once per found_ref.
 *
 * NOTE(review): line-numbered excerpt with interior lines missing.
 */
6608 static int record_extent(struct btrfs_trans_handle *trans,
6609 struct btrfs_fs_info *info,
6610 struct btrfs_path *path,
6611 struct extent_record *rec,
6612 struct extent_backref *back,
6613 int allocated, u64 flags)
6616 struct btrfs_root *extent_root = info->extent_root;
6617 struct extent_buffer *leaf;
6618 struct btrfs_key ins_key;
6619 struct btrfs_extent_item *ei;
6620 struct tree_backref *tback;
6621 struct data_backref *dback;
6622 struct btrfs_tree_block_info *bi;
/* Metadata extents are always at least one node in size. */
6625 rec->max_size = max_t(u64, rec->max_size,
6626 info->extent_root->nodesize);
6629 u32 item_size = sizeof(*ei);
6632 item_size += sizeof(*bi);
6634 ins_key.objectid = rec->start;
6635 ins_key.offset = rec->max_size;
6636 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6638 ret = btrfs_insert_empty_item(trans, extent_root, path,
6639 &ins_key, item_size);
6643 leaf = path->nodes[0];
6644 ei = btrfs_item_ptr(leaf, path->slots[0],
6645 struct btrfs_extent_item);
6647 btrfs_set_extent_refs(leaf, ei, 0);
6648 btrfs_set_extent_generation(leaf, ei, rec->generation);
6650 if (back->is_data) {
6651 btrfs_set_extent_flags(leaf, ei,
6652 BTRFS_EXTENT_FLAG_DATA);
/* NOTE(review): stray second ';' on the next line is harmless. */
6654 struct btrfs_disk_key copy_key;;
6656 tback = to_tree_backref(back);
/* Metadata: fill the tree_block_info that follows the extent item. */
6657 bi = (struct btrfs_tree_block_info *)(ei + 1);
6658 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6661 btrfs_set_disk_key_objectid(&copy_key,
6662 rec->info_objectid);
6663 btrfs_set_disk_key_type(&copy_key, 0);
6664 btrfs_set_disk_key_offset(&copy_key, 0);
6666 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6667 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6669 btrfs_set_extent_flags(leaf, ei,
6670 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6673 btrfs_mark_buffer_dirty(leaf);
6674 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6675 rec->max_size, 1, 0);
6678 btrfs_release_path(path);
6681 if (back->is_data) {
6685 dback = to_data_backref(back);
6686 if (back->full_backref)
6687 parent = dback->parent;
/* One inc_extent_ref per real reference we found for this backref. */
6691 for (i = 0; i < dback->found_ref; i++) {
6692 /* if parent != 0, we're doing a full backref
6693 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6694 * just makes the backref allocator create a data
6697 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6698 rec->start, rec->max_size,
6702 BTRFS_FIRST_FREE_OBJECTID :
6708 fprintf(stderr, "adding new data backref"
6709 " on %llu %s %llu owner %llu"
6710 " offset %llu found %d\n",
6711 (unsigned long long)rec->start,
6712 back->full_backref ?
6714 back->full_backref ?
6715 (unsigned long long)parent :
6716 (unsigned long long)dback->root,
6717 (unsigned long long)dback->owner,
6718 (unsigned long long)dback->offset,
6723 tback = to_tree_backref(back);
6724 if (back->full_backref)
6725 parent = tback->parent;
6729 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6730 rec->start, rec->max_size,
6731 parent, tback->root, 0, 0);
6732 fprintf(stderr, "adding new tree backref on "
6733 "start %llu len %llu parent %llu root %llu\n",
6734 rec->start, rec->max_size, parent, tback->root);
6737 btrfs_release_path(path);
/*
 * Linear scan of an extent_entry list for an exact (bytenr, bytes)
 * match.  Returns the entry or NULL (return statements are missing
 * from this line-numbered excerpt).
 */
6741 static struct extent_entry *find_entry(struct list_head *entries,
6742 u64 bytenr, u64 bytes)
6744 struct extent_entry *entry = NULL;
6746 list_for_each_entry(entry, entries, list) {
6747 if (entry->bytenr == bytenr && entry->bytes == bytes)
/*
 * Pick the most trustworthy extent_entry from a list of candidates by
 * majority vote: entries whose refs are all broken are ignored, and a
 * tie in count between the best and another entry means no winner can
 * be declared (the caller needs an unambiguous answer).
 *
 * NOTE(review): line-numbered excerpt with interior lines missing.
 */
6754 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6756 struct extent_entry *entry, *best = NULL, *prev = NULL;
6758 list_for_each_entry(entry, entries, list) {
6765 * If there are as many broken entries as entries then we know
6766 * not to trust this particular entry.
6768 if (entry->broken == entry->count)
6772 * If our current entry == best then we can't be sure our best
6773 * is really the best, so we need to keep searching.
6775 if (best && best->count == entry->count) {
6781 /* Prev == entry, not good enough, have to keep searching */
6782 if (!prev->broken && prev->count == entry->count)
6786 best = (prev->count > entry->count) ? prev : entry;
6787 else if (best->count < entry->count)
/*
 * Repair a file extent item whose on-disk (disk_bytenr, disk_num_bytes)
 * disagree with the agreed-upon extent @entry.
 *
 * Finds the owning fs root from dback->root, walks forward from
 * (owner, EXTENT_DATA, offset) until the file extent matching the
 * backref is located, then in a transaction rewrites disk_bytenr /
 * offset / disk_num_bytes (and ram_bytes when uncompressed) to match
 * @entry.  Cases the code cannot safely fix (compressed extents with a
 * mismatched start, refs extending past the entry) are reported and
 * left alone.
 *
 * NOTE(review): line-numbered excerpt with interior lines missing.
 */
6795 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6796 struct data_backref *dback, struct extent_entry *entry)
6798 struct btrfs_trans_handle *trans;
6799 struct btrfs_root *root;
6800 struct btrfs_file_extent_item *fi;
6801 struct extent_buffer *leaf;
6802 struct btrfs_key key;
6806 key.objectid = dback->root;
6807 key.type = BTRFS_ROOT_ITEM_KEY;
6808 key.offset = (u64)-1;
6809 root = btrfs_read_fs_root(info, &key);
6811 fprintf(stderr, "Couldn't find root for our ref\n");
6816 * The backref points to the original offset of the extent if it was
6817 * split, so we need to search down to the offset we have and then walk
6818 * forward until we find the backref we're looking for.
6820 key.objectid = dback->owner;
6821 key.type = BTRFS_EXTENT_DATA_KEY;
6822 key.offset = dback->offset;
6823 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6825 fprintf(stderr, "Error looking up ref %d\n", ret);
6830 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6831 ret = btrfs_next_leaf(root, path);
6833 fprintf(stderr, "Couldn't find our ref, next\n");
6837 leaf = path->nodes[0];
6838 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6839 if (key.objectid != dback->owner ||
6840 key.type != BTRFS_EXTENT_DATA_KEY) {
6841 fprintf(stderr, "Couldn't find our ref, search\n");
6844 fi = btrfs_item_ptr(leaf, path->slots[0],
6845 struct btrfs_file_extent_item);
6846 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6847 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
/* Stop once the file extent matches the backref we're repairing. */
6849 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6854 btrfs_release_path(path);
6856 trans = btrfs_start_transaction(root, 1);
6858 return PTR_ERR(trans);
6861 * Ok we have the key of the file extent we want to fix, now we can cow
6862 * down to the thing and fix it.
6864 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6866 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6867 key.objectid, key.type, key.offset, ret);
6871 fprintf(stderr, "Well that's odd, we just found this key "
6872 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6877 leaf = path->nodes[0];
6878 fi = btrfs_item_ptr(leaf, path->slots[0],
6879 struct btrfs_file_extent_item);
/* Compressed + moved start can't be repaired automatically. */
6881 if (btrfs_file_extent_compression(leaf, fi) &&
6882 dback->disk_bytenr != entry->bytenr) {
6883 fprintf(stderr, "Ref doesn't match the record start and is "
6884 "compressed, please take a btrfs-image of this file "
6885 "system and send it to a btrfs developer so they can "
6886 "complete this functionality for bytenr %Lu\n",
6887 dback->disk_bytenr);
/* Three fixable cases: broken ref, start too high, start too low. */
6892 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6893 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6894 } else if (dback->disk_bytenr > entry->bytenr) {
6895 u64 off_diff, offset;
6897 off_diff = dback->disk_bytenr - entry->bytenr;
6898 offset = btrfs_file_extent_offset(leaf, fi);
6899 if (dback->disk_bytenr + offset +
6900 btrfs_file_extent_num_bytes(leaf, fi) >
6901 entry->bytenr + entry->bytes) {
6902 fprintf(stderr, "Ref is past the entry end, please "
6903 "take a btrfs-image of this file system and "
6904 "send it to a btrfs developer, ref %Lu\n",
6905 dback->disk_bytenr);
6910 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6911 btrfs_set_file_extent_offset(leaf, fi, offset);
6912 } else if (dback->disk_bytenr < entry->bytenr) {
6915 offset = btrfs_file_extent_offset(leaf, fi);
6916 if (dback->disk_bytenr + offset < entry->bytenr) {
6917 fprintf(stderr, "Ref is before the entry start, please"
6918 " take a btrfs-image of this file system and "
6919 "send it to a btrfs developer, ref %Lu\n",
6920 dback->disk_bytenr);
/* Shift the file offset so it stays relative to the new bytenr. */
6925 offset += dback->disk_bytenr;
6926 offset -= entry->bytenr;
6927 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6928 btrfs_set_file_extent_offset(leaf, fi, offset);
6931 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6934 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6935 * only do this if we aren't using compression, otherwise it's a
6938 if (!btrfs_file_extent_compression(leaf, fi))
6939 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6941 printf("ram bytes may be wrong?\n");
6942 btrfs_mark_buffer_dirty(leaf);
6944 err = btrfs_commit_transaction(trans, root);
6945 btrfs_release_path(path);
6946 return ret ? ret : err;
/*
 * verify_backrefs() - make the data backrefs of one extent record agree on
 * (disk_bytenr, bytes), then repair the file-extent refs that disagree with
 * the winning entry via repair_ref().
 *
 * NOTE(review): this listing is line-numbered and partially elided; original
 * code lines below are kept byte-for-byte, comments only are added.
 * Returns 0 when nothing needed fixing or on success; presumably <0 on
 * error and a "rescan needed" indication after refs were modified — the
 * elided tail hides the exact return values, confirm against full source.
 */
6949 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6950 struct extent_record *rec)
6952 struct extent_backref *back, *tmp;
6953 struct data_backref *dback;
6954 struct extent_entry *entry, *best = NULL;
6957 int broken_entries = 0;
6962 * Metadata is easy and the backrefs should always agree on bytenr and
6963 * size, if not we've got bigger issues.
/*
 * Phase 1: walk every data backref that has a real on-disk ref and bucket
 * them into extent_entry records keyed by (disk_bytenr, bytes).
 */
6968 rbtree_postorder_for_each_entry_safe(back, tmp,
6969 &rec->backref_tree, node) {
6970 if (back->full_backref || !back->is_data)
6973 dback = to_data_backref(back);
6976 * We only pay attention to backrefs that we found a real
6979 if (dback->found_ref == 0)
6983 * For now we only catch when the bytes don't match, not the
6984 * bytenr. We can easily do this at the same time, but I want
6985 * to have a fs image to test on before we just add repair
6986 * functionality willy-nilly so we know we won't screw up the
/* Reuse an existing entry for this (bytenr, bytes) pair if present. */
6990 entry = find_entry(&entries, dback->disk_bytenr,
6993 entry = malloc(sizeof(struct extent_entry));
6998 memset(entry, 0, sizeof(*entry));
6999 entry->bytenr = dback->disk_bytenr;
7000 entry->bytes = dback->bytes;
7001 list_add_tail(&entry->list, &entries);
7006 * If we only have on entry we may think the entries agree when
7007 * in reality they don't so we have to do some extra checking.
7009 if (dback->disk_bytenr != rec->start ||
7010 dback->bytes != rec->nr || back->broken)
7021 /* Yay all the backrefs agree, carry on good sir */
7022 if (nr_entries <= 1 && !mismatch)
7025 fprintf(stderr, "attempting to repair backref discrepency for bytenr "
7026 "%Lu\n", rec->start);
7029 * First we want to see if the backrefs can agree amongst themselves who
7030 * is right, so figure out which one of the entries has the highest
7033 best = find_most_right_entry(&entries);
7036 * Ok so we may have an even split between what the backrefs think, so
7037 * this is where we use the extent ref to see what it thinks.
/* Tie-break with the extent record itself, or give up if nobody agrees. */
7040 entry = find_entry(&entries, rec->start, rec->nr);
7041 if (!entry && (!broken_entries || !rec->found_rec)) {
7042 fprintf(stderr, "Backrefs don't agree with each other "
7043 "and extent record doesn't agree with anybody,"
7044 " so we can't fix bytenr %Lu bytes %Lu\n",
7045 rec->start, rec->nr);
7048 } else if (!entry) {
7050 * Ok our backrefs were broken, we'll assume this is the
7051 * correct value and add an entry for this range.
7053 entry = malloc(sizeof(struct extent_entry));
7058 memset(entry, 0, sizeof(*entry));
7059 entry->bytenr = rec->start;
7060 entry->bytes = rec->nr;
7061 list_add_tail(&entry->list, &entries);
7065 best = find_most_right_entry(&entries);
7067 fprintf(stderr, "Backrefs and extent record evenly "
7068 "split on who is right, this is going to "
7069 "require user input to fix bytenr %Lu bytes "
7070 "%Lu\n", rec->start, rec->nr);
7077 * I don't think this can happen currently as we'll abort() if we catch
7078 * this case higher up, but in case somebody removes that we still can't
7079 * deal with it properly here yet, so just bail out of that's the case.
7081 if (best->bytenr != rec->start) {
7082 fprintf(stderr, "Extent start and backref starts don't match, "
7083 "please use btrfs-image on this file system and send "
7084 "it to a btrfs developer so they can make fsck fix "
7085 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7086 rec->start, rec->nr);
7092 * Ok great we all agreed on an extent record, let's go find the real
7093 * references and fix up the ones that don't match.
/* Phase 2: repair every found data ref whose (bytenr, bytes) != best's. */
7095 rbtree_postorder_for_each_entry_safe(back, tmp,
7096 &rec->backref_tree, node) {
7097 if (back->full_backref || !back->is_data)
7100 dback = to_data_backref(back);
7103 * Still ignoring backrefs that don't have a real ref attached
7106 if (dback->found_ref == 0)
7109 if (dback->bytes == best->bytes &&
7110 dback->disk_bytenr == best->bytenr)
7113 ret = repair_ref(info, path, dback, best);
7119 * Ok we messed with the actual refs, which means we need to drop our
7120 * entire cache and go back and rescan. I know this is a huge pain and
7121 * adds a lot of extra work, but it's the only way to be safe. Once all
7122 * the backrefs agree we may not need to do anything to the extent
/* Cleanup: free every temporary extent_entry collected above. */
7127 while (!list_empty(&entries)) {
7128 entry = list_entry(entries.next, struct extent_entry, list);
7129 list_del_init(&entry->list);
/*
 * process_duplicates() - collapse an extent record's duplicate list into the
 * one record that was actually backed by an EXTENT_ITEM, re-inserting that
 * "good" record into the extent cache in place of @rec.
 *
 * Returns 0 when the merged record still has duplicates (caller must delete
 * them), 1 otherwise.
 * NOTE(review): numbered, partially elided listing; code lines kept verbatim.
 */
7135 static int process_duplicates(struct btrfs_root *root,
7136 struct cache_tree *extent_cache,
7137 struct extent_record *rec)
7139 struct extent_record *good, *tmp;
7140 struct cache_extent *cache;
7144 * If we found a extent record for this extent then return, or if we
7145 * have more than one duplicate we are likely going to need to delete
7148 if (rec->found_rec || rec->num_duplicates > 1)
7151 /* Shouldn't happen but just in case */
7152 BUG_ON(!rec->num_duplicates);
7155 * So this happens if we end up with a backref that doesn't match the
7156 * actual extent entry. So either the backref is bad or the extent
7157 * entry is bad. Either way we want to have the extent_record actually
7158 * reflect what we found in the extent_tree, so we need to take the
7159 * duplicate out and use that as the extent_record since the only way we
7160 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7162 remove_cache_extent(extent_cache, &rec->cache);
/* Promote the first duplicate to be the canonical record. */
7164 good = to_extent_record(rec->dups.next);
7165 list_del_init(&good->list);
7166 INIT_LIST_HEAD(&good->backrefs);
7167 INIT_LIST_HEAD(&good->dups);
7168 good->cache.start = good->start;
7169 good->cache.size = good->nr;
7170 good->content_checked = 0;
7171 good->owner_ref_checked = 0;
7172 good->num_duplicates = 0;
7173 good->refs = rec->refs;
7174 list_splice_init(&rec->backrefs, &good->backrefs);
/* Absorb any records still overlapping the promoted range. */
7176 cache = lookup_cache_extent(extent_cache, good->start,
7180 tmp = container_of(cache, struct extent_record, cache);
7183 * If we find another overlapping extent and it's found_rec is
7184 * set then it's a duplicate and we need to try and delete
7187 if (tmp->found_rec || tmp->num_duplicates > 0) {
7188 if (list_empty(&good->list))
7189 list_add_tail(&good->list,
7190 &duplicate_extents);
7191 good->num_duplicates += tmp->num_duplicates + 1;
7192 list_splice_init(&tmp->dups, &good->dups);
7193 list_del_init(&tmp->list);
7194 list_add_tail(&tmp->list, &good->dups);
7195 remove_cache_extent(extent_cache, &tmp->cache);
7200 * Ok we have another non extent item backed extent rec, so lets
7201 * just add it to this extent and carry on like we did above.
7203 good->refs += tmp->refs;
7204 list_splice_init(&tmp->backrefs, &good->backrefs);
7205 remove_cache_extent(extent_cache, &tmp->cache);
7208 ret = insert_cache_extent(extent_cache, &good->cache);
/* 0 => duplicates remain and must be deleted by the caller. */
7211 return good->num_duplicates ? 0 : 1;
/*
 * delete_duplicate_records() - pick the widest record covering all of @rec's
 * duplicates, keep it, and delete the redundant EXTENT_ITEMs from the extent
 * tree inside a transaction.
 *
 * Returns the number of deleted records on success, or a negative errno.
 * NOTE(review): numbered, partially elided listing; code lines kept verbatim.
 */
7214 static int delete_duplicate_records(struct btrfs_root *root,
7215 struct extent_record *rec)
7217 struct btrfs_trans_handle *trans;
7218 LIST_HEAD(delete_list);
7219 struct btrfs_path *path;
7220 struct extent_record *tmp, *good, *n;
7223 struct btrfs_key key;
7225 path = btrfs_alloc_path();
7232 /* Find the record that covers all of the duplicates. */
7233 list_for_each_entry(tmp, &rec->dups, list) {
7234 if (good->start < tmp->start)
7236 if (good->nr > tmp->nr)
/* Bail if extents overlap without one fully containing the other. */
7239 if (tmp->start + tmp->nr < good->start + good->nr) {
7240 fprintf(stderr, "Ok we have overlapping extents that "
7241 "aren't completely covered by each other, this "
7242 "is going to require more careful thought. "
7243 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7244 tmp->start, tmp->nr, good->start, good->nr);
/* Everything except the chosen "good" record goes on the delete list. */
7251 list_add_tail(&rec->list, &delete_list);
7253 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7256 list_move_tail(&tmp->list, &delete_list);
7259 root = root->fs_info->extent_root;
7260 trans = btrfs_start_transaction(root, 1);
7261 if (IS_ERR(trans)) {
7262 ret = PTR_ERR(trans);
7266 list_for_each_entry(tmp, &delete_list, list) {
7267 if (tmp->found_rec == 0)
7269 key.objectid = tmp->start;
7270 key.type = BTRFS_EXTENT_ITEM_KEY;
7271 key.offset = tmp->nr;
7273 /* Shouldn't happen but just in case */
7274 if (tmp->metadata) {
7275 fprintf(stderr, "Well this shouldn't happen, extent "
7276 "record overlaps but is metadata? "
7277 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7281 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7287 ret = btrfs_del_item(trans, root, path);
7290 btrfs_release_path(path);
7293 err = btrfs_commit_transaction(trans, root);
/* Free the records we deleted, then any remaining dups. */
7297 while (!list_empty(&delete_list)) {
7298 tmp = to_extent_record(delete_list.next);
7299 list_del_init(&tmp->list);
7305 while (!list_empty(&rec->dups)) {
7306 tmp = to_extent_record(rec->dups.next);
7307 list_del_init(&tmp->list);
7311 btrfs_free_path(path);
7313 if (!ret && !nr_del)
7314 rec->num_duplicates = 0;
7316 return ret ? ret : nr_del;
/*
 * find_possible_backrefs() - for each data backref of @rec that we never saw
 * a real file extent for, look the file extent up in its fs tree and, if its
 * bytenr has no extent record of its own, attach it to this record so
 * verify_backrefs() can reconcile it.
 *
 * Returns 0 on success, or a negative errno from root/tree lookups.
 * NOTE(review): numbered, partially elided listing; code lines kept verbatim.
 */
7319 static int find_possible_backrefs(struct btrfs_fs_info *info,
7320 struct btrfs_path *path,
7321 struct cache_tree *extent_cache,
7322 struct extent_record *rec)
7324 struct btrfs_root *root;
7325 struct extent_backref *back, *tmp;
7326 struct data_backref *dback;
7327 struct cache_extent *cache;
7328 struct btrfs_file_extent_item *fi;
7329 struct btrfs_key key;
7333 rbtree_postorder_for_each_entry_safe(back, tmp,
7334 &rec->backref_tree, node) {
7335 /* Don't care about full backrefs (poor unloved backrefs) */
7336 if (back->full_backref || !back->is_data)
7339 dback = to_data_backref(back);
7341 /* We found this one, we don't need to do a lookup */
7342 if (dback->found_ref)
/* Resolve the fs tree that owns this backref. */
7345 key.objectid = dback->root;
7346 key.type = BTRFS_ROOT_ITEM_KEY;
7347 key.offset = (u64)-1;
7349 root = btrfs_read_fs_root(info, &key);
7351 /* No root, definitely a bad ref, skip */
7352 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7354 /* Other err, exit */
7356 return PTR_ERR(root);
/* Look up the file extent item the backref claims to describe. */
7358 key.objectid = dback->owner;
7359 key.type = BTRFS_EXTENT_DATA_KEY;
7360 key.offset = dback->offset;
7361 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7363 btrfs_release_path(path);
7366 /* Didn't find it, we can carry on */
7371 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7372 struct btrfs_file_extent_item);
7373 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7374 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7375 btrfs_release_path(path);
7376 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7378 struct extent_record *tmp;
7379 tmp = container_of(cache, struct extent_record, cache);
7382 * If we found an extent record for the bytenr for this
7383 * particular backref then we can't add it to our
7384 * current extent record. We only want to add backrefs
7385 * that don't have a corresponding extent item in the
7386 * extent tree since they likely belong to this record
7387 * and we need to fix it if it doesn't match bytenrs.
/* Adopt the on-disk values into the backref for later verification. */
7393 dback->found_ref += 1;
7394 dback->disk_bytenr = bytenr;
7395 dback->bytes = bytes;
7398 * Set this so the verify backref code knows not to trust the
7399 * values in this backref.
7408 * Record orphan data ref into corresponding root.
7410 * Return 0 if the extent item contains data ref and recorded.
7411 * Return 1 if the extent item contains no useful data ref
7412 * On that case, it may contains only shared_dataref or metadata backref
7413 * or the file extent exists(this should be handled by the extent bytenr
7415 * Return <0 if something goes wrong.
/*
 * record_orphan_data_extents() - see the contract comment above: record data
 * backrefs whose file extents are missing from their fs trees as
 * orphan_data_extent entries on the owning root, for later inode/extent
 * rebuild. Returns 0 if at least one ref was recorded, 1 if none, <0 on
 * error (elided in this listing — confirm against full source).
 * NOTE(review): numbered, partially elided listing; code lines kept verbatim.
 */
7417 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7418 struct extent_record *rec)
7420 struct btrfs_key key;
7421 struct btrfs_root *dest_root;
7422 struct extent_backref *back, *tmp;
7423 struct data_backref *dback;
7424 struct orphan_data_extent *orphan;
7425 struct btrfs_path *path;
7426 int recorded_data_ref = 0;
7431 path = btrfs_alloc_path();
7434 rbtree_postorder_for_each_entry_safe(back, tmp,
7435 &rec->backref_tree, node) {
/* Only shared-tree-found data backrefs with no real file extent ref. */
7436 if (back->full_backref || !back->is_data ||
7437 !back->found_extent_tree)
7439 dback = to_data_backref(back);
7440 if (dback->found_ref)
7442 key.objectid = dback->root;
7443 key.type = BTRFS_ROOT_ITEM_KEY;
7444 key.offset = (u64)-1;
7446 dest_root = btrfs_read_fs_root(fs_info, &key);
7448 /* For non-exist root we just skip it */
7449 if (IS_ERR(dest_root) || !dest_root)
7452 key.objectid = dback->owner;
7453 key.type = BTRFS_EXTENT_DATA_KEY;
7454 key.offset = dback->offset;
7456 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7458 * For ret < 0, it's OK since the fs-tree may be corrupted,
7459 * we need to record it for inode/file extent rebuild.
7460 * For ret > 0, we record it only for file extent rebuild.
7461 * For ret == 0, the file extent exists but only bytenr
7462 * mismatch, let the original bytenr fix routine to handle,
7468 orphan = malloc(sizeof(*orphan));
7473 INIT_LIST_HEAD(&orphan->list);
7474 orphan->root = dback->root;
7475 orphan->objectid = dback->owner;
7476 orphan->offset = dback->offset;
7477 orphan->disk_bytenr = rec->cache.start;
7478 orphan->disk_len = rec->cache.size;
7479 list_add(&dest_root->orphan_data_extents, &orphan->list);
7480 recorded_data_ref = 1;
7483 btrfs_free_path(path);
/* 0 == recorded at least one orphan data ref. */
7485 return !recorded_data_ref;
7491 * when an incorrect extent item is found, this will delete
7492 * all of the existing entries for it and recreate them
7493 * based on what the tree scan found.
/*
 * fixup_extent_refs() - rebuild the extent tree entries for one extent
 * record: reconcile its backrefs, delete the existing (wrong) extent items,
 * then re-create references from the backrefs actually found during the
 * scan. Skips re-creation for blocks marked corrupt.
 * NOTE(review): numbered, partially elided listing; code lines kept verbatim.
 */
7495 static int fixup_extent_refs(struct btrfs_fs_info *info,
7496 struct cache_tree *extent_cache,
7497 struct extent_record *rec)
7499 struct btrfs_trans_handle *trans = NULL;
7501 struct btrfs_path *path;
7502 struct cache_extent *cache;
7503 struct extent_backref *back, *tmp;
7507 if (rec->flag_block_full_backref)
7508 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7510 path = btrfs_alloc_path();
7514 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7516 * Sometimes the backrefs themselves are so broken they don't
7517 * get attached to any meaningful rec, so first go back and
7518 * check any of our backrefs that we couldn't find and throw
7519 * them into the list if we find the backref so that
7520 * verify_backrefs can figure out what to do.
7522 ret = find_possible_backrefs(info, path, extent_cache, rec);
7527 /* step one, make sure all of the backrefs agree */
7528 ret = verify_backrefs(info, path, rec);
7532 trans = btrfs_start_transaction(info->extent_root, 1);
7533 if (IS_ERR(trans)) {
7534 ret = PTR_ERR(trans);
7538 /* step two, delete all the existing records */
7539 ret = delete_extent_records(trans, info->extent_root, path,
7540 rec->start, rec->max_size);
7545 /* was this block corrupt? If so, don't add references to it */
7546 cache = lookup_cache_extent(info->corrupt_blocks,
7547 rec->start, rec->max_size);
7553 /* step three, recreate all the refs we did find */
7554 rbtree_postorder_for_each_entry_safe(back, tmp,
7555 &rec->backref_tree, node) {
7557 * if we didn't find any references, don't create a
7560 if (!back->found_ref)
7563 rec->bad_full_backref = 0;
7564 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7572 int err = btrfs_commit_transaction(trans, info->extent_root);
7577 btrfs_free_path(path);
/*
 * fixup_extent_flags() - set or clear BTRFS_BLOCK_FLAG_FULL_BACKREF on the
 * extent item for @rec according to rec->flag_block_full_backref, inside a
 * transaction. Returns the commit result, or a negative errno on lookup /
 * transaction failure.
 * NOTE(review): numbered, partially elided listing; code lines kept verbatim.
 */
7581 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7582 struct extent_record *rec)
7584 struct btrfs_trans_handle *trans;
7585 struct btrfs_root *root = fs_info->extent_root;
7586 struct btrfs_path *path;
7587 struct btrfs_extent_item *ei;
7588 struct btrfs_key key;
/* Metadata uses METADATA_ITEM keyed by level; data uses EXTENT_ITEM. */
7592 key.objectid = rec->start;
7593 if (rec->metadata) {
7594 key.type = BTRFS_METADATA_ITEM_KEY;
7595 key.offset = rec->info_level;
7597 key.type = BTRFS_EXTENT_ITEM_KEY;
7598 key.offset = rec->max_size;
7601 path = btrfs_alloc_path();
7605 trans = btrfs_start_transaction(root, 0);
7606 if (IS_ERR(trans)) {
7607 btrfs_free_path(path);
7608 return PTR_ERR(trans);
7611 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7613 btrfs_free_path(path);
7614 btrfs_commit_transaction(trans, root);
7617 fprintf(stderr, "Didn't find extent for %llu\n",
7618 (unsigned long long)rec->start);
7619 btrfs_free_path(path);
7620 btrfs_commit_transaction(trans, root);
7624 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7625 struct btrfs_extent_item);
7626 flags = btrfs_extent_flags(path->nodes[0], ei);
7627 if (rec->flag_block_full_backref) {
7628 fprintf(stderr, "setting full backref on %llu\n",
7629 (unsigned long long)key.objectid);
7630 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7632 fprintf(stderr, "clearing full backref on %llu\n",
7633 (unsigned long long)key.objectid);
7634 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7636 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7637 btrfs_mark_buffer_dirty(path->nodes[0]);
7638 btrfs_free_path(path);
7639 return btrfs_commit_transaction(trans, root);
7642 /* right now we only prune from the extent allocation tree */
/*
 * prune_one_block() - delete the pointer to one corrupt block from its
 * parent node in the extent allocation tree. Searches down to the parent
 * level, locates the slot pointing at the corrupt block (first from the
 * search result, then by scanning the node), then removes it with
 * btrfs_del_ptr().
 * NOTE(review): numbered, partially elided listing; code lines kept verbatim.
 */
7643 static int prune_one_block(struct btrfs_trans_handle *trans,
7644 struct btrfs_fs_info *info,
7645 struct btrfs_corrupt_block *corrupt)
7648 struct btrfs_path path;
7649 struct extent_buffer *eb;
7653 int level = corrupt->level + 1;
7655 btrfs_init_path(&path);
7657 /* we want to stop at the parent to our busted block */
7658 path.lowest_level = level;
7660 ret = btrfs_search_slot(trans, info->extent_root,
7661 &corrupt->key, &path, -1, 1);
7666 eb = path.nodes[level];
7673 * hopefully the search gave us the block we want to prune,
7674 * lets try that first
7676 slot = path.slots[level];
7677 found = btrfs_node_blockptr(eb, slot);
7678 if (found == corrupt->cache.start)
7681 nritems = btrfs_header_nritems(eb);
7683 /* the search failed, lets scan this node and hope we find it */
7684 for (slot = 0; slot < nritems; slot++) {
7685 found = btrfs_node_blockptr(eb, slot);
7686 if (found == corrupt->cache.start)
7690 * we couldn't find the bad block. TODO, search all the nodes for pointers
7693 if (eb == info->extent_root->node) {
7698 btrfs_release_path(&path);
7703 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7704 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7707 btrfs_release_path(&path);
/*
 * prune_corrupt_blocks() - iterate fs_info->corrupt_blocks, pruning each
 * corrupt block's parent pointer via prune_one_block() under a single
 * transaction (started lazily on the first entry), then commit.
 * NOTE(review): numbered, partially elided listing; code lines kept verbatim.
 */
7711 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7713 struct btrfs_trans_handle *trans = NULL;
7714 struct cache_extent *cache;
7715 struct btrfs_corrupt_block *corrupt;
7718 cache = search_cache_extent(info->corrupt_blocks, 0);
7722 trans = btrfs_start_transaction(info->extent_root, 1);
7724 return PTR_ERR(trans);
7726 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7727 prune_one_block(trans, info, corrupt);
7728 remove_cache_extent(info->corrupt_blocks, cache);
7731 return btrfs_commit_transaction(trans, info->extent_root);
/*
 * reset_cached_block_groups() - clear all EXTENT_DIRTY ranges from the
 * free-space cache and walk the affected block groups (elided loop body
 * presumably resets each group's cached state — confirm against full
 * source).
 * NOTE(review): numbered, partially elided listing; code lines kept verbatim.
 */
7735 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7737 struct btrfs_block_group_cache *cache;
7742 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7743 &start, &end, EXTENT_DIRTY);
7746 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7752 cache = btrfs_lookup_first_block_group(fs_info, start);
7757 start = cache->key.objectid + cache->key.offset;
/*
 * check_extent_refs() - main verification pass over the collected extent
 * cache: excludes problem extents from allocation when repairing, resolves
 * duplicate records, then checks every record for ref-count mismatches,
 * backpointer mismatches, owner-ref failures, bad full-backref flags,
 * stripe-crossing metadata and wrong chunk types, invoking the fixup
 * helpers when repair is enabled. Finishes by fixing block accounting in a
 * transaction when anything was repaired.
 * NOTE(review): numbered, heavily elided listing; code lines kept verbatim.
 */
7761 static int check_extent_refs(struct btrfs_root *root,
7762 struct cache_tree *extent_cache)
7764 struct extent_record *rec;
7765 struct cache_extent *cache;
7774 * if we're doing a repair, we have to make sure
7775 * we don't allocate from the problem extents.
7776 * In the worst case, this will be all the
/* Pin every known extent record so repair never allocates over it. */
7779 cache = search_cache_extent(extent_cache, 0);
7781 rec = container_of(cache, struct extent_record, cache);
7782 set_extent_dirty(root->fs_info->excluded_extents,
7784 rec->start + rec->max_size - 1,
7786 cache = next_cache_extent(cache);
7789 /* pin down all the corrupted blocks too */
7790 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7792 set_extent_dirty(root->fs_info->excluded_extents,
7794 cache->start + cache->size - 1,
7796 cache = next_cache_extent(cache);
7798 prune_corrupt_blocks(root->fs_info);
7799 reset_cached_block_groups(root->fs_info);
7802 reset_cached_block_groups(root->fs_info);
7805 * We need to delete any duplicate entries we find first otherwise we
7806 * could mess up the extent tree when we have backrefs that actually
7807 * belong to a different extent item and not the weird duplicate one.
7809 while (repair && !list_empty(&duplicate_extents)) {
7810 rec = to_extent_record(duplicate_extents.next);
7811 list_del_init(&rec->list);
7813 /* Sometimes we can find a backref before we find an actual
7814 * extent, so we need to process it a little bit to see if there
7815 * truly are multiple EXTENT_ITEM_KEY's for the same range, or
7816 * if this is a backref screwup. If we need to delete stuff
7817 * process_duplicates() will return 0, otherwise it will return
7820 if (process_duplicates(root, extent_cache, rec))
7822 ret = delete_duplicate_records(root, rec);
7826 * delete_duplicate_records will return the number of entries
7827 * deleted, so if it's greater than 0 then we know we actually
7828 * did something and we need to remove.
/* Main per-record verification loop. */
7842 cache = search_cache_extent(extent_cache, 0);
7845 rec = container_of(cache, struct extent_record, cache);
7846 if (rec->num_duplicates) {
7847 fprintf(stderr, "extent item %llu has multiple extent "
7848 "items\n", (unsigned long long)rec->start);
7853 if (rec->refs != rec->extent_item_refs) {
7854 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7855 (unsigned long long)rec->start,
7856 (unsigned long long)rec->nr);
7857 fprintf(stderr, "extent item %llu, found %llu\n",
7858 (unsigned long long)rec->extent_item_refs,
7859 (unsigned long long)rec->refs);
7860 ret = record_orphan_data_extents(root->fs_info, rec);
7867 * we can't use the extent to repair file
7868 * extent, let the fallback method handle it.
7870 if (!fixed && repair) {
7871 ret = fixup_extent_refs(
7882 if (all_backpointers_checked(rec, 1)) {
7883 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7884 (unsigned long long)rec->start,
7885 (unsigned long long)rec->nr);
7887 if (!fixed && !recorded && repair) {
7888 ret = fixup_extent_refs(root->fs_info,
7897 if (!rec->owner_ref_checked) {
7898 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7899 (unsigned long long)rec->start,
7900 (unsigned long long)rec->nr);
7901 if (!fixed && !recorded && repair) {
7902 ret = fixup_extent_refs(root->fs_info,
7911 if (rec->bad_full_backref) {
7912 fprintf(stderr, "bad full backref, on [%llu]\n",
7913 (unsigned long long)rec->start);
7915 ret = fixup_extent_flags(root->fs_info, rec);
7924 * Although it's not a extent ref's problem, we reuse this
7925 * routine for error reporting.
7926 * No repair function yet.
7928 if (rec->crossing_stripes) {
7930 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7931 rec->start, rec->start + rec->max_size);
7936 if (rec->wrong_chunk_type) {
7938 "bad extent [%llu, %llu), type mismatch with chunk\n",
7939 rec->start, rec->start + rec->max_size);
/* Drop the processed record and unpin it if it ended up clean/fixed. */
7944 remove_cache_extent(extent_cache, cache);
7945 free_all_extent_backrefs(rec);
7946 if (!init_extent_tree && repair && (!cur_err || fixed))
7947 clear_extent_dirty(root->fs_info->excluded_extents,
7949 rec->start + rec->max_size - 1,
7955 if (ret && ret != -EAGAIN) {
7956 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
/* After successful repairs, fix block accounting in a transaction. */
7959 struct btrfs_trans_handle *trans;
7961 root = root->fs_info->extent_root;
7962 trans = btrfs_start_transaction(root, 1);
7963 if (IS_ERR(trans)) {
7964 ret = PTR_ERR(trans);
7968 btrfs_fix_block_accounting(trans, root);
7969 ret = btrfs_commit_transaction(trans, root);
7974 fprintf(stderr, "repaired damaged extent references\n");
7980 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7984 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7985 stripe_size = length;
7986 stripe_size /= num_stripes;
7987 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7988 stripe_size = length * 2;
7989 stripe_size /= num_stripes;
7990 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7991 stripe_size = length;
7992 stripe_size /= (num_stripes - 1);
7993 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
7994 stripe_size = length;
7995 stripe_size /= (num_stripes - 2);
7997 stripe_size = length;
8003 * Check the chunk with its block group/dev list ref:
8004 * Return 0 if all refs seems valid.
8005 * Return 1 if part of refs seems valid, need later check for rebuild ref
8006 * like missing block group and needs to search extent tree to rebuild them.
8007 * Return -1 if essential refs are missing and unable to rebuild.
/*
 * check_chunk_refs() - cross-check one chunk record against its block group
 * and every stripe's device extent, per the contract comment above (0 = all
 * refs valid, 1 = rebuildable mismatch, -1 = essential refs missing).
 * NOTE(review): numbered, partially elided listing; code lines kept verbatim.
 */
8009 static int check_chunk_refs(struct chunk_record *chunk_rec,
8010 struct block_group_tree *block_group_cache,
8011 struct device_extent_tree *dev_extent_cache,
8014 struct cache_extent *block_group_item;
8015 struct block_group_record *block_group_rec;
8016 struct cache_extent *dev_extent_item;
8017 struct device_extent_record *dev_extent_rec;
8021 int metadump_v2 = 0;
/* Step 1: the chunk must match a block group at the same offset/length. */
8025 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8028 if (block_group_item) {
8029 block_group_rec = container_of(block_group_item,
8030 struct block_group_record,
8032 if (chunk_rec->length != block_group_rec->offset ||
8033 chunk_rec->offset != block_group_rec->objectid ||
8035 chunk_rec->type_flags != block_group_rec->flags)) {
8038 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8039 chunk_rec->objectid,
8044 chunk_rec->type_flags,
8045 block_group_rec->objectid,
8046 block_group_rec->type,
8047 block_group_rec->offset,
8048 block_group_rec->offset,
8049 block_group_rec->objectid,
8050 block_group_rec->flags);
8053 list_del_init(&block_group_rec->list);
8054 chunk_rec->bg_rec = block_group_rec;
8059 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8060 chunk_rec->objectid,
8065 chunk_rec->type_flags);
/* Step 2: every stripe must match a device extent of the expected size. */
8072 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8073 chunk_rec->num_stripes);
8074 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8075 devid = chunk_rec->stripes[i].devid;
8076 offset = chunk_rec->stripes[i].offset;
8077 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8078 devid, offset, length);
8079 if (dev_extent_item) {
8080 dev_extent_rec = container_of(dev_extent_item,
8081 struct device_extent_record,
8083 if (dev_extent_rec->objectid != devid ||
8084 dev_extent_rec->offset != offset ||
8085 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8086 dev_extent_rec->length != length) {
8089 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8090 chunk_rec->objectid,
8093 chunk_rec->stripes[i].devid,
8094 chunk_rec->stripes[i].offset,
8095 dev_extent_rec->objectid,
8096 dev_extent_rec->offset,
8097 dev_extent_rec->length);
8100 list_move(&dev_extent_rec->chunk_list,
8101 &chunk_rec->dextents);
8106 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8107 chunk_rec->objectid,
8110 chunk_rec->stripes[i].devid,
8111 chunk_rec->stripes[i].offset);
8118 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
/*
 * check_chunks() - verify every chunk against its block group and device
 * extents via check_chunk_refs(), sorting chunks into @good/@rebuild/@bad
 * lists, then report any block groups or device extents left without a
 * matching chunk.
 * NOTE(review): numbered, partially elided listing; code lines kept verbatim.
 */
8119 int check_chunks(struct cache_tree *chunk_cache,
8120 struct block_group_tree *block_group_cache,
8121 struct device_extent_tree *dev_extent_cache,
8122 struct list_head *good, struct list_head *bad,
8123 struct list_head *rebuild, int silent)
8125 struct cache_extent *chunk_item;
8126 struct chunk_record *chunk_rec;
8127 struct block_group_record *bg_rec;
8128 struct device_extent_record *dext_rec;
8132 chunk_item = first_cache_extent(chunk_cache);
8133 while (chunk_item) {
8134 chunk_rec = container_of(chunk_item, struct chunk_record,
8136 err = check_chunk_refs(chunk_rec, block_group_cache,
8137 dev_extent_cache, silent);
/* Classify by check_chunk_refs() result: 0=good, >0=rebuild, <0=bad. */
8140 if (err == 0 && good)
8141 list_add_tail(&chunk_rec->list, good);
8142 if (err > 0 && rebuild)
8143 list_add_tail(&chunk_rec->list, rebuild);
8145 list_add_tail(&chunk_rec->list, bad);
8146 chunk_item = next_cache_extent(chunk_item);
/* Anything still on the orphan lists has no owning chunk. */
8149 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8152 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8160 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8164 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
/*
 * check_device_used() - sum the lengths of all device extents belonging to
 * @dev_rec and compare against the device item's bytes_used, reporting a
 * mismatch. Extents are detached from the device's orphan list as they are
 * accounted.
 * NOTE(review): numbered, partially elided listing; code lines kept verbatim.
 */
8175 static int check_device_used(struct device_record *dev_rec,
8176 struct device_extent_tree *dext_cache)
8178 struct cache_extent *cache;
8179 struct device_extent_record *dev_extent_rec;
8182 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8184 dev_extent_rec = container_of(cache,
8185 struct device_extent_record,
/* Stop once the iteration crosses into another device's extents. */
8187 if (dev_extent_rec->objectid != dev_rec->devid)
8190 list_del_init(&dev_extent_rec->device_list);
8191 total_byte += dev_extent_rec->length;
8192 cache = next_cache_extent(cache);
8195 if (total_byte != dev_rec->byte_used) {
8197 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8198 total_byte, dev_rec->byte_used, dev_rec->objectid,
8199 dev_rec->type, dev_rec->offset);
8206 /* check btrfs_dev_item -> btrfs_dev_extent */
/*
 * check_devices() - run check_device_used() over every device record in
 * @dev_cache, then report device extents whose device item was never found.
 * NOTE(review): numbered, partially elided listing; code lines kept verbatim.
 */
8207 static int check_devices(struct rb_root *dev_cache,
8208 struct device_extent_tree *dev_extent_cache)
8210 struct rb_node *dev_node;
8211 struct device_record *dev_rec;
8212 struct device_extent_record *dext_rec;
8216 dev_node = rb_first(dev_cache);
8218 dev_rec = container_of(dev_node, struct device_record, node);
8219 err = check_device_used(dev_rec, dev_extent_cache);
8223 dev_node = rb_next(dev_node);
8225 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8228 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8229 dext_rec->objectid, dext_rec->offset, dext_rec->length);
8236 static int add_root_item_to_list(struct list_head *head,
8237 u64 objectid, u64 bytenr, u64 last_snapshot,
8238 u8 level, u8 drop_level,
8239 int level_size, struct btrfs_key *drop_key)
8242 struct root_item_record *ri_rec;
8243 ri_rec = malloc(sizeof(*ri_rec));
8246 ri_rec->bytenr = bytenr;
8247 ri_rec->objectid = objectid;
8248 ri_rec->level = level;
8249 ri_rec->level_size = level_size;
8250 ri_rec->drop_level = drop_level;
8251 ri_rec->last_snapshot = last_snapshot;
8253 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8254 list_add_tail(&ri_rec->list, head);
8259 static void free_root_item_list(struct list_head *list)
8261 struct root_item_record *ri_rec;
8263 while (!list_empty(list)) {
8264 ri_rec = list_first_entry(list, struct root_item_record,
8266 list_del_init(&ri_rec->list);
/*
 * deal_root_from_list() - consume the queued root_item_records: read each
 * root's top block, seed it into the pending/seen/nodes caches, then drive
 * run_next_block() until the whole tree is processed, snapshot roots being
 * handled one at a time so the extent accounting stays correct. A final
 * run_next_block() loop drains any remaining pending blocks.
 * NOTE(review): numbered, partially elided listing; code lines kept verbatim.
 */
8271 static int deal_root_from_list(struct list_head *list,
8272 struct btrfs_root *root,
8273 struct block_info *bits,
8275 struct cache_tree *pending,
8276 struct cache_tree *seen,
8277 struct cache_tree *reada,
8278 struct cache_tree *nodes,
8279 struct cache_tree *extent_cache,
8280 struct cache_tree *chunk_cache,
8281 struct rb_root *dev_cache,
8282 struct block_group_tree *block_group_cache,
8283 struct device_extent_tree *dev_extent_cache)
8288 while (!list_empty(list)) {
8289 struct root_item_record *rec;
8290 struct extent_buffer *buf;
8291 rec = list_entry(list->next,
8292 struct root_item_record, list);
8294 buf = read_tree_block(root->fs_info->tree_root,
8295 rec->bytenr, rec->level_size, 0);
8296 if (!extent_buffer_uptodate(buf)) {
8297 free_extent_buffer(buf);
8301 add_root_to_pending(buf, extent_cache, pending,
8302 seen, nodes, rec->objectid);
8304 * To rebuild extent tree, we need deal with snapshot
8305 * one by one, otherwise we deal with node firstly which
8306 * can maximize readahead.
8309 ret = run_next_block(root, bits, bits_nr, &last,
8310 pending, seen, reada, nodes,
8311 extent_cache, chunk_cache,
8312 dev_cache, block_group_cache,
8313 dev_extent_cache, rec);
8317 free_extent_buffer(buf);
8318 list_del(&rec->list);
/* Drain whatever is still pending after all roots were queued. */
8324 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8325 reada, nodes, extent_cache, chunk_cache,
8326 dev_cache, block_group_cache,
8327 dev_extent_cache, NULL);
/*
 * Top-level pass over chunk/extent metadata: queue every tree root
 * (tree root, chunk root, and all ROOT_ITEMs found in the tree of tree
 * roots), walk their blocks, then cross-check chunks, extent refs and
 * devices against the caches built during the walk.
 * NOTE(review): listing is elided; error-handling branches between the
 * visible statements are not shown.
 */
8337 static int check_chunks_and_extents(struct btrfs_root *root)
8339 struct rb_root dev_cache;
8340 struct cache_tree chunk_cache;
8341 struct block_group_tree block_group_cache;
8342 struct device_extent_tree dev_extent_cache;
8343 struct cache_tree extent_cache;
8344 struct cache_tree seen;
8345 struct cache_tree pending;
8346 struct cache_tree reada;
8347 struct cache_tree nodes;
8348 struct extent_io_tree excluded_extents;
8349 struct cache_tree corrupt_blocks;
8350 struct btrfs_path path;
8351 struct btrfs_key key;
8352 struct btrfs_key found_key;
8354 struct block_info *bits;
8356 struct extent_buffer *leaf;
8358 struct btrfs_root_item ri;
8359 struct list_head dropping_trees;
8360 struct list_head normal_trees;
8361 struct btrfs_root *root1;
/* Initialize all per-run caches and lists. */
8366 dev_cache = RB_ROOT;
8367 cache_tree_init(&chunk_cache);
8368 block_group_tree_init(&block_group_cache);
8369 device_extent_tree_init(&dev_extent_cache);
8371 cache_tree_init(&extent_cache);
8372 cache_tree_init(&seen);
8373 cache_tree_init(&pending);
8374 cache_tree_init(&nodes);
8375 cache_tree_init(&reada);
8376 cache_tree_init(&corrupt_blocks);
8377 extent_io_tree_init(&excluded_extents);
8378 INIT_LIST_HEAD(&dropping_trees);
8379 INIT_LIST_HEAD(&normal_trees);
/*
 * Hook the fsck state into fs_info so lower-level code (e.g. the free
 * extent hook) can record what it sees; detached again on cleanup below.
 */
8382 root->fs_info->excluded_extents = &excluded_extents;
8383 root->fs_info->fsck_extent_cache = &extent_cache;
8384 root->fs_info->free_extent_hook = free_extent_hook;
8385 root->fs_info->corrupt_blocks = &corrupt_blocks;
8389 bits = malloc(bits_nr * sizeof(struct block_info));
8395 if (ctx.progress_enabled) {
8396 ctx.tp = TASK_EXTENTS;
8397 task_start(ctx.info);
/* Queue the tree root and chunk root for scanning. */
8401 root1 = root->fs_info->tree_root;
8402 level = btrfs_header_level(root1->node);
8403 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8404 root1->node->start, 0, level, 0,
8405 root1->nodesize, NULL);
8408 root1 = root->fs_info->chunk_root;
8409 level = btrfs_header_level(root1->node);
8410 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8411 root1->node->start, 0, level, 0,
8412 root1->nodesize, NULL);
/*
 * Iterate every ROOT_ITEM in the tree of tree roots. Roots with a
 * non-zero drop_progress (half-deleted snapshots) are queued on
 * dropping_trees, everything else on normal_trees.
 */
8415 btrfs_init_path(&path);
8418 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8419 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8424 leaf = path.nodes[0];
8425 slot = path.slots[0];
8426 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8427 ret = btrfs_next_leaf(root, &path);
8430 leaf = path.nodes[0];
8431 slot = path.slots[0];
8433 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8434 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8435 unsigned long offset;
8438 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8439 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8440 last_snapshot = btrfs_root_last_snapshot(&ri);
8441 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8442 level = btrfs_root_level(&ri);
8443 level_size = root->nodesize;
8444 ret = add_root_item_to_list(&normal_trees,
8446 btrfs_root_bytenr(&ri),
8447 last_snapshot, level,
8448 0, level_size, NULL);
8452 level = btrfs_root_level(&ri);
8453 level_size = root->nodesize;
8454 objectid = found_key.objectid;
8455 btrfs_disk_key_to_cpu(&found_key,
8457 ret = add_root_item_to_list(&dropping_trees,
8459 btrfs_root_bytenr(&ri),
8460 last_snapshot, level,
8462 level_size, &found_key);
8469 btrfs_release_path(&path);
8472 * check_block can return -EAGAIN if it fixes something, please keep
8473 * this in mind when dealing with return values from these functions, if
8474 * we get -EAGAIN we want to fall through and restart the loop.
8476 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8477 &seen, &reada, &nodes, &extent_cache,
8478 &chunk_cache, &dev_cache, &block_group_cache,
8485 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8486 &pending, &seen, &reada, &nodes,
8487 &extent_cache, &chunk_cache, &dev_cache,
8488 &block_group_cache, &dev_extent_cache);
/* Cross-check the caches built by the walk above. */
8495 ret = check_chunks(&chunk_cache, &block_group_cache,
8496 &dev_extent_cache, NULL, NULL, NULL, 0);
8503 ret = check_extent_refs(root, &extent_cache);
8510 ret = check_devices(&dev_cache, &dev_extent_cache);
8515 task_stop(ctx.info);
/* Detach the fsck hooks from fs_info and release all caches. */
8517 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8518 extent_io_tree_cleanup(&excluded_extents);
8519 root->fs_info->fsck_extent_cache = NULL;
8520 root->fs_info->free_extent_hook = NULL;
8521 root->fs_info->corrupt_blocks = NULL;
8522 root->fs_info->excluded_extents = NULL;
8525 free_chunk_cache_tree(&chunk_cache);
8526 free_device_cache_tree(&dev_cache);
8527 free_block_group_tree(&block_group_cache);
8528 free_device_extent_tree(&dev_extent_cache);
8529 free_extent_cache_tree(&seen);
8530 free_extent_cache_tree(&pending);
8531 free_extent_cache_tree(&reada);
8532 free_extent_cache_tree(&nodes);
/*
 * Second cleanup sequence — presumably an error/retry path distinct from
 * the one above (the branch structure is elided in this listing).
 */
8535 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8536 free_extent_cache_tree(&seen);
8537 free_extent_cache_tree(&pending);
8538 free_extent_cache_tree(&reada);
8539 free_extent_cache_tree(&nodes);
8540 free_chunk_cache_tree(&chunk_cache);
8541 free_block_group_tree(&block_group_cache);
8542 free_device_cache_tree(&dev_cache);
8543 free_device_extent_tree(&dev_extent_cache);
8544 free_extent_record_cache(root->fs_info, &extent_cache);
8545 free_root_item_list(&normal_trees);
8546 free_root_item_list(&dropping_trees);
8547 extent_io_tree_cleanup(&excluded_extents);
8552 * Check backrefs of a tree block given by @bytenr or @eb.
8554 * @root: the root containing the @bytenr or @eb
8555 * @eb: tree block extent buffer, can be NULL
8556 * @bytenr: bytenr of the tree block to search
8557 * @level: tree level of the tree block
8558 * @owner: owner of the tree block
8560 * Return >0 for any error found and output error message
8561 * Return 0 for no error found
8563 static int check_tree_block_ref(struct btrfs_root *root,
8564 struct extent_buffer *eb, u64 bytenr,
8565 int level, u64 owner)
8567 struct btrfs_key key;
8568 struct btrfs_root *extent_root = root->fs_info->extent_root;
8569 struct btrfs_path path;
8570 struct btrfs_extent_item *ei;
8571 struct btrfs_extent_inline_ref *iref;
8572 struct extent_buffer *leaf;
8578 u32 nodesize = root->nodesize;
/* Skinny metadata uses METADATA_ITEM keys instead of EXTENT_ITEM. */
8585 btrfs_init_path(&path);
8586 key.objectid = bytenr;
8587 if (btrfs_fs_incompat(root->fs_info,
8588 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8589 key.type = BTRFS_METADATA_ITEM_KEY;
8591 key.type = BTRFS_EXTENT_ITEM_KEY;
8592 key.offset = (u64)-1;
8594 /* Search for the backref in extent tree */
8595 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8597 err |= BACKREF_MISSING;
8600 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8602 err |= BACKREF_MISSING;
8606 leaf = path.nodes[0];
8607 slot = path.slots[0];
8608 btrfs_item_key_to_cpu(leaf, &key, slot);
8610 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
/*
 * For METADATA_ITEM the level is encoded in key.offset and inline refs
 * follow the extent item directly; otherwise a tree_block_info sits
 * between the extent item and the inline refs.
 */
8612 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8613 skinny_level = (int)key.offset;
8614 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8616 struct btrfs_tree_block_info *info;
8618 info = (struct btrfs_tree_block_info *)(ei + 1);
8619 skinny_level = btrfs_tree_block_level(leaf, info);
8620 iref = (struct btrfs_extent_inline_ref *)(info + 1);
/* Sanity checks on the extent item itself: flags, generation, level, refs. */
8627 if (!(btrfs_extent_flags(leaf, ei) &
8628 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8630 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8631 key.objectid, nodesize,
8632 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8633 err = BACKREF_MISMATCH;
8635 header_gen = btrfs_header_generation(eb);
8636 extent_gen = btrfs_extent_generation(leaf, ei);
8637 if (header_gen != extent_gen) {
8639 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8640 key.objectid, nodesize, header_gen,
8642 err = BACKREF_MISMATCH;
8644 if (level != skinny_level) {
8646 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8647 key.objectid, nodesize, level, skinny_level);
8648 err = BACKREF_MISMATCH;
8650 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8652 "extent[%llu %u] is referred by other roots than %llu",
8653 key.objectid, nodesize, root->objectid);
8654 err = BACKREF_MISMATCH;
8659 * Iterate the extent/metadata item to find the exact backref
8661 item_size = btrfs_item_size_nr(leaf, slot);
8662 ptr = (unsigned long)iref;
8663 end = (unsigned long)ei + item_size;
8665 iref = (struct btrfs_extent_inline_ref *)ptr;
8666 type = btrfs_extent_inline_ref_type(leaf, iref);
8667 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8669 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8670 (offset == root->objectid || offset == owner)) {
8672 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8673 /* Check if the backref points to valid referencer */
/* Recurses with eb == NULL, so the eb-only checks above are skipped. */
8674 found_ref = !check_tree_block_ref(root, NULL, offset,
8680 ptr += btrfs_extent_inline_ref_size(type);
8684 * Inlined extent item doesn't have what we need, check
8685 * TREE_BLOCK_REF_KEY
8688 btrfs_release_path(&path);
8689 key.objectid = bytenr;
8690 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8691 key.offset = root->objectid;
8693 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8698 err |= BACKREF_MISSING;
8700 btrfs_release_path(&path);
8701 if (eb && (err & BACKREF_MISSING))
8702 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8703 bytenr, nodesize, owner, level);
/*
 * Replace @root's node with a fresh, empty one (or reuse the old block
 * in the @overwrite case) and update the root item in the tree root.
 * Used when rebuilding a whole tree (extent tree, csum tree, ...).
 * NOTE(review): listing is elided; the overwrite branch and error
 * handling are only partially visible.
 */
8707 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
8708 struct btrfs_root *root, int overwrite)
8710 struct extent_buffer *c;
8711 struct extent_buffer *old = root->node;
8714 struct btrfs_disk_key disk_key = {0,0,0};
8720 extent_buffer_get(c);
8723 c = btrfs_alloc_free_block(trans, root,
8725 root->root_key.objectid,
8726 &disk_key, level, 0, 0);
8729 extent_buffer_get(c);
/* Initialize the new node's header from scratch. */
8733 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
8734 btrfs_set_header_level(c, level);
8735 btrfs_set_header_bytenr(c, c->start);
8736 btrfs_set_header_generation(c, trans->transid);
8737 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
8738 btrfs_set_header_owner(c, root->root_key.objectid);
8740 write_extent_buffer(c, root->fs_info->fsid,
8741 btrfs_header_fsid(), BTRFS_FSID_SIZE);
8743 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
8744 btrfs_header_chunk_tree_uuid(c),
8747 btrfs_mark_buffer_dirty(c);
8749 * this case can happen in the following case:
8751 * 1.overwrite previous root.
8753 * 2.reinit reloc data root, this is because we skip pin
8754 * down reloc data tree before which means we can allocate
8755 * same block bytenr here.
8757 if (old->start == c->start) {
8758 btrfs_set_root_generation(&root->root_item,
8760 root->root_item.level = btrfs_header_level(root->node);
8761 ret = btrfs_update_root(trans, root->fs_info->tree_root,
8762 &root->root_key, &root->root_item);
8764 free_extent_buffer(c);
8768 free_extent_buffer(old);
8770 add_root_to_dirty_list(root);
/*
 * Recursively pin every tree block reachable from @eb in
 * fs_info->pinned_extents so that a later tree re-init cannot allocate
 * over metadata that is still in use. When walking the tree root
 * (@tree_root != 0), leaves contain ROOT_ITEMs and we recurse into each
 * referenced subtree root as well.
 */
8774 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
8775 struct extent_buffer *eb, int tree_root)
8777 struct extent_buffer *tmp;
8778 struct btrfs_root_item *ri;
8779 struct btrfs_key key;
8782 int level = btrfs_header_level(eb);
8788 * If we have pinned this block before, don't pin it again.
8789 * This can not only avoid forever loop with broken filesystem
8790 * but also give us some speedups.
8792 if (test_range_bit(&fs_info->pinned_extents, eb->start,
8793 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
8796 btrfs_pin_extent(fs_info, eb->start, eb->len);
8798 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8799 nritems = btrfs_header_nritems(eb);
8800 for (i = 0; i < nritems; i++) {
/* Leaf of the tree root: descend into each ROOT_ITEM's subtree. */
8802 btrfs_item_key_to_cpu(eb, &key, i);
8803 if (key.type != BTRFS_ROOT_ITEM_KEY)
8805 /* Skip the extent root and reloc roots */
8806 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
8807 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8808 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
8810 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
8811 bytenr = btrfs_disk_root_bytenr(eb, ri);
8814 * If at any point we start needing the real root we
8815 * will have to build a stump root for the root we are
8816 * in, but for now this doesn't actually use the root so
8817 * just pass in extent_root.
8819 tmp = read_tree_block(fs_info->extent_root, bytenr,
8821 if (!extent_buffer_uptodate(tmp)) {
8822 fprintf(stderr, "Error reading root block\n");
8825 ret = pin_down_tree_blocks(fs_info, tmp, 0);
8826 free_extent_buffer(tmp);
/* Internal node: recurse through each child block pointer. */
8830 bytenr = btrfs_node_blockptr(eb, i);
8832 /* If we aren't the tree root don't read the block */
8834 btrfs_pin_extent(fs_info, bytenr, nodesize);
8838 tmp = read_tree_block(fs_info->extent_root, bytenr,
8840 if (!extent_buffer_uptodate(tmp)) {
8841 fprintf(stderr, "Error reading tree block\n");
8844 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
8845 free_extent_buffer(tmp);
/* Pin all metadata reachable from the chunk root and the tree root. */
8854 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
8858 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
8862 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
/*
 * Rebuild the in-memory block groups from the chunk tree: recompute the
 * avail_*_alloc_bits, create a block group per chunk item and mark each
 * chunk's range dirty in the free space cache.
 */
8865 static int reset_block_groups(struct btrfs_fs_info *fs_info)
8867 struct btrfs_block_group_cache *cache;
8868 struct btrfs_path *path;
8869 struct extent_buffer *leaf;
8870 struct btrfs_chunk *chunk;
8871 struct btrfs_key key;
8875 path = btrfs_alloc_path();
8880 key.type = BTRFS_CHUNK_ITEM_KEY;
8883 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
8885 btrfs_free_path(path);
8890 * We do this in case the block groups were screwed up and had alloc
8891 * bits that aren't actually set on the chunks. This happens with
8892 * restored images every time and could happen in real life I guess.
8894 fs_info->avail_data_alloc_bits = 0;
8895 fs_info->avail_metadata_alloc_bits = 0;
8896 fs_info->avail_system_alloc_bits = 0;
8898 /* First we need to create the in-memory block groups */
8900 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8901 ret = btrfs_next_leaf(fs_info->chunk_root, path);
8903 btrfs_free_path(path);
8911 leaf = path->nodes[0];
8912 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
8913 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
8918 chunk = btrfs_item_ptr(leaf, path->slots[0],
8919 struct btrfs_chunk);
8920 btrfs_add_block_group(fs_info, 0,
8921 btrfs_chunk_type(leaf, chunk),
8922 key.objectid, key.offset,
8923 btrfs_chunk_length(leaf, chunk));
8924 set_extent_dirty(&fs_info->free_space_cache, key.offset,
8925 key.offset + btrfs_chunk_length(leaf, chunk),
/* Walk the freshly created block groups (purpose of this loop's body is elided). */
8931 cache = btrfs_lookup_first_block_group(fs_info, start);
8935 start = cache->key.objectid + cache->key.offset;
8938 btrfs_free_path(path);
/*
 * Remove any pending balance state: delete the balance item, delete all
 * TREE_RELOC root items, then re-read and reinit the data reloc tree
 * and recreate its root dir.
 */
8942 static int reset_balance(struct btrfs_trans_handle *trans,
8943 struct btrfs_fs_info *fs_info)
8945 struct btrfs_root *root = fs_info->tree_root;
8946 struct btrfs_path *path;
8947 struct extent_buffer *leaf;
8948 struct btrfs_key key;
8949 int del_slot, del_nr = 0;
8953 path = btrfs_alloc_path();
/* Delete the balance item itself, if present. */
8957 key.objectid = BTRFS_BALANCE_OBJECTID;
8958 key.type = BTRFS_BALANCE_ITEM_KEY;
8961 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8966 goto reinit_data_reloc;
8971 ret = btrfs_del_item(trans, root, path);
8974 btrfs_release_path(path);
/* Batch-delete every TREE_RELOC root item from the tree root. */
8976 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
8977 key.type = BTRFS_ROOT_ITEM_KEY;
8980 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8984 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8989 ret = btrfs_del_items(trans, root, path,
8996 btrfs_release_path(path);
8999 ret = btrfs_search_slot(trans, root, &key, path,
9006 leaf = path->nodes[0];
9007 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9008 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
9010 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
9015 del_slot = path->slots[0];
9024 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
9028 btrfs_release_path(path);
/* Re-init the data relocation tree with a fresh root node. */
9031 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
9032 key.type = BTRFS_ROOT_ITEM_KEY;
9033 key.offset = (u64)-1;
9034 root = btrfs_read_fs_root(fs_info, &key);
9036 fprintf(stderr, "Error reading data reloc tree\n");
9037 ret = PTR_ERR(root);
9040 record_root_in_trans(trans, root);
9041 ret = btrfs_fsck_reinit_root(trans, root, 0);
9044 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
9046 btrfs_free_path(path);
/*
 * Wipe and rebuild the extent tree from scratch: pin all reachable
 * metadata, reset the in-memory block groups from the chunk tree,
 * reinit the extent root, re-insert the block group items and clear any
 * pending balance. Refuses to run on mixed block group filesystems.
 */
9050 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
9051 struct btrfs_fs_info *fs_info)
9057 * The only reason we don't do this is because right now we're just
9058 * walking the trees we find and pinning down their bytes, we don't look
9059 * at any of the leaves. In order to do mixed groups we'd have to check
9060 * the leaves of any fs roots and pin down the bytes for any file
9061 * extents we find. Not hard but why do it if we don't have to?
9063 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
9064 fprintf(stderr, "We don't support re-initing the extent tree "
9065 "for mixed block groups yet, please notify a btrfs "
9066 "developer you want to do this so they can add this "
9067 "functionality.\n");
9072 * first we need to walk all of the trees except the extent tree and pin
9073 * down the bytes that are in use so we don't overwrite any existing
9076 ret = pin_metadata_blocks(fs_info);
9078 fprintf(stderr, "error pinning down used bytes\n");
9083 * Need to drop all the block groups since we're going to recreate all
9086 btrfs_free_block_groups(fs_info);
9087 ret = reset_block_groups(fs_info);
9089 fprintf(stderr, "error resetting the block groups\n");
9093 /* Ok we can allocate now, reinit the extent root */
9094 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
9096 fprintf(stderr, "extent root initialization failed\n");
9098 * When the transaction code is updated we should end the
9099 * transaction, but for now progs only knows about commit so
9100 * just return an error.
9106 * Now we have all the in-memory block groups setup so we can make
9107 * allocations properly, and the metadata we care about is safe since we
9108 * pinned all of it above.
9111 struct btrfs_block_group_cache *cache;
9113 cache = btrfs_lookup_first_block_group(fs_info, start);
9116 start = cache->key.objectid + cache->key.offset;
/* Write each in-memory block group back into the new extent tree. */
9117 ret = btrfs_insert_item(trans, fs_info->extent_root,
9118 &cache->key, &cache->item,
9119 sizeof(cache->item));
9121 fprintf(stderr, "Error adding block group\n");
9124 btrfs_extent_post_op(trans, fs_info->extent_root);
9127 ret = reset_balance(trans, fs_info);
9129 fprintf(stderr, "error resetting the pending balance\n");
/*
 * Force a COW of the metadata block @eb by looking up its owner root
 * and re-searching for its first key with a transaction held and
 * lowest_level set to the block's level.
 */
9134 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
9136 struct btrfs_path *path;
9137 struct btrfs_trans_handle *trans;
9138 struct btrfs_key key;
9141 printf("Recowing metadata block %llu\n", eb->start);
9142 key.objectid = btrfs_header_owner(eb);
9143 key.type = BTRFS_ROOT_ITEM_KEY;
9144 key.offset = (u64)-1;
9146 root = btrfs_read_fs_root(root->fs_info, &key);
9148 fprintf(stderr, "Couldn't find owner root %llu\n",
9150 return PTR_ERR(root);
9153 path = btrfs_alloc_path();
9157 trans = btrfs_start_transaction(root, 1);
9158 if (IS_ERR(trans)) {
9159 btrfs_free_path(path);
9160 return PTR_ERR(trans);
/* Searching with cow=1 down to eb's level rewrites the block. */
9163 path->lowest_level = btrfs_header_level(eb);
9164 if (path->lowest_level)
9165 btrfs_node_key_to_cpu(eb, &key, 0);
9167 btrfs_item_key_to_cpu(eb, &key, 0);
9169 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9170 btrfs_commit_transaction(trans, root);
9171 btrfs_free_path(path);
/*
 * Delete a previously recorded bad item (@bad->key) from the root
 * identified by @bad->root_id, inside its own transaction.
 */
9175 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
9177 struct btrfs_path *path;
9178 struct btrfs_trans_handle *trans;
9179 struct btrfs_key key;
9182 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
9183 bad->key.type, bad->key.offset);
9184 key.objectid = bad->root_id;
9185 key.type = BTRFS_ROOT_ITEM_KEY;
9186 key.offset = (u64)-1;
9188 root = btrfs_read_fs_root(root->fs_info, &key);
9190 fprintf(stderr, "Couldn't find owner root %llu\n",
9192 return PTR_ERR(root);
9195 path = btrfs_alloc_path();
9199 trans = btrfs_start_transaction(root, 1);
9200 if (IS_ERR(trans)) {
9201 btrfs_free_path(path);
9202 return PTR_ERR(trans);
9205 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
9211 ret = btrfs_del_item(trans, root, path);
9213 btrfs_commit_transaction(trans, root);
9214 btrfs_free_path(path);
/*
 * Discard the log tree by zeroing the superblock's log root pointer and
 * level inside a committed transaction.
 */
9218 static int zero_log_tree(struct btrfs_root *root)
9220 struct btrfs_trans_handle *trans;
9223 trans = btrfs_start_transaction(root, 1);
9224 if (IS_ERR(trans)) {
9225 ret = PTR_ERR(trans);
9228 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
9229 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
9230 ret = btrfs_commit_transaction(trans, root);
/*
 * Read the data extent starting at @start one sector at a time into
 * @buf and insert a checksum item for each sector into the csum tree.
 * NOTE(review): the parameter list is partially elided; a length
 * parameter ("len") is clearly used by the loop below.
 */
9234 static int populate_csum(struct btrfs_trans_handle *trans,
9235 struct btrfs_root *csum_root, char *buf, u64 start,
9242 while (offset < len) {
9243 sectorsize = csum_root->sectorsize;
9244 ret = read_extent_data(csum_root, buf, start + offset,
9248 ret = btrfs_csum_file_block(trans, csum_root, start + len,
9249 start + offset, buf, sectorsize);
9252 offset += sectorsize;
/*
 * Walk @cur_root's EXTENT_DATA items and (re)compute checksums for
 * every regular (non-inline, non-prealloc) file extent via
 * populate_csum().
 */
9257 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
9258 struct btrfs_root *csum_root,
9259 struct btrfs_root *cur_root)
9261 struct btrfs_path *path;
9262 struct btrfs_key key;
9263 struct extent_buffer *node;
9264 struct btrfs_file_extent_item *fi;
9271 path = btrfs_alloc_path();
/* One sector worth of scratch space for populate_csum(). */
9274 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
9284 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
9287 /* Iterate all regular file extents and fill its csum */
9289 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
9291 if (key.type != BTRFS_EXTENT_DATA_KEY)
9293 node = path->nodes[0];
9294 slot = path->slots[0];
9295 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
9296 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
9298 start = btrfs_file_extent_disk_bytenr(node, fi);
9299 len = btrfs_file_extent_disk_num_bytes(node, fi);
9301 ret = populate_csum(trans, csum_root, buf, start, len);
9308 * TODO: if next leaf is corrupted, jump to nearest next valid
9311 ret = btrfs_next_item(cur_root, path);
9321 btrfs_free_path(path);
/*
 * Rebuild the csum tree by iterating every fs/subvolume root item in
 * the tree of tree roots and delegating each root to
 * fill_csum_tree_from_one_fs_root().
 */
9326 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
9327 struct btrfs_root *csum_root)
9329 struct btrfs_fs_info *fs_info = csum_root->fs_info;
9330 struct btrfs_path *path;
9331 struct btrfs_root *tree_root = fs_info->tree_root;
9332 struct btrfs_root *cur_root;
9333 struct extent_buffer *node;
9334 struct btrfs_key key;
9338 path = btrfs_alloc_path();
9342 key.objectid = BTRFS_FS_TREE_OBJECTID;
9344 key.type = BTRFS_ROOT_ITEM_KEY;
9346 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
9355 node = path->nodes[0];
9356 slot = path->slots[0];
9357 btrfs_item_key_to_cpu(node, &key, slot);
9358 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
9360 if (key.type != BTRFS_ROOT_ITEM_KEY)
/* Only fs/subvolume trees carry file extents worth checksumming. */
9362 if (!is_fstree(key.objectid))
9364 key.offset = (u64)-1;
9366 cur_root = btrfs_read_fs_root(fs_info, &key);
9367 if (IS_ERR(cur_root) || !cur_root) {
9368 fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
9372 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
9377 ret = btrfs_next_item(tree_root, path);
9387 btrfs_free_path(path);
/*
 * Rebuild the csum tree by walking the extent tree and checksumming
 * every extent flagged BTRFS_EXTENT_FLAG_DATA via populate_csum().
 */
9391 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
9392 struct btrfs_root *csum_root)
9394 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
9395 struct btrfs_path *path;
9396 struct btrfs_extent_item *ei;
9397 struct extent_buffer *leaf;
9399 struct btrfs_key key;
9402 path = btrfs_alloc_path();
9407 key.type = BTRFS_EXTENT_ITEM_KEY;
9410 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
9412 btrfs_free_path(path);
9416 buf = malloc(csum_root->sectorsize);
9418 btrfs_free_path(path);
9423 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9424 ret = btrfs_next_leaf(extent_root, path);
9432 leaf = path->nodes[0];
9434 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9435 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
/* Skip tree blocks; only data extents get csums. */
9440 ei = btrfs_item_ptr(leaf, path->slots[0],
9441 struct btrfs_extent_item);
9442 if (!(btrfs_extent_flags(leaf, ei) &
9443 BTRFS_EXTENT_FLAG_DATA)) {
9448 ret = populate_csum(trans, csum_root, buf, key.objectid,
9455 btrfs_free_path(path);
9461 * Recalculate the csum and put it into the csum tree.
9463 * Extent tree init will wipe out all the extent info, so in that case, we
9464 * can't depend on extent tree, but use fs tree. If search_fs_tree is set, we
9465 * will use fs/subvol trees to init the csum tree.
9467 static int fill_csum_tree(struct btrfs_trans_handle *trans,
9468 struct btrfs_root *csum_root,
/*
 * Dispatch to the fs-tree or extent-tree based rebuild; the selector
 * parameter and its test are elided in this listing (presumably the
 * "search_fs_tree" flag mentioned in the comment above).
 */
9472 return fill_csum_tree_from_fs(trans, csum_root);
9474 return fill_csum_tree_from_extent(trans, csum_root);
/*
 * Tear down the global roots_info_cache: free every root_item_info
 * entry, then the cache tree itself, and reset the pointer so a later
 * build_roots_info_cache() starts fresh.
 */
9477 static void free_roots_info_cache(void)
9479 if (!roots_info_cache)
9482 while (!cache_tree_empty(roots_info_cache)) {
9483 struct cache_extent *entry;
9484 struct root_item_info *rii;
9486 entry = first_cache_extent(roots_info_cache);
9489 remove_cache_extent(roots_info_cache, entry);
9490 rii = container_of(entry, struct root_item_info, cache_extent);
9494 free(roots_info_cache);
9495 roots_info_cache = NULL;
/*
 * Scan the extent tree for tree-block extents whose first inline ref is
 * a TREE_BLOCK_REF and record, per referencing root id, the
 * highest-level node seen (bytenr, generation, level, node count) in
 * roots_info_cache. maybe_repair_root_item() later compares this
 * against the on-disk root items.
 */
9498 static int build_roots_info_cache(struct btrfs_fs_info *info)
9501 struct btrfs_key key;
9502 struct extent_buffer *leaf;
9503 struct btrfs_path *path;
9505 if (!roots_info_cache) {
9506 roots_info_cache = malloc(sizeof(*roots_info_cache));
9507 if (!roots_info_cache)
9509 cache_tree_init(roots_info_cache);
9512 path = btrfs_alloc_path();
9517 key.type = BTRFS_EXTENT_ITEM_KEY;
9520 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
9523 leaf = path->nodes[0];
9526 struct btrfs_key found_key;
9527 struct btrfs_extent_item *ei;
9528 struct btrfs_extent_inline_ref *iref;
9529 int slot = path->slots[0];
9534 struct cache_extent *entry;
9535 struct root_item_info *rii;
9537 if (slot >= btrfs_header_nritems(leaf)) {
9538 ret = btrfs_next_leaf(info->extent_root, path);
9545 leaf = path->nodes[0];
9546 slot = path->slots[0];
9549 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9551 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9552 found_key.type != BTRFS_METADATA_ITEM_KEY)
9555 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
9556 flags = btrfs_extent_flags(leaf, ei);
/* Only tree-block extents matter for root item repair. */
9558 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
9559 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
/* Skinny metadata: level in key.offset; otherwise read tree_block_info. */
9562 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
9563 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
9564 level = found_key.offset;
9566 struct btrfs_tree_block_info *binfo;
9568 binfo = (struct btrfs_tree_block_info *)(ei + 1);
9569 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
9570 level = btrfs_tree_block_level(leaf, binfo);
9574 * For a root extent, it must be of the following type and the
9575 * first (and only one) iref in the item.
9577 type = btrfs_extent_inline_ref_type(leaf, iref);
9578 if (type != BTRFS_TREE_BLOCK_REF_KEY)
9581 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
/* Create or update the per-root cache entry keyed by root_id. */
9582 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9584 rii = malloc(sizeof(struct root_item_info));
9589 rii->cache_extent.start = root_id;
9590 rii->cache_extent.size = 1;
9591 rii->level = (u8)-1;
9592 entry = &rii->cache_extent;
9593 ret = insert_cache_extent(roots_info_cache, entry);
9596 rii = container_of(entry, struct root_item_info,
9600 ASSERT(rii->cache_extent.start == root_id);
9601 ASSERT(rii->cache_extent.size == 1);
/* A strictly higher level supersedes; an equal level bumps node_count. */
9603 if (level > rii->level || rii->level == (u8)-1) {
9605 rii->bytenr = found_key.objectid;
9606 rii->gen = btrfs_extent_generation(leaf, ei);
9607 rii->node_count = 1;
9608 } else if (level == rii->level) {
9616 btrfs_free_path(path);
/*
 * Compare the on-disk root item at @path against the data gathered by
 * build_roots_info_cache() for @root_key->objectid; in repair mode
 * (read_only_mode == 0) rewrite the root item's bytenr/level/generation
 * to match the actual highest-level root node found in the extent tree.
 */
9621 static int maybe_repair_root_item(struct btrfs_fs_info *info,
9622 struct btrfs_path *path,
9623 const struct btrfs_key *root_key,
9624 const int read_only_mode)
9626 const u64 root_id = root_key->objectid;
9627 struct cache_extent *entry;
9628 struct root_item_info *rii;
9629 struct btrfs_root_item ri;
9630 unsigned long offset;
9632 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
9635 "Error: could not find extent items for root %llu\n",
9636 root_key->objectid);
9640 rii = container_of(entry, struct root_item_info, cache_extent);
9641 ASSERT(rii->cache_extent.start == root_id);
9642 ASSERT(rii->cache_extent.size == 1);
/* A unique highest-level node is required to identify the true root. */
9644 if (rii->node_count != 1) {
9646 "Error: could not find btree root extent for root %llu\n",
9651 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
9652 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
9654 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
9655 btrfs_root_level(&ri) != rii->level ||
9656 btrfs_root_generation(&ri) != rii->gen) {
9659 * If we're in repair mode but our caller told us to not update
9660 * the root item, i.e. just check if it needs to be updated, don't
9661 * print this message, since the caller will call us again shortly
9662 * for the same root item without read only mode (the caller will
9663 * open a transaction first).
9665 if (!(read_only_mode && repair))
9667 "%sroot item for root %llu,"
9668 " current bytenr %llu, current gen %llu, current level %u,"
9669 " new bytenr %llu, new gen %llu, new level %u\n",
9670 (read_only_mode ? "" : "fixing "),
9672 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
9673 btrfs_root_level(&ri),
9674 rii->bytenr, rii->gen, rii->level);
/* Never roll a root item backwards to an older generation. */
9676 if (btrfs_root_generation(&ri) > rii->gen) {
9678 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
9679 root_id, btrfs_root_generation(&ri), rii->gen);
9683 if (!read_only_mode) {
9684 btrfs_set_root_bytenr(&ri, rii->bytenr);
9685 btrfs_set_root_level(&ri, rii->level);
9686 btrfs_set_root_generation(&ri, rii->gen);
9687 write_extent_buffer(path->nodes[0], &ri,
9688 offset, sizeof(ri));
9698 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
9699 * caused read-only snapshots to be corrupted if they were created at a moment
9700 * when the source subvolume/snapshot had orphan items. The issue was that the
9701 * on-disk root items became incorrect, referring to the pre orphan cleanup root
9702 * node instead of the post orphan cleanup root node.
9703 * So this function, and its callees, just detects and fixes those cases. Even
9704 * though the regression was for read-only snapshots, this function applies to
9705 * any snapshot/subvolume root.
9706 * This must be run before any other repair code - if it is not, other
9707 * repair code may delete or modify backrefs in the extent tree, for example,
9708 * which would result in an inconsistent fs after repairing the root items.
/*
 * Driver for the root item repair described in the comment above:
 * build the roots info cache, then walk all ROOT_ITEMs from
 * BTRFS_FIRST_FREE_OBJECTID onward calling maybe_repair_root_item(),
 * committing a transaction per batch of fixes (reloc trees skipped).
 */
9710 static int repair_root_items(struct btrfs_fs_info *info)
9712 struct btrfs_path *path = NULL;
9713 struct btrfs_key key;
9714 struct extent_buffer *leaf;
9715 struct btrfs_trans_handle *trans = NULL;
9720 ret = build_roots_info_cache(info);
9724 path = btrfs_alloc_path();
9730 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
9731 key.type = BTRFS_ROOT_ITEM_KEY;
9736 * Avoid opening and committing transactions if a leaf doesn't have
9737 * any root items that need to be fixed, so that we avoid rotating
9738 * backup roots unnecessarily.
9741 trans = btrfs_start_transaction(info->tree_root, 1);
9742 if (IS_ERR(trans)) {
9743 ret = PTR_ERR(trans);
9748 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
9752 leaf = path->nodes[0];
9755 struct btrfs_key found_key;
9757 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
9758 int no_more_keys = find_next_key(path, &key);
9760 btrfs_release_path(path);
9762 ret = btrfs_commit_transaction(trans,
9774 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
9776 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
9778 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
9781 ret = maybe_repair_root_item(info, path, &found_key,
/* A fix was detected in read-only pass: restart with a transaction. */
9786 if (!trans && repair) {
9789 btrfs_release_path(path);
9799 free_roots_info_cache();
9800 btrfs_free_path(path);
9802 btrfs_commit_transaction(trans, info->tree_root);
/* Usage/help text for "btrfs check", one line per entry. */
9809 const char * const cmd_check_usage[] = {
9810 "btrfs check [options] <device>",
9811 "Check structural integrity of a filesystem (unmounted).",
9812 "Check structural integrity of an unmounted filesystem. Verify internal",
9813 "trees' consistency and item connectivity. In the repair mode try to",
9814 "fix the problems found.",
9815 "WARNING: the repair mode is considered dangerous",
9817 "-s|--super <superblock> use this superblock copy",
9818 "-b|--backup use the first valid backup root copy",
9819 "--repair try to repair the filesystem",
9820 "--readonly run in read-only mode (default)",
9821 "--init-csum-tree create a new CRC tree",
9822 "--init-extent-tree create a new extent tree",
9823 "--check-data-csum verify checksums of data blocks",
9824 "-Q|--qgroup-report print a report on qgroup consistency",
9825 "-E|--subvol-extents <subvolid>",
9826 " print subvolume extents and sharing state",
9827 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
9828 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
9829 "-p|--progress indicate progress",
/*
 * Entry point for "btrfs check": parse options, open the filesystem and run
 * the whole sequence of consistency checks (extents/chunks, free space,
 * fs roots, csums, root refs, qgroups), optionally repairing as it goes.
 * Returns 0 on a clean filesystem, non-zero when errors were found or a
 * step failed (exact aggregation of 'ret'/'err' is partly outside this
 * gapped view -- confirm against full source).
 */
9833 int cmd_check(int argc, char **argv)
9835 struct cache_tree root_cache;
9836 struct btrfs_root *root;
9837 struct btrfs_fs_info *info;
9840 u64 tree_root_bytenr = 0;
9841 u64 chunk_root_bytenr = 0;
9842 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
9845 int init_csum_tree = 0;
9847 int qgroup_report = 0;
9848 int qgroups_repaired = 0;
9849 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
/* Values >= 257 so long-only options cannot collide with short chars. */
9853 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
9854 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
9855 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
9856 static const struct option long_options[] = {
9857 { "super", required_argument, NULL, 's' },
9858 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
9859 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
9860 { "init-csum-tree", no_argument, NULL,
9861 GETOPT_VAL_INIT_CSUM },
9862 { "init-extent-tree", no_argument, NULL,
9863 GETOPT_VAL_INIT_EXTENT },
9864 { "check-data-csum", no_argument, NULL,
9865 GETOPT_VAL_CHECK_CSUM },
9866 { "backup", no_argument, NULL, 'b' },
9867 { "subvol-extents", required_argument, NULL, 'E' },
9868 { "qgroup-report", no_argument, NULL, 'Q' },
9869 { "tree-root", required_argument, NULL, 'r' },
9870 { "chunk-root", required_argument, NULL,
9871 GETOPT_VAL_CHUNK_TREE },
9872 { "progress", no_argument, NULL, 'p' },
/* Option-parsing loop (switch on 'c'; gapped lines hide the loop head). */
9876 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
9880 case 'a': /* ignored */ break;
/* -b: try the first valid backup root instead of the primary one. */
9882 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
/* -s: use the given superblock mirror copy. */
9885 num = arg_strtou64(optarg);
9886 if (num >= BTRFS_SUPER_MIRROR_MAX) {
9888 "ERROR: super mirror should be less than: %d\n",
9889 BTRFS_SUPER_MIRROR_MAX);
9892 bytenr = btrfs_sb_offset(((int)num));
9893 printf("using SB copy %llu, bytenr %llu\n", num,
9894 (unsigned long long)bytenr);
/* -E: report extents/sharing for one subvolume instead of checking. */
9900 subvolid = arg_strtou64(optarg);
/* -r / --chunk-root: override tree root and chunk root locations. */
9903 tree_root_bytenr = arg_strtou64(optarg);
9905 case GETOPT_VAL_CHUNK_TREE:
9906 chunk_root_bytenr = arg_strtou64(optarg);
9909 ctx.progress_enabled = true;
9913 usage(cmd_check_usage);
9914 case GETOPT_VAL_REPAIR:
9915 printf("enabling repair mode\n");
/* Repairs obviously need a writable ctree. */
9917 ctree_flags |= OPEN_CTREE_WRITES;
9919 case GETOPT_VAL_READONLY:
9922 case GETOPT_VAL_INIT_CSUM:
9923 printf("Creating a new CRC tree\n");
9926 ctree_flags |= OPEN_CTREE_WRITES;
9928 case GETOPT_VAL_INIT_EXTENT:
9929 init_extent_tree = 1;
/* Can't trust block groups while rebuilding the extent tree. */
9930 ctree_flags |= (OPEN_CTREE_WRITES |
9931 OPEN_CTREE_NO_BLOCK_GROUPS);
9934 case GETOPT_VAL_CHECK_CSUM:
9935 check_data_csum = 1;
/* Exactly one positional argument: the device to check. */
9940 if (check_argc_exact(argc - optind, 1))
9941 usage(cmd_check_usage);
9943 if (ctx.progress_enabled) {
9944 ctx.tp = TASK_NOTHING;
9945 ctx.info = task_init(print_status_check, print_status_return, &ctx);
9948 /* This check is the only reason for --readonly to exist */
9949 if (readonly && repair) {
9950 fprintf(stderr, "Repair options are not compatible with --readonly\n");
9955 cache_tree_init(&root_cache);
/* Refuse to touch a mounted filesystem. */
9957 if((ret = check_mounted(argv[optind])) < 0) {
9958 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
9961 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
9966 /* only allow partial opening under repair mode */
9968 ctree_flags |= OPEN_CTREE_PARTIAL;
9970 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
9971 chunk_root_bytenr, ctree_flags);
9973 fprintf(stderr, "Couldn't open file system\n");
9979 root = info->fs_root;
9982 * repair mode will force us to commit transaction which
9983 * will make us fail to load log tree when mounting.
9985 if (repair && btrfs_super_log_root(info->super_copy)) {
/* Ask before discarding the log tree; declining aborts the repair. */
9986 ret = ask_user("repair mode will force to clear out log tree, Are you sure?");
9991 ret = zero_log_tree(root);
9993 fprintf(stderr, "fail to zero log tree\n");
9998 uuid_unparse(info->super_copy->fsid, uuidbuf);
/* -Q: only produce the qgroup report, skip the normal fsck. */
9999 if (qgroup_report) {
10000 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
10002 ret = qgroup_verify_all(info);
/* -E: only print per-subvolume extent/sharing state, skip the fsck. */
10008 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
10009 subvolid, argv[optind], uuidbuf);
10010 ret = print_extent_state(info, subvolid);
10013 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
/* Without sane tree/dev/chunk roots there is nothing we can check. */
10015 if (!extent_buffer_uptodate(info->tree_root->node) ||
10016 !extent_buffer_uptodate(info->dev_root->node) ||
10017 !extent_buffer_uptodate(info->chunk_root->node)) {
10018 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
/* Optional destructive rebuilds of the extent and/or csum trees. */
10023 if (init_extent_tree || init_csum_tree) {
10024 struct btrfs_trans_handle *trans;
10026 trans = btrfs_start_transaction(info->extent_root, 0);
10027 if (IS_ERR(trans)) {
10028 fprintf(stderr, "Error starting transaction\n");
10029 ret = PTR_ERR(trans);
10033 if (init_extent_tree) {
10034 printf("Creating a new extent tree\n");
10035 ret = reinit_extent_tree(trans, info);
10040 if (init_csum_tree) {
10041 fprintf(stderr, "Reinit crc root\n");
10042 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
10044 fprintf(stderr, "crc root initialization failed\n");
/* Recompute data checksums into the freshly reinitialized csum tree. */
10049 ret = fill_csum_tree(trans, info->csum_root,
10052 fprintf(stderr, "crc refilling failed\n");
10057 * Ok now we commit and run the normal fsck, which will add
10058 * extent entries for all of the items it finds.
10060 ret = btrfs_commit_transaction(trans, info->extent_root);
10064 if (!extent_buffer_uptodate(info->extent_root->node)) {
10065 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10069 if (!extent_buffer_uptodate(info->csum_root->node)) {
10070 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
/* Phase 1: extent and chunk allocation trees. */
10075 if (!ctx.progress_enabled)
10076 fprintf(stderr, "checking extents\n");
10077 ret = check_chunks_and_extents(root);
10079 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
/*
 * Fix stale root items before any other repair (see the comment on
 * repair_root_items); ret > 0 is a count of affected roots.
 */
10081 ret = repair_root_items(info);
10085 fprintf(stderr, "Fixed %d roots.\n", ret);
10087 } else if (ret > 0) {
10089 "Found %d roots with an outdated root item.\n",
10092 "Please run a filesystem check with the option --repair to fix them.\n");
/* Phase 2: free space tree or the older free space cache. */
10097 if (!ctx.progress_enabled) {
10098 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
10099 fprintf(stderr, "checking free space tree\n");
10101 fprintf(stderr, "checking free space cache\n");
10103 ret = check_space_cache(root);
10108 * We used to have to have these hole extents in between our real
10109 * extents so if we don't have this flag set we need to make sure there
10110 * are no gaps in the file extents for inodes, otherwise we can just
10111 * ignore it when this happens.
10113 no_holes = btrfs_fs_incompat(root->fs_info,
10114 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
/* Phase 3: per-subvolume fs trees (inodes, dir items, file extents). */
10115 if (!ctx.progress_enabled)
10116 fprintf(stderr, "checking fs roots\n");
10117 ret = check_fs_roots(root, &root_cache);
/* Phase 4: data checksum items. */
10121 fprintf(stderr, "checking csums\n");
10122 ret = check_csums(root);
/* Phase 5: root tree references against what phase 3 recorded. */
10126 fprintf(stderr, "checking root refs\n");
10127 ret = check_root_refs(root, &root_cache);
/* In repair mode, rewrite (re-COW) blocks queued with transid problems. */
10131 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
10132 struct extent_buffer *eb;
10134 eb = list_first_entry(&root->fs_info->recow_ebs,
10135 struct extent_buffer, recow);
10136 list_del_init(&eb->recow);
10137 ret = recow_extent_buffer(root, eb);
/* Drop items earlier checks decided are beyond repair. */
10142 while (!list_empty(&delete_items)) {
10143 struct bad_item *bad;
10145 bad = list_first_entry(&delete_items, struct bad_item, list);
10146 list_del_init(&bad->list);
10148 ret = delete_bad_item(root, bad);
/* Phase 6: qgroup accounting (verify, and repair when requested). */
10152 if (info->quota_enabled) {
10154 fprintf(stderr, "checking quota groups\n");
10155 err = qgroup_verify_all(info);
10159 err = repair_qgroups(info, &qgroups_repaired);
/* Anything still on recow_ebs here could not be fixed (read-only run). */
10164 if (!list_empty(&root->fs_info->recow_ebs)) {
10165 fprintf(stderr, "Transid errors in file system\n");
10169 /* Don't override original ret */
10170 if (!ret && qgroups_repaired)
10171 ret = qgroups_repaired;
10173 if (found_old_backref) { /*
10174 * there was a disk format change when mixed
10175 * backref was in testing tree. The old format
10176 * existed about one week.
10178 printf("\n * Found old mixed backref format. "
10179 "The old format is not supported! *"
10180 "\n * Please mount the FS in readonly mode, "
10181 "backup data and re-format the FS. *\n\n");
/* Final summary from the global counters the checks accumulated. */
10184 printf("found %llu bytes used err is %d\n",
10185 (unsigned long long)bytes_used, ret);
10186 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
10187 printf("total tree bytes: %llu\n",
10188 (unsigned long long)total_btree_bytes);
10189 printf("total fs tree bytes: %llu\n",
10190 (unsigned long long)total_fs_tree_bytes);
10191 printf("total extent tree bytes: %llu\n",
10192 (unsigned long long)total_extent_tree_bytes);
10193 printf("btree space waste bytes: %llu\n",
10194 (unsigned long long)btree_space_waste);
10195 printf("file data blocks allocated: %llu\n referenced %llu\n",
10196 (unsigned long long)data_bytes_allocated,
10197 (unsigned long long)data_bytes_referenced);
/* Cleanup: release caches and the progress-reporting task if it ran. */
10199 free_qgroup_counts();
10200 free_root_recs_tree(&root_cache);
10204 if (ctx.progress_enabled)
10205 task_deinit(ctx.info);