2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include <sys/types.h>
27 #include <uuid/uuid.h>
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
49 TASK_NOTHING, /* has to be the last element */
54 enum task_position tp;
56 struct task_info *info;
59 static u64 bytes_used = 0;
60 static u64 total_csum_bytes = 0;
61 static u64 total_btree_bytes = 0;
62 static u64 total_fs_tree_bytes = 0;
63 static u64 total_extent_tree_bytes = 0;
64 static u64 btree_space_waste = 0;
65 static u64 data_bytes_allocated = 0;
66 static u64 data_bytes_referenced = 0;
67 static int found_old_backref = 0;
68 static LIST_HEAD(duplicate_extents);
69 static LIST_HEAD(delete_items);
70 static int no_holes = 0;
71 static int init_extent_tree = 0;
72 static int check_data_csum = 0;
73 static struct btrfs_fs_info *global_info;
74 static struct task_ctx ctx = { 0 };
75 static struct cache_tree *roots_info_cache = NULL;
77 struct extent_backref {
79 unsigned int is_data:1;
80 unsigned int found_extent_tree:1;
81 unsigned int full_backref:1;
82 unsigned int found_ref:1;
83 unsigned int broken:1;
86 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
88 return rb_entry(node, struct extent_backref, node);
92 struct extent_backref node;
106 static inline struct data_backref* to_data_backref(struct extent_backref *back)
108 return container_of(back, struct data_backref, node);
111 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
113 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
114 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
115 struct data_backref *back1 = to_data_backref(ext1);
116 struct data_backref *back2 = to_data_backref(ext2);
118 WARN_ON(!ext1->is_data);
119 WARN_ON(!ext2->is_data);
121 /* parent and root are a union, so this covers both */
122 if (back1->parent > back2->parent)
124 if (back1->parent < back2->parent)
127 /* This is a full backref and the parents match. */
128 if (back1->node.full_backref)
131 if (back1->owner > back2->owner)
133 if (back1->owner < back2->owner)
136 if (back1->offset > back2->offset)
138 if (back1->offset < back2->offset)
141 if (back1->bytes > back2->bytes)
143 if (back1->bytes < back2->bytes)
146 if (back1->found_ref && back2->found_ref) {
147 if (back1->disk_bytenr > back2->disk_bytenr)
149 if (back1->disk_bytenr < back2->disk_bytenr)
152 if (back1->found_ref > back2->found_ref)
154 if (back1->found_ref < back2->found_ref)
162 * Much like data_backref, just with the undetermined members removed
163 * and changed to use a list_head.
164 * During extent scan, it is stored in root->orphan_data_extent.
165 * During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
167 struct orphan_data_extent {
168 struct list_head list;
176 struct tree_backref {
177 struct extent_backref node;
184 static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
186 return container_of(back, struct tree_backref, node);
189 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
191 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
192 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
193 struct tree_backref *back1 = to_tree_backref(ext1);
194 struct tree_backref *back2 = to_tree_backref(ext2);
196 WARN_ON(ext1->is_data);
197 WARN_ON(ext2->is_data);
199 /* parent and root are a union, so this covers both */
200 if (back1->parent > back2->parent)
202 if (back1->parent < back2->parent)
208 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
210 struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
211 struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
213 if (ext1->is_data > ext2->is_data)
216 if (ext1->is_data < ext2->is_data)
219 if (ext1->full_backref > ext2->full_backref)
221 if (ext1->full_backref < ext2->full_backref)
225 return compare_data_backref(node1, node2);
227 return compare_tree_backref(node1, node2);
230 /* Explicit initialization for extent_record::flag_block_full_backref */
231 enum { FLAG_UNSET = 2 };
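/*
 * Illustrative reading of the tri-state (an assumption about usage, not part
 * of the original comment): the 2-bit flag_block_full_backref field below
 * holds 0 (not a full backref), 1 (full backref), or FLAG_UNSET while the
 * value has not been determined yet.
 */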
233 struct extent_record {
234 struct list_head backrefs;
235 struct list_head dups;
236 struct rb_root backref_tree;
237 struct list_head list;
238 struct cache_extent cache;
239 struct btrfs_disk_key parent_key;
244 u64 extent_item_refs;
246 u64 parent_generation;
250 unsigned int flag_block_full_backref:2;
251 unsigned int found_rec:1;
252 unsigned int content_checked:1;
253 unsigned int owner_ref_checked:1;
254 unsigned int is_root:1;
255 unsigned int metadata:1;
256 unsigned int bad_full_backref:1;
257 unsigned int crossing_stripes:1;
258 unsigned int wrong_chunk_type:1;
261 static inline struct extent_record* to_extent_record(struct list_head *entry)
263 return container_of(entry, struct extent_record, list);
266 struct inode_backref {
267 struct list_head list;
268 unsigned int found_dir_item:1;
269 unsigned int found_dir_index:1;
270 unsigned int found_inode_ref:1;
271 unsigned int filetype:8;
273 unsigned int ref_type;
280 static inline struct inode_backref* to_inode_backref(struct list_head *entry)
282 return list_entry(entry, struct inode_backref, list);
285 struct root_item_record {
286 struct list_head list;
293 struct btrfs_key drop_key;
296 #define REF_ERR_NO_DIR_ITEM (1 << 0)
297 #define REF_ERR_NO_DIR_INDEX (1 << 1)
298 #define REF_ERR_NO_INODE_REF (1 << 2)
299 #define REF_ERR_DUP_DIR_ITEM (1 << 3)
300 #define REF_ERR_DUP_DIR_INDEX (1 << 4)
301 #define REF_ERR_DUP_INODE_REF (1 << 5)
302 #define REF_ERR_INDEX_UNMATCH (1 << 6)
303 #define REF_ERR_FILETYPE_UNMATCH (1 << 7)
304 #define REF_ERR_NAME_TOO_LONG (1 << 8) /* 0x100 */
305 #define REF_ERR_NO_ROOT_REF (1 << 9)
306 #define REF_ERR_NO_ROOT_BACKREF (1 << 10)
307 #define REF_ERR_DUP_ROOT_REF (1 << 11)
308 #define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
310 struct file_extent_hole {
316 struct inode_record {
317 struct list_head backrefs;
318 unsigned int checked:1;
319 unsigned int merging:1;
320 unsigned int found_inode_item:1;
321 unsigned int found_dir_item:1;
322 unsigned int found_file_extent:1;
323 unsigned int found_csum_item:1;
324 unsigned int some_csum_missing:1;
325 unsigned int nodatasum:1;
338 struct rb_root holes;
339 struct list_head orphan_extents;
344 #define I_ERR_NO_INODE_ITEM (1 << 0)
345 #define I_ERR_NO_ORPHAN_ITEM (1 << 1)
346 #define I_ERR_DUP_INODE_ITEM (1 << 2)
347 #define I_ERR_DUP_DIR_INDEX (1 << 3)
348 #define I_ERR_ODD_DIR_ITEM (1 << 4)
349 #define I_ERR_ODD_FILE_EXTENT (1 << 5)
350 #define I_ERR_BAD_FILE_EXTENT (1 << 6)
351 #define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
352 #define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) /* 0x100 */
353 #define I_ERR_DIR_ISIZE_WRONG (1 << 9)
354 #define I_ERR_FILE_NBYTES_WRONG (1 << 10) /* 0x400 */
355 #define I_ERR_ODD_CSUM_ITEM (1 << 11)
356 #define I_ERR_SOME_CSUM_MISSING (1 << 12)
357 #define I_ERR_LINK_COUNT_WRONG (1 << 13)
358 #define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
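/*
 * The I_ERR_* bits above are OR-ed into inode_record::errors and decoded by
 * print_inode_error() below; the REF_ERR_* bits serve the same purpose for
 * inode_backref::errors via print_ref_error().
 */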
360 struct root_backref {
361 struct list_head list;
362 unsigned int found_dir_item:1;
363 unsigned int found_dir_index:1;
364 unsigned int found_back_ref:1;
365 unsigned int found_forward_ref:1;
366 unsigned int reachable:1;
375 static inline struct root_backref* to_root_backref(struct list_head *entry)
377 return list_entry(entry, struct root_backref, list);
381 struct list_head backrefs;
382 struct cache_extent cache;
383 unsigned int found_root_item:1;
389 struct cache_extent cache;
394 struct cache_extent cache;
395 struct cache_tree root_cache;
396 struct cache_tree inode_cache;
397 struct inode_record *current;
406 struct walk_control {
407 struct cache_tree shared;
408 struct shared_node *nodes[BTRFS_MAX_LEVEL];
414 struct btrfs_key key;
416 struct list_head list;
419 struct extent_entry {
424 struct list_head list;
427 struct root_item_info {
428 /* level of the root */
430 /* number of nodes at this level, must be 1 for a root */
434 struct cache_extent cache_extent;
438 * Error bits for low memory mode check.
440 * Currently no caller cares about them yet. Just internal use for error
443 #define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
444 #define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
445 #define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
446 #define REFERENCER_MISSING (1 << 3) /* Referencer not found */
447 #define REFERENCER_MISMATCH (1 << 4) /* Referencer found but does not match */
448 #define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
449 #define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */
450 #define UNKNOWN_TYPE (1 << 6) /* Unknown type */
451 #define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */
452 #define CHUNK_TYPE_MISMATCH (1 << 8)
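/*
 * Note that REFERENCER_MISMATCH and CROSSING_STRIPE_BOUNDARY are both defined
 * as (1 << 4) above, so the two conditions cannot be told apart once OR-ed
 * into the same error mask. A minimal usage sketch (illustrative only):
 *
 *	int err = 0;
 *	err |= BACKREF_MISSING | BYTES_UNALIGNED;
 *	if (err & BYTES_UNALIGNED)
 *		fprintf(stderr, "some bytes are not aligned\n");
 */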
454 static void *print_status_check(void *p)
456 struct task_ctx *priv = p;
457 const char work_indicator[] = { '.', 'o', 'O', 'o' };
459 static char *task_position_string[] = {
461 "checking free space cache",
465 task_period_start(priv->info, 1000 /* 1s */);
467 if (priv->tp == TASK_NOTHING)
471 printf("%s [%c]\r", task_position_string[priv->tp],
472 work_indicator[count % 4]);
475 task_period_wait(priv->info);
480 static int print_status_return(void *p)
488 /* Compatibility function to allow reuse of old code */
489 static u64 first_extent_gap(struct rb_root *holes)
491 struct file_extent_hole *hole;
493 if (RB_EMPTY_ROOT(holes))
496 hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
500 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
502 struct file_extent_hole *hole1;
503 struct file_extent_hole *hole2;
505 hole1 = rb_entry(node1, struct file_extent_hole, node);
506 hole2 = rb_entry(node2, struct file_extent_hole, node);
508 if (hole1->start > hole2->start)
510 if (hole1->start < hole2->start)
512 /* Now hole1->start == hole2->start */
513 if (hole1->len >= hole2->len)
515 * Hole 1 will be the merge center
516 * The same hole will be merged later
519 /* Hole 2 will be the merge center */
524 * Add a hole to the record
526 * This will merge holes for copy_file_extent_holes(),
527 * which ensures there won't be contiguous holes.
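/*
 * Illustrative example (assuming byte-granularity holes): adding the holes
 * [0, 4096) and [4096, 8192) leaves a single merged hole covering [0, 8192).
 */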
529 static int add_file_extent_hole(struct rb_root *holes,
532 struct file_extent_hole *hole;
533 struct file_extent_hole *prev = NULL;
534 struct file_extent_hole *next = NULL;
536 hole = malloc(sizeof(*hole));
541 /* Since compare will not return 0, no -EEXIST will happen */
542 rb_insert(holes, &hole->node, compare_hole);
544 /* simple merge with previous hole */
545 if (rb_prev(&hole->node))
546 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
548 if (prev && prev->start + prev->len >= hole->start) {
549 hole->len = hole->start + hole->len - prev->start;
550 hole->start = prev->start;
551 rb_erase(&prev->node, holes);
556 /* iterate merge with next holes */
558 if (!rb_next(&hole->node))
560 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
562 if (hole->start + hole->len >= next->start) {
563 if (hole->start + hole->len <= next->start + next->len)
564 hole->len = next->start + next->len -
566 rb_erase(&next->node, holes);
575 static int compare_hole_range(struct rb_node *node, void *data)
577 struct file_extent_hole *hole;
580 hole = (struct file_extent_hole *)data;
583 hole = rb_entry(node, struct file_extent_hole, node);
584 if (start < hole->start)
586 if (start >= hole->start && start < hole->start + hole->len)
592 * Delete a hole in the record
594 * This will do the hole split and is much stricter than add.
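/*
 * Illustrative example (assuming byte-granularity holes): deleting the range
 * [4096, 8192) from an existing hole [0, 16384) removes that hole and re-adds
 * the two remaining pieces, [0, 4096) and [8192, 16384).
 */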
596 static int del_file_extent_hole(struct rb_root *holes,
599 struct file_extent_hole *hole;
600 struct file_extent_hole tmp;
605 struct rb_node *node;
612 node = rb_search(holes, &tmp, compare_hole_range, NULL);
615 hole = rb_entry(node, struct file_extent_hole, node);
616 if (start + len > hole->start + hole->len)
620 * Now there will be no overlap, delete the hole and re-add the
621 * split(s) if they exist.
623 if (start > hole->start) {
624 prev_start = hole->start;
625 prev_len = start - hole->start;
628 if (hole->start + hole->len > start + len) {
629 next_start = start + len;
630 next_len = hole->start + hole->len - start - len;
633 rb_erase(node, holes);
636 ret = add_file_extent_hole(holes, prev_start, prev_len);
641 ret = add_file_extent_hole(holes, next_start, next_len);
648 static int copy_file_extent_holes(struct rb_root *dst,
651 struct file_extent_hole *hole;
652 struct rb_node *node;
655 node = rb_first(src);
657 hole = rb_entry(node, struct file_extent_hole, node);
658 ret = add_file_extent_hole(dst, hole->start, hole->len);
661 node = rb_next(node);
666 static void free_file_extent_holes(struct rb_root *holes)
668 struct rb_node *node;
669 struct file_extent_hole *hole;
671 node = rb_first(holes);
673 hole = rb_entry(node, struct file_extent_hole, node);
674 rb_erase(node, holes);
676 node = rb_first(holes);
680 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
682 static void record_root_in_trans(struct btrfs_trans_handle *trans,
683 struct btrfs_root *root)
685 if (root->last_trans != trans->transid) {
686 root->track_dirty = 1;
687 root->last_trans = trans->transid;
688 root->commit_root = root->node;
689 extent_buffer_get(root->node);
693 static u8 imode_to_type(u32 imode)
696 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
697 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
698 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
699 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
700 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
701 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
702 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
703 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
706 return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
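/*
 * Usage sketch (illustrative): imode_to_type(S_IFDIR | 0755) yields
 * BTRFS_FT_DIR, and imode_to_type(S_IFREG | 0644) yields BTRFS_FT_REG_FILE.
 */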
710 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
712 struct device_record *rec1;
713 struct device_record *rec2;
715 rec1 = rb_entry(node1, struct device_record, node);
716 rec2 = rb_entry(node2, struct device_record, node);
717 if (rec1->devid > rec2->devid)
719 else if (rec1->devid < rec2->devid)
725 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
727 struct inode_record *rec;
728 struct inode_backref *backref;
729 struct inode_backref *orig;
730 struct inode_backref *tmp;
731 struct orphan_data_extent *src_orphan;
732 struct orphan_data_extent *dst_orphan;
736 rec = malloc(sizeof(*rec));
738 return ERR_PTR(-ENOMEM);
739 memcpy(rec, orig_rec, sizeof(*rec));
741 INIT_LIST_HEAD(&rec->backrefs);
742 INIT_LIST_HEAD(&rec->orphan_extents);
743 rec->holes = RB_ROOT;
745 list_for_each_entry(orig, &orig_rec->backrefs, list) {
746 size = sizeof(*orig) + orig->namelen + 1;
747 backref = malloc(size);
752 memcpy(backref, orig, size);
753 list_add_tail(&backref->list, &rec->backrefs);
755 list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
756 dst_orphan = malloc(sizeof(*dst_orphan));
761 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
762 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
764 ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
770 if (!list_empty(&rec->backrefs))
771 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
772 list_del(&orig->list);
776 if (!list_empty(&rec->orphan_extents))
777 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
778 list_del(&orig->list);
787 static void print_orphan_data_extents(struct list_head *orphan_extents,
790 struct orphan_data_extent *orphan;
792 if (list_empty(orphan_extents))
794 printf("The following data extent is lost in tree %llu:\n",
796 list_for_each_entry(orphan, orphan_extents, list) {
797 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
798 orphan->objectid, orphan->offset, orphan->disk_bytenr,
803 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
805 u64 root_objectid = root->root_key.objectid;
806 int errors = rec->errors;
810 /* For reloc root errors, print the corresponding fs root objectid */
811 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
812 root_objectid = root->root_key.offset;
813 fprintf(stderr, "reloc");
815 fprintf(stderr, "root %llu inode %llu errors %x",
816 (unsigned long long) root_objectid,
817 (unsigned long long) rec->ino, rec->errors);
819 if (errors & I_ERR_NO_INODE_ITEM)
820 fprintf(stderr, ", no inode item");
821 if (errors & I_ERR_NO_ORPHAN_ITEM)
822 fprintf(stderr, ", no orphan item");
823 if (errors & I_ERR_DUP_INODE_ITEM)
824 fprintf(stderr, ", dup inode item");
825 if (errors & I_ERR_DUP_DIR_INDEX)
826 fprintf(stderr, ", dup dir index");
827 if (errors & I_ERR_ODD_DIR_ITEM)
828 fprintf(stderr, ", odd dir item");
829 if (errors & I_ERR_ODD_FILE_EXTENT)
830 fprintf(stderr, ", odd file extent");
831 if (errors & I_ERR_BAD_FILE_EXTENT)
832 fprintf(stderr, ", bad file extent");
833 if (errors & I_ERR_FILE_EXTENT_OVERLAP)
834 fprintf(stderr, ", file extent overlap");
835 if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
836 fprintf(stderr, ", file extent discount");
837 if (errors & I_ERR_DIR_ISIZE_WRONG)
838 fprintf(stderr, ", dir isize wrong");
839 if (errors & I_ERR_FILE_NBYTES_WRONG)
840 fprintf(stderr, ", nbytes wrong");
841 if (errors & I_ERR_ODD_CSUM_ITEM)
842 fprintf(stderr, ", odd csum item");
843 if (errors & I_ERR_SOME_CSUM_MISSING)
844 fprintf(stderr, ", some csum missing");
845 if (errors & I_ERR_LINK_COUNT_WRONG)
846 fprintf(stderr, ", link count wrong");
847 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
848 fprintf(stderr, ", orphan file extent");
849 fprintf(stderr, "\n");
850 /* Print the orphan extents if needed */
851 if (errors & I_ERR_FILE_EXTENT_ORPHAN)
852 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
854 /* Print the holes if needed */
855 if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
856 struct file_extent_hole *hole;
857 struct rb_node *node;
860 node = rb_first(&rec->holes);
861 fprintf(stderr, "Found file extent holes:\n");
864 hole = rb_entry(node, struct file_extent_hole, node);
865 fprintf(stderr, "\tstart: %llu, len: %llu\n",
866 hole->start, hole->len);
867 node = rb_next(node);
870 fprintf(stderr, "\tstart: 0, len: %llu\n",
871 round_up(rec->isize, root->sectorsize));
875 static void print_ref_error(int errors)
877 if (errors & REF_ERR_NO_DIR_ITEM)
878 fprintf(stderr, ", no dir item");
879 if (errors & REF_ERR_NO_DIR_INDEX)
880 fprintf(stderr, ", no dir index");
881 if (errors & REF_ERR_NO_INODE_REF)
882 fprintf(stderr, ", no inode ref");
883 if (errors & REF_ERR_DUP_DIR_ITEM)
884 fprintf(stderr, ", dup dir item");
885 if (errors & REF_ERR_DUP_DIR_INDEX)
886 fprintf(stderr, ", dup dir index");
887 if (errors & REF_ERR_DUP_INODE_REF)
888 fprintf(stderr, ", dup inode ref");
889 if (errors & REF_ERR_INDEX_UNMATCH)
890 fprintf(stderr, ", index mismatch");
891 if (errors & REF_ERR_FILETYPE_UNMATCH)
892 fprintf(stderr, ", filetype mismatch");
893 if (errors & REF_ERR_NAME_TOO_LONG)
894 fprintf(stderr, ", name too long");
895 if (errors & REF_ERR_NO_ROOT_REF)
896 fprintf(stderr, ", no root ref");
897 if (errors & REF_ERR_NO_ROOT_BACKREF)
898 fprintf(stderr, ", no root backref");
899 if (errors & REF_ERR_DUP_ROOT_REF)
900 fprintf(stderr, ", dup root ref");
901 if (errors & REF_ERR_DUP_ROOT_BACKREF)
902 fprintf(stderr, ", dup root backref");
903 fprintf(stderr, "\n");
906 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
909 struct ptr_node *node;
910 struct cache_extent *cache;
911 struct inode_record *rec = NULL;
914 cache = lookup_cache_extent(inode_cache, ino, 1);
916 node = container_of(cache, struct ptr_node, cache);
918 if (mod && rec->refs > 1) {
919 node->data = clone_inode_rec(rec);
920 if (IS_ERR(node->data))
926 rec = calloc(1, sizeof(*rec));
928 return ERR_PTR(-ENOMEM);
930 rec->extent_start = (u64)-1;
932 INIT_LIST_HEAD(&rec->backrefs);
933 INIT_LIST_HEAD(&rec->orphan_extents);
934 rec->holes = RB_ROOT;
936 node = malloc(sizeof(*node));
939 return ERR_PTR(-ENOMEM);
941 node->cache.start = ino;
942 node->cache.size = 1;
945 if (ino == BTRFS_FREE_INO_OBJECTID)
948 ret = insert_cache_extent(inode_cache, &node->cache);
950 return ERR_PTR(-EEXIST);
955 static void free_orphan_data_extents(struct list_head *orphan_extents)
957 struct orphan_data_extent *orphan;
959 while (!list_empty(orphan_extents)) {
960 orphan = list_entry(orphan_extents->next,
961 struct orphan_data_extent, list);
962 list_del(&orphan->list);
967 static void free_inode_rec(struct inode_record *rec)
969 struct inode_backref *backref;
974 while (!list_empty(&rec->backrefs)) {
975 backref = to_inode_backref(rec->backrefs.next);
976 list_del(&backref->list);
979 free_orphan_data_extents(&rec->orphan_extents);
980 free_file_extent_holes(&rec->holes);
984 static int can_free_inode_rec(struct inode_record *rec)
986 if (!rec->errors && rec->checked && rec->found_inode_item &&
987 rec->nlink == rec->found_link && list_empty(&rec->backrefs))
992 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
993 struct inode_record *rec)
995 struct cache_extent *cache;
996 struct inode_backref *tmp, *backref;
997 struct ptr_node *node;
998 unsigned char filetype;
1000 if (!rec->found_inode_item)
1003 filetype = imode_to_type(rec->imode);
1004 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
1005 if (backref->found_dir_item && backref->found_dir_index) {
1006 if (backref->filetype != filetype)
1007 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1008 if (!backref->errors && backref->found_inode_ref &&
1009 rec->nlink == rec->found_link) {
1010 list_del(&backref->list);
1016 if (!rec->checked || rec->merging)
1019 if (S_ISDIR(rec->imode)) {
1020 if (rec->found_size != rec->isize)
1021 rec->errors |= I_ERR_DIR_ISIZE_WRONG;
1022 if (rec->found_file_extent)
1023 rec->errors |= I_ERR_ODD_FILE_EXTENT;
1024 } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1025 if (rec->found_dir_item)
1026 rec->errors |= I_ERR_ODD_DIR_ITEM;
1027 if (rec->found_size != rec->nbytes)
1028 rec->errors |= I_ERR_FILE_NBYTES_WRONG;
1029 if (rec->nlink > 0 && !no_holes &&
1030 (rec->extent_end < rec->isize ||
1031 first_extent_gap(&rec->holes) < rec->isize))
1032 rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
1035 if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
1036 if (rec->found_csum_item && rec->nodatasum)
1037 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1038 if (rec->some_csum_missing && !rec->nodatasum)
1039 rec->errors |= I_ERR_SOME_CSUM_MISSING;
1042 BUG_ON(rec->refs != 1);
1043 if (can_free_inode_rec(rec)) {
1044 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
1045 node = container_of(cache, struct ptr_node, cache);
1046 BUG_ON(node->data != rec);
1047 remove_cache_extent(inode_cache, &node->cache);
1049 free_inode_rec(rec);
1053 static int check_orphan_item(struct btrfs_root *root, u64 ino)
1055 struct btrfs_path path;
1056 struct btrfs_key key;
1059 key.objectid = BTRFS_ORPHAN_OBJECTID;
1060 key.type = BTRFS_ORPHAN_ITEM_KEY;
1063 btrfs_init_path(&path);
1064 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
1065 btrfs_release_path(&path);
1071 static int process_inode_item(struct extent_buffer *eb,
1072 int slot, struct btrfs_key *key,
1073 struct shared_node *active_node)
1075 struct inode_record *rec;
1076 struct btrfs_inode_item *item;
1078 rec = active_node->current;
1079 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1080 if (rec->found_inode_item) {
1081 rec->errors |= I_ERR_DUP_INODE_ITEM;
1084 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
1085 rec->nlink = btrfs_inode_nlink(eb, item);
1086 rec->isize = btrfs_inode_size(eb, item);
1087 rec->nbytes = btrfs_inode_nbytes(eb, item);
1088 rec->imode = btrfs_inode_mode(eb, item);
1089 if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
1091 rec->found_inode_item = 1;
1092 if (rec->nlink == 0)
1093 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
1094 maybe_free_inode_rec(&active_node->inode_cache, rec);
1098 static struct inode_backref *get_inode_backref(struct inode_record *rec,
1100 int namelen, u64 dir)
1102 struct inode_backref *backref;
1104 list_for_each_entry(backref, &rec->backrefs, list) {
1105 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
1107 if (backref->dir != dir || backref->namelen != namelen)
1109 if (memcmp(name, backref->name, namelen))
1114 backref = malloc(sizeof(*backref) + namelen + 1);
1117 memset(backref, 0, sizeof(*backref));
1119 backref->namelen = namelen;
1120 memcpy(backref->name, name, namelen);
1121 backref->name[namelen] = '\0';
1122 list_add_tail(&backref->list, &rec->backrefs);
1126 static int add_inode_backref(struct cache_tree *inode_cache,
1127 u64 ino, u64 dir, u64 index,
1128 const char *name, int namelen,
1129 int filetype, int itemtype, int errors)
1131 struct inode_record *rec;
1132 struct inode_backref *backref;
1134 rec = get_inode_rec(inode_cache, ino, 1);
1135 BUG_ON(IS_ERR(rec));
1136 backref = get_inode_backref(rec, name, namelen, dir);
1139 backref->errors |= errors;
1140 if (itemtype == BTRFS_DIR_INDEX_KEY) {
1141 if (backref->found_dir_index)
1142 backref->errors |= REF_ERR_DUP_DIR_INDEX;
1143 if (backref->found_inode_ref && backref->index != index)
1144 backref->errors |= REF_ERR_INDEX_UNMATCH;
1145 if (backref->found_dir_item && backref->filetype != filetype)
1146 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1148 backref->index = index;
1149 backref->filetype = filetype;
1150 backref->found_dir_index = 1;
1151 } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
1153 if (backref->found_dir_item)
1154 backref->errors |= REF_ERR_DUP_DIR_ITEM;
1155 if (backref->found_dir_index && backref->filetype != filetype)
1156 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
1158 backref->filetype = filetype;
1159 backref->found_dir_item = 1;
1160 } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
1161 (itemtype == BTRFS_INODE_EXTREF_KEY)) {
1162 if (backref->found_inode_ref)
1163 backref->errors |= REF_ERR_DUP_INODE_REF;
1164 if (backref->found_dir_index && backref->index != index)
1165 backref->errors |= REF_ERR_INDEX_UNMATCH;
1167 backref->index = index;
1169 backref->ref_type = itemtype;
1170 backref->found_inode_ref = 1;
1175 maybe_free_inode_rec(inode_cache, rec);
1179 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
1180 struct cache_tree *dst_cache)
1182 struct inode_backref *backref;
1187 list_for_each_entry(backref, &src->backrefs, list) {
1188 if (backref->found_dir_index) {
1189 add_inode_backref(dst_cache, dst->ino, backref->dir,
1190 backref->index, backref->name,
1191 backref->namelen, backref->filetype,
1192 BTRFS_DIR_INDEX_KEY, backref->errors);
1194 if (backref->found_dir_item) {
1196 add_inode_backref(dst_cache, dst->ino,
1197 backref->dir, 0, backref->name,
1198 backref->namelen, backref->filetype,
1199 BTRFS_DIR_ITEM_KEY, backref->errors);
1201 if (backref->found_inode_ref) {
1202 add_inode_backref(dst_cache, dst->ino,
1203 backref->dir, backref->index,
1204 backref->name, backref->namelen, 0,
1205 backref->ref_type, backref->errors);
1209 if (src->found_dir_item)
1210 dst->found_dir_item = 1;
1211 if (src->found_file_extent)
1212 dst->found_file_extent = 1;
1213 if (src->found_csum_item)
1214 dst->found_csum_item = 1;
1215 if (src->some_csum_missing)
1216 dst->some_csum_missing = 1;
1217 if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1218 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1223 BUG_ON(src->found_link < dir_count);
1224 dst->found_link += src->found_link - dir_count;
1225 dst->found_size += src->found_size;
1226 if (src->extent_start != (u64)-1) {
1227 if (dst->extent_start == (u64)-1) {
1228 dst->extent_start = src->extent_start;
1229 dst->extent_end = src->extent_end;
1231 if (dst->extent_end > src->extent_start)
1232 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1233 else if (dst->extent_end < src->extent_start) {
1234 ret = add_file_extent_hole(&dst->holes,
1236 src->extent_start - dst->extent_end);
1238 if (dst->extent_end < src->extent_end)
1239 dst->extent_end = src->extent_end;
1243 dst->errors |= src->errors;
1244 if (src->found_inode_item) {
1245 if (!dst->found_inode_item) {
1246 dst->nlink = src->nlink;
1247 dst->isize = src->isize;
1248 dst->nbytes = src->nbytes;
1249 dst->imode = src->imode;
1250 dst->nodatasum = src->nodatasum;
1251 dst->found_inode_item = 1;
1253 dst->errors |= I_ERR_DUP_INODE_ITEM;
1261 static int splice_shared_node(struct shared_node *src_node,
1262 struct shared_node *dst_node)
1264 struct cache_extent *cache;
1265 struct ptr_node *node, *ins;
1266 struct cache_tree *src, *dst;
1267 struct inode_record *rec, *conflict;
1268 u64 current_ino = 0;
1272 if (--src_node->refs == 0)
1274 if (src_node->current)
1275 current_ino = src_node->current->ino;
1277 src = &src_node->root_cache;
1278 dst = &dst_node->root_cache;
1280 cache = search_cache_extent(src, 0);
1282 node = container_of(cache, struct ptr_node, cache);
1284 cache = next_cache_extent(cache);
1287 remove_cache_extent(src, &node->cache);
1290 ins = malloc(sizeof(*ins));
1292 ins->cache.start = node->cache.start;
1293 ins->cache.size = node->cache.size;
1297 ret = insert_cache_extent(dst, &ins->cache);
1298 if (ret == -EEXIST) {
1299 conflict = get_inode_rec(dst, rec->ino, 1);
1300 BUG_ON(IS_ERR(conflict));
1301 merge_inode_recs(rec, conflict, dst);
1303 conflict->checked = 1;
1304 if (dst_node->current == conflict)
1305 dst_node->current = NULL;
1307 maybe_free_inode_rec(dst, conflict);
1308 free_inode_rec(rec);
1315 if (src == &src_node->root_cache) {
1316 src = &src_node->inode_cache;
1317 dst = &dst_node->inode_cache;
1321 if (current_ino > 0 && (!dst_node->current ||
1322 current_ino > dst_node->current->ino)) {
1323 if (dst_node->current) {
1324 dst_node->current->checked = 1;
1325 maybe_free_inode_rec(dst, dst_node->current);
1327 dst_node->current = get_inode_rec(dst, current_ino, 1);
1328 BUG_ON(IS_ERR(dst_node->current));
1333 static void free_inode_ptr(struct cache_extent *cache)
1335 struct ptr_node *node;
1336 struct inode_record *rec;
1338 node = container_of(cache, struct ptr_node, cache);
1340 free_inode_rec(rec);
1344 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1346 static struct shared_node *find_shared_node(struct cache_tree *shared,
1349 struct cache_extent *cache;
1350 struct shared_node *node;
1352 cache = lookup_cache_extent(shared, bytenr, 1);
1354 node = container_of(cache, struct shared_node, cache);
1360 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1363 struct shared_node *node;
1365 node = calloc(1, sizeof(*node));
1368 node->cache.start = bytenr;
1369 node->cache.size = 1;
1370 cache_tree_init(&node->root_cache);
1371 cache_tree_init(&node->inode_cache);
1374 ret = insert_cache_extent(shared, &node->cache);
1379 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1380 struct walk_control *wc, int level)
1382 struct shared_node *node;
1383 struct shared_node *dest;
1386 if (level == wc->active_node)
1389 BUG_ON(wc->active_node <= level);
1390 node = find_shared_node(&wc->shared, bytenr);
1392 ret = add_shared_node(&wc->shared, bytenr, refs);
1394 node = find_shared_node(&wc->shared, bytenr);
1395 wc->nodes[level] = node;
1396 wc->active_node = level;
1400 if (wc->root_level == wc->active_node &&
1401 btrfs_root_refs(&root->root_item) == 0) {
1402 if (--node->refs == 0) {
1403 free_inode_recs_tree(&node->root_cache);
1404 free_inode_recs_tree(&node->inode_cache);
1405 remove_cache_extent(&wc->shared, &node->cache);
1411 dest = wc->nodes[wc->active_node];
1412 splice_shared_node(node, dest);
1413 if (node->refs == 0) {
1414 remove_cache_extent(&wc->shared, &node->cache);
1420 static int leave_shared_node(struct btrfs_root *root,
1421 struct walk_control *wc, int level)
1423 struct shared_node *node;
1424 struct shared_node *dest;
1427 if (level == wc->root_level)
1430 for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1434 BUG_ON(i >= BTRFS_MAX_LEVEL);
1436 node = wc->nodes[wc->active_node];
1437 wc->nodes[wc->active_node] = NULL;
1438 wc->active_node = i;
1440 dest = wc->nodes[wc->active_node];
1441 if (wc->active_node < wc->root_level ||
1442 btrfs_root_refs(&root->root_item) > 0) {
1443 BUG_ON(node->refs <= 1);
1444 splice_shared_node(node, dest);
1446 BUG_ON(node->refs < 2);
1455 * 1 - if the root with id child_root_id is a child of root parent_root_id
1456 * 0 - if the root child_root_id isn't a child of the root parent_root_id but
1457 * has other root(s) as parent(s)
1458 * 2 - if the root child_root_id doesn't have any parent roots
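/*
 * Hypothetical example: is_child_root(root, BTRFS_FS_TREE_OBJECTID, 257)
 * should return 1 when a (BTRFS_FS_TREE_OBJECTID, BTRFS_ROOT_REF_KEY, 257)
 * item exists in the tree root, i.e. subvolume 257 was created directly in
 * the top-level subvolume.
 */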
1460 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1463 struct btrfs_path path;
1464 struct btrfs_key key;
1465 struct extent_buffer *leaf;
1469 btrfs_init_path(&path);
1471 key.objectid = parent_root_id;
1472 key.type = BTRFS_ROOT_REF_KEY;
1473 key.offset = child_root_id;
1474 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1478 btrfs_release_path(&path);
1482 key.objectid = child_root_id;
1483 key.type = BTRFS_ROOT_BACKREF_KEY;
1485 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1491 leaf = path.nodes[0];
1492 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1493 ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1496 leaf = path.nodes[0];
1499 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1500 if (key.objectid != child_root_id ||
1501 key.type != BTRFS_ROOT_BACKREF_KEY)
1506 if (key.offset == parent_root_id) {
1507 btrfs_release_path(&path);
1514 btrfs_release_path(&path);
1517 return has_parent ? 0 : 2;
1520 static int process_dir_item(struct btrfs_root *root,
1521 struct extent_buffer *eb,
1522 int slot, struct btrfs_key *key,
1523 struct shared_node *active_node)
1533 struct btrfs_dir_item *di;
1534 struct inode_record *rec;
1535 struct cache_tree *root_cache;
1536 struct cache_tree *inode_cache;
1537 struct btrfs_key location;
1538 char namebuf[BTRFS_NAME_LEN];
1540 root_cache = &active_node->root_cache;
1541 inode_cache = &active_node->inode_cache;
1542 rec = active_node->current;
1543 rec->found_dir_item = 1;
1545 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1546 total = btrfs_item_size_nr(eb, slot);
1547 while (cur < total) {
1549 btrfs_dir_item_key_to_cpu(eb, di, &location);
1550 name_len = btrfs_dir_name_len(eb, di);
1551 data_len = btrfs_dir_data_len(eb, di);
1552 filetype = btrfs_dir_type(eb, di);
1554 rec->found_size += name_len;
1555 if (name_len <= BTRFS_NAME_LEN) {
1559 len = BTRFS_NAME_LEN;
1560 error = REF_ERR_NAME_TOO_LONG;
1562 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1564 if (location.type == BTRFS_INODE_ITEM_KEY) {
1565 add_inode_backref(inode_cache, location.objectid,
1566 key->objectid, key->offset, namebuf,
1567 len, filetype, key->type, error);
1568 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1569 add_inode_backref(root_cache, location.objectid,
1570 key->objectid, key->offset,
1571 namebuf, len, filetype,
1574 fprintf(stderr, "invalid location in dir item %u\n",
1576 add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1577 key->objectid, key->offset, namebuf,
1578 len, filetype, key->type, error);
1581 len = sizeof(*di) + name_len + data_len;
1582 di = (struct btrfs_dir_item *)((char *)di + len);
1585 if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1586 rec->errors |= I_ERR_DUP_DIR_INDEX;
1591 static int process_inode_ref(struct extent_buffer *eb,
1592 int slot, struct btrfs_key *key,
1593 struct shared_node *active_node)
1601 struct cache_tree *inode_cache;
1602 struct btrfs_inode_ref *ref;
1603 char namebuf[BTRFS_NAME_LEN];
1605 inode_cache = &active_node->inode_cache;
1607 ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1608 total = btrfs_item_size_nr(eb, slot);
1609 while (cur < total) {
1610 name_len = btrfs_inode_ref_name_len(eb, ref);
1611 index = btrfs_inode_ref_index(eb, ref);
1612 if (name_len <= BTRFS_NAME_LEN) {
1616 len = BTRFS_NAME_LEN;
1617 error = REF_ERR_NAME_TOO_LONG;
1619 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1620 add_inode_backref(inode_cache, key->objectid, key->offset,
1621 index, namebuf, len, 0, key->type, error);
1623 len = sizeof(*ref) + name_len;
1624 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1630 static int process_inode_extref(struct extent_buffer *eb,
1631 int slot, struct btrfs_key *key,
1632 struct shared_node *active_node)
1641 struct cache_tree *inode_cache;
1642 struct btrfs_inode_extref *extref;
1643 char namebuf[BTRFS_NAME_LEN];
1645 inode_cache = &active_node->inode_cache;
1647 extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1648 total = btrfs_item_size_nr(eb, slot);
1649 while (cur < total) {
1650 name_len = btrfs_inode_extref_name_len(eb, extref);
1651 index = btrfs_inode_extref_index(eb, extref);
1652 parent = btrfs_inode_extref_parent(eb, extref);
1653 if (name_len <= BTRFS_NAME_LEN) {
1657 len = BTRFS_NAME_LEN;
1658 error = REF_ERR_NAME_TOO_LONG;
1660 read_extent_buffer(eb, namebuf,
1661 (unsigned long)(extref + 1), len);
1662 add_inode_backref(inode_cache, key->objectid, parent,
1663 index, namebuf, len, 0, key->type, error);
1665 len = sizeof(*extref) + name_len;
1666 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1673 static int count_csum_range(struct btrfs_root *root, u64 start,
1674 u64 len, u64 *found)
1676 struct btrfs_key key;
1677 struct btrfs_path path;
1678 struct extent_buffer *leaf;
1683 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1685 btrfs_init_path(&path);
1687 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1689 key.type = BTRFS_EXTENT_CSUM_KEY;
1691 ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1695 if (ret > 0 && path.slots[0] > 0) {
1696 leaf = path.nodes[0];
1697 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1698 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1699 key.type == BTRFS_EXTENT_CSUM_KEY)
1704 leaf = path.nodes[0];
1705 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1706 ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1711 leaf = path.nodes[0];
1714 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1715 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1716 key.type != BTRFS_EXTENT_CSUM_KEY)
1719 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1720 if (key.offset >= start + len)
1723 if (key.offset > start)
1726 size = btrfs_item_size_nr(leaf, path.slots[0]);
1727 csum_end = key.offset + (size / csum_size) * root->sectorsize;
1728 if (csum_end > start) {
1729 size = min(csum_end - start, len);
1738 btrfs_release_path(&path);
1744 static int process_file_extent(struct btrfs_root *root,
1745 struct extent_buffer *eb,
1746 int slot, struct btrfs_key *key,
1747 struct shared_node *active_node)
1749 struct inode_record *rec;
1750 struct btrfs_file_extent_item *fi;
1752 u64 disk_bytenr = 0;
1753 u64 extent_offset = 0;
1754 u64 mask = root->sectorsize - 1;
1758 rec = active_node->current;
1759 BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1760 rec->found_file_extent = 1;
1762 if (rec->extent_start == (u64)-1) {
1763 rec->extent_start = key->offset;
1764 rec->extent_end = key->offset;
1767 if (rec->extent_end > key->offset)
1768 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1769 else if (rec->extent_end < key->offset) {
1770 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1771 key->offset - rec->extent_end);
1776 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1777 extent_type = btrfs_file_extent_type(eb, fi);
1779 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1780 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1782 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1783 rec->found_size += num_bytes;
1784 num_bytes = (num_bytes + mask) & ~mask;
1785 } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1786 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1787 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1788 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1789 extent_offset = btrfs_file_extent_offset(eb, fi);
1790 if (num_bytes == 0 || (num_bytes & mask))
1791 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1792 if (num_bytes + extent_offset >
1793 btrfs_file_extent_ram_bytes(eb, fi))
1794 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1795 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1796 (btrfs_file_extent_compression(eb, fi) ||
1797 btrfs_file_extent_encryption(eb, fi) ||
1798 btrfs_file_extent_other_encoding(eb, fi)))
1799 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1800 if (disk_bytenr > 0)
1801 rec->found_size += num_bytes;
1803 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1805 rec->extent_end = key->offset + num_bytes;
1808 * The data reloc tree will copy full extents into its inode and then
1809 * copy the corresponding csums. Because the extent it copied could be
1810 * a preallocated extent that hasn't been written to yet, there may be no
1811 * csums to copy, ergo we won't have csums for our file extent. This is
1812 * ok so just don't bother checking csums if the inode belongs to the
1815 if (disk_bytenr > 0 &&
1816 btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1818 if (btrfs_file_extent_compression(eb, fi))
1819 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1821 disk_bytenr += extent_offset;
1823 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1826 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1828 rec->found_csum_item = 1;
1829 if (found < num_bytes)
1830 rec->some_csum_missing = 1;
1831 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1833 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1839 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1840 struct walk_control *wc)
1842 struct btrfs_key key;
1846 struct cache_tree *inode_cache;
1847 struct shared_node *active_node;
1849 if (wc->root_level == wc->active_node &&
1850 btrfs_root_refs(&root->root_item) == 0)
1853 active_node = wc->nodes[wc->active_node];
1854 inode_cache = &active_node->inode_cache;
1855 nritems = btrfs_header_nritems(eb);
1856 for (i = 0; i < nritems; i++) {
1857 btrfs_item_key_to_cpu(eb, &key, i);
1859 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1861 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1864 if (active_node->current == NULL ||
1865 active_node->current->ino < key.objectid) {
1866 if (active_node->current) {
1867 active_node->current->checked = 1;
1868 maybe_free_inode_rec(inode_cache,
1869 active_node->current);
1871 active_node->current = get_inode_rec(inode_cache,
1873 BUG_ON(IS_ERR(active_node->current));
1876 case BTRFS_DIR_ITEM_KEY:
1877 case BTRFS_DIR_INDEX_KEY:
1878 ret = process_dir_item(root, eb, i, &key, active_node);
1880 case BTRFS_INODE_REF_KEY:
1881 ret = process_inode_ref(eb, i, &key, active_node);
1883 case BTRFS_INODE_EXTREF_KEY:
1884 ret = process_inode_extref(eb, i, &key, active_node);
1886 case BTRFS_INODE_ITEM_KEY:
1887 ret = process_inode_item(eb, i, &key, active_node);
1889 case BTRFS_EXTENT_DATA_KEY:
1890 ret = process_file_extent(root, eb, i, &key,
1900 static void reada_walk_down(struct btrfs_root *root,
1901 struct extent_buffer *node, int slot)
1910 level = btrfs_header_level(node);
1914 nritems = btrfs_header_nritems(node);
1915 blocksize = root->nodesize;
1916 for (i = slot; i < nritems; i++) {
1917 bytenr = btrfs_node_blockptr(node, i);
1918 ptr_gen = btrfs_node_ptr_generation(node, i);
1919 readahead_tree_block(root, bytenr, blocksize, ptr_gen);
1924 * Check the child node/leaf by the following conditions:
1925 * 1. the first item key of the node/leaf should be the same as the one
1927 * 2. block in parent node should match the child node/leaf.
1928 * 3. generation of parent node and child's header should be consistent.
1930 * Otherwise the child node/leaf pointed to by the key in the parent is not valid.
1932 * We hope to check leaf owner too, but since subvol may share leaves,
1933 * which makes the leaf owner check not so strong, the key check should be
1934 * sufficient for that case.
1936 static int check_child_node(struct btrfs_root *root,
1937 struct extent_buffer *parent, int slot,
1938 struct extent_buffer *child)
1940 struct btrfs_key parent_key;
1941 struct btrfs_key child_key;
1944 btrfs_node_key_to_cpu(parent, &parent_key, slot);
1945 if (btrfs_header_level(child) == 0)
1946 btrfs_item_key_to_cpu(child, &child_key, 0);
1948 btrfs_node_key_to_cpu(child, &child_key, 0);
1950 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1953 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1954 parent_key.objectid, parent_key.type, parent_key.offset,
1955 child_key.objectid, child_key.type, child_key.offset);
1957 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1959 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1960 btrfs_node_blockptr(parent, slot),
1961 btrfs_header_bytenr(child));
1963 if (btrfs_node_ptr_generation(parent, slot) !=
1964 btrfs_header_generation(child)) {
1966 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1967 btrfs_node_ptr_generation(parent, slot),
1968 btrfs_header_generation(child));
1974 u64 bytenr[BTRFS_MAX_LEVEL];
1975 u64 refs[BTRFS_MAX_LEVEL];
1978 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1979 struct walk_control *wc, int *level,
1980 struct node_refs *nrefs)
1982 enum btrfs_tree_block_status status;
1985 struct extent_buffer *next;
1986 struct extent_buffer *cur;
1991 WARN_ON(*level < 0);
1992 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1994 if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1995 refs = nrefs->refs[*level];
1998 ret = btrfs_lookup_extent_info(NULL, root,
1999 path->nodes[*level]->start,
2000 *level, 1, &refs, NULL);
2005 nrefs->bytenr[*level] = path->nodes[*level]->start;
2006 nrefs->refs[*level] = refs;
2010 ret = enter_shared_node(root, path->nodes[*level]->start,
2018 while (*level >= 0) {
2019 WARN_ON(*level < 0);
2020 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2021 cur = path->nodes[*level];
2023 if (btrfs_header_level(cur) != *level)
2026 if (path->slots[*level] >= btrfs_header_nritems(cur))
2029 ret = process_one_leaf(root, cur, wc);
2034 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2035 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2036 blocksize = root->nodesize;
2038 if (bytenr == nrefs->bytenr[*level - 1]) {
2039 refs = nrefs->refs[*level - 1];
2041 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2042 *level - 1, 1, &refs, NULL);
2046 nrefs->bytenr[*level - 1] = bytenr;
2047 nrefs->refs[*level - 1] = refs;
2052 ret = enter_shared_node(root, bytenr, refs,
2055 path->slots[*level]++;
2060 next = btrfs_find_tree_block(root, bytenr, blocksize);
2061 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2062 free_extent_buffer(next);
2063 reada_walk_down(root, cur, path->slots[*level]);
2064 next = read_tree_block(root, bytenr, blocksize,
2066 if (!extent_buffer_uptodate(next)) {
2067 struct btrfs_key node_key;
2069 btrfs_node_key_to_cpu(path->nodes[*level],
2071 path->slots[*level]);
2072 btrfs_add_corrupt_extent_record(root->fs_info,
2074 path->nodes[*level]->start,
2075 root->nodesize, *level);
2081 ret = check_child_node(root, cur, path->slots[*level], next);
2087 if (btrfs_is_leaf(next))
2088 status = btrfs_check_leaf(root, NULL, next);
2090 status = btrfs_check_node(root, NULL, next);
2091 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2092 free_extent_buffer(next);
2097 *level = *level - 1;
2098 free_extent_buffer(path->nodes[*level]);
2099 path->nodes[*level] = next;
2100 path->slots[*level] = 0;
2103 path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2107 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2108 struct walk_control *wc, int *level)
2111 struct extent_buffer *leaf;
2113 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2114 leaf = path->nodes[i];
2115 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2120 free_extent_buffer(path->nodes[*level]);
2121 path->nodes[*level] = NULL;
2122 BUG_ON(*level > wc->active_node);
2123 if (*level == wc->active_node)
2124 leave_shared_node(root, wc, *level);
2131 static int check_root_dir(struct inode_record *rec)
2133 struct inode_backref *backref;
2136 if (!rec->found_inode_item || rec->errors)
2138 if (rec->nlink != 1 || rec->found_link != 0)
2140 if (list_empty(&rec->backrefs))
2142 backref = to_inode_backref(rec->backrefs.next);
2143 if (!backref->found_inode_ref)
2145 if (backref->index != 0 || backref->namelen != 2 ||
2146 memcmp(backref->name, "..", 2))
2148 if (backref->found_dir_index || backref->found_dir_item)
2155 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2156 struct btrfs_root *root, struct btrfs_path *path,
2157 struct inode_record *rec)
2159 struct btrfs_inode_item *ei;
2160 struct btrfs_key key;
2163 key.objectid = rec->ino;
2164 key.type = BTRFS_INODE_ITEM_KEY;
2165 key.offset = (u64)-1;
2167 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2171 if (!path->slots[0]) {
2178 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2179 if (key.objectid != rec->ino) {
2184 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2185 struct btrfs_inode_item);
2186 btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2187 btrfs_mark_buffer_dirty(path->nodes[0]);
2188 rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2189 printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2190 root->root_key.objectid);
2192 btrfs_release_path(path);
2196 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2197 struct btrfs_root *root,
2198 struct btrfs_path *path,
2199 struct inode_record *rec)
2203 ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2204 btrfs_release_path(path);
2206 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2210 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2211 struct btrfs_root *root,
2212 struct btrfs_path *path,
2213 struct inode_record *rec)
2215 struct btrfs_inode_item *ei;
2216 struct btrfs_key key;
2219 key.objectid = rec->ino;
2220 key.type = BTRFS_INODE_ITEM_KEY;
2223 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2230 /* Since ret == 0, no need to check anything */
2231 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2232 struct btrfs_inode_item);
2233 btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2234 btrfs_mark_buffer_dirty(path->nodes[0]);
2235 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2236 printf("reset nbytes for ino %llu root %llu\n",
2237 rec->ino, root->root_key.objectid);
2239 btrfs_release_path(path);
2243 static int add_missing_dir_index(struct btrfs_root *root,
2244 struct cache_tree *inode_cache,
2245 struct inode_record *rec,
2246 struct inode_backref *backref)
2248 struct btrfs_path *path;
2249 struct btrfs_trans_handle *trans;
2250 struct btrfs_dir_item *dir_item;
2251 struct extent_buffer *leaf;
2252 struct btrfs_key key;
2253 struct btrfs_disk_key disk_key;
2254 struct inode_record *dir_rec;
2255 unsigned long name_ptr;
2256 u32 data_size = sizeof(*dir_item) + backref->namelen;
2259 path = btrfs_alloc_path();
2263 trans = btrfs_start_transaction(root, 1);
2264 if (IS_ERR(trans)) {
2265 btrfs_free_path(path);
2266 return PTR_ERR(trans);
2269 fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2270 (unsigned long long)rec->ino);
2271 key.objectid = backref->dir;
2272 key.type = BTRFS_DIR_INDEX_KEY;
2273 key.offset = backref->index;
2275 ret = btrfs_insert_empty_item(trans, root, path, &key, data_size);
2278 leaf = path->nodes[0];
2279 dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
2281 disk_key.objectid = cpu_to_le64(rec->ino);
2282 disk_key.type = BTRFS_INODE_ITEM_KEY;
2283 disk_key.offset = 0;
2285 btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2286 btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2287 btrfs_set_dir_data_len(leaf, dir_item, 0);
2288 btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2289 name_ptr = (unsigned long)(dir_item + 1);
2290 write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2291 btrfs_mark_buffer_dirty(leaf);
2292 btrfs_free_path(path);
2293 btrfs_commit_transaction(trans, root);
2295 backref->found_dir_index = 1;
2296 dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2297 BUG_ON(IS_ERR(dir_rec));
2300 dir_rec->found_size += backref->namelen;
2301 if (dir_rec->found_size == dir_rec->isize &&
2302 (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2303 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2304 if (dir_rec->found_size != dir_rec->isize)
2305 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2310 static int delete_dir_index(struct btrfs_root *root,
2311 struct cache_tree *inode_cache,
2312 struct inode_record *rec,
2313 struct inode_backref *backref)
2315 struct btrfs_trans_handle *trans;
2316 struct btrfs_dir_item *di;
2317 struct btrfs_path *path;
2320 path = btrfs_alloc_path();
2324 trans = btrfs_start_transaction(root, 1);
2325 if (IS_ERR(trans)) {
2326 btrfs_free_path(path);
2327 return PTR_ERR(trans);
2331 fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2332 (unsigned long long)backref->dir,
2333 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2334 (unsigned long long)root->objectid);
2336 di = btrfs_lookup_dir_index(trans, root, path, backref->dir,
2337 backref->name, backref->namelen,
2338 backref->index, -1);
2341 btrfs_free_path(path);
2342 btrfs_commit_transaction(trans, root);
2349 ret = btrfs_del_item(trans, root, path);
2351 ret = btrfs_delete_one_dir_name(trans, root, path, di);
2353 btrfs_free_path(path);
2354 btrfs_commit_transaction(trans, root);
2358 static int create_inode_item(struct btrfs_root *root,
2359 struct inode_record *rec,
2360 struct inode_backref *backref, int root_dir)
2362 struct btrfs_trans_handle *trans;
2363 struct btrfs_inode_item inode_item;
2364 time_t now = time(NULL);
2367 trans = btrfs_start_transaction(root, 1);
2368 if (IS_ERR(trans)) {
2369 ret = PTR_ERR(trans);
2373 fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
2374 "be incomplete, please check permissions and content after "
2375 "the fsck completes.\n", (unsigned long long)root->objectid,
2376 (unsigned long long)rec->ino);
2378 memset(&inode_item, 0, sizeof(inode_item));
2379 btrfs_set_stack_inode_generation(&inode_item, trans->transid);
2381 btrfs_set_stack_inode_nlink(&inode_item, 1);
2383 btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
2384 btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
2385 if (rec->found_dir_item) {
2386 if (rec->found_file_extent)
2387 fprintf(stderr, "root %llu inode %llu has both a dir "
2388 "item and extents, unsure if it is a dir or a "
2389 "regular file so setting it as a directory\n",
2390 (unsigned long long)root->objectid,
2391 (unsigned long long)rec->ino);
2392 btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
2393 btrfs_set_stack_inode_size(&inode_item, rec->found_size);
2394 } else if (!rec->found_dir_item) {
2395 btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
2396 btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
2398 btrfs_set_stack_timespec_sec(&inode_item.atime, now);
2399 btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
2400 btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
2401 btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
2402 btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
2403 btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
2404 btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
2405 btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
2407 ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
2409 btrfs_commit_transaction(trans, root);
2413 static int repair_inode_backrefs(struct btrfs_root *root,
2414 struct inode_record *rec,
2415 struct cache_tree *inode_cache,
2418 struct inode_backref *tmp, *backref;
2419 u64 root_dirid = btrfs_root_dirid(&root->root_item);
2423 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2424 if (!delete && rec->ino == root_dirid) {
2425 if (!rec->found_inode_item) {
2426 ret = create_inode_item(root, rec, backref, 1);
2433 /* Index 0 for root dirs is special, don't mess with it */
2434 if (rec->ino == root_dirid && backref->index == 0)
2438 ((backref->found_dir_index && !backref->found_inode_ref) ||
2439 (backref->found_dir_index && backref->found_inode_ref &&
2440 (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2441 ret = delete_dir_index(root, inode_cache, rec, backref);
2445 list_del(&backref->list);
2449 if (!delete && !backref->found_dir_index &&
2450 backref->found_dir_item && backref->found_inode_ref) {
2451 ret = add_missing_dir_index(root, inode_cache, rec,
2456 if (backref->found_dir_item &&
2457 backref->found_dir_index) {
2459 if (!backref->errors &&
2460 backref->found_inode_ref) {
2461 list_del(&backref->list);
2467 if (!delete && (!backref->found_dir_index &&
2468 !backref->found_dir_item &&
2469 backref->found_inode_ref)) {
2470 struct btrfs_trans_handle *trans;
2471 struct btrfs_key location;
2473 ret = check_dir_conflict(root, backref->name,
2479 * let the nlink fixing routine handle it,
2480 * since it can do a better job.
2485 location.objectid = rec->ino;
2486 location.type = BTRFS_INODE_ITEM_KEY;
2487 location.offset = 0;
2489 trans = btrfs_start_transaction(root, 1);
2490 if (IS_ERR(trans)) {
2491 ret = PTR_ERR(trans);
2494 fprintf(stderr, "adding missing dir index/item pair "
2496 (unsigned long long)rec->ino);
2497 ret = btrfs_insert_dir_item(trans, root, backref->name,
2499 backref->dir, &location,
2500 imode_to_type(rec->imode),
2503 btrfs_commit_transaction(trans, root);
2507 if (!delete && (backref->found_inode_ref &&
2508 backref->found_dir_index &&
2509 backref->found_dir_item &&
2510 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2511 !rec->found_inode_item)) {
2512 ret = create_inode_item(root, rec, backref, 0);
2519 return ret ? ret : repaired;
2523 * Determine the file type for nlink/inode_item repair.
2525 * Return 0 if the file type is found and the BTRFS_FT_* value is stored in *type.
2526 * Return -ENOENT if the file type is not found.
2528 static int find_file_type(struct inode_record *rec, u8 *type)
2530 struct inode_backref *backref;
2532 /* For inode item recovered case */
2533 if (rec->found_inode_item) {
2534 *type = imode_to_type(rec->imode);
2538 list_for_each_entry(backref, &rec->backrefs, list) {
2539 if (backref->found_dir_index || backref->found_dir_item) {
2540 *type = backref->filetype;
2548 * Determine the file name for nlink repair.
2550 * Return 0 if a file name is found; name and namelen are set.
2551 * Return -ENOENT if no file name is found.
2553 static int find_file_name(struct inode_record *rec,
2554 char *name, int *namelen)
2556 struct inode_backref *backref;
2558 list_for_each_entry(backref, &rec->backrefs, list) {
2559 if (backref->found_dir_index || backref->found_dir_item ||
2560 backref->found_inode_ref) {
2561 memcpy(name, backref->name, backref->namelen);
2562 *namelen = backref->namelen;
2569 /* Reset the nlink of the inode to the correct one */
2570 static int reset_nlink(struct btrfs_trans_handle *trans,
2571 struct btrfs_root *root,
2572 struct btrfs_path *path,
2573 struct inode_record *rec)
2575 struct inode_backref *backref;
2576 struct inode_backref *tmp;
2577 struct btrfs_key key;
2578 struct btrfs_inode_item *inode_item;
2581 /* We don't trust the recorded link count either, reset it and rebuild it from the backrefs */
2582 rec->found_link = 0;
2584 /* Remove all backrefs, including the valid ones */
2585 list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2586 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2587 backref->index, backref->name,
2588 backref->namelen, 0);
2592 /* Remove the invalid backref so it won't be added back */
2593 if (!(backref->found_dir_index &&
2594 backref->found_dir_item &&
2595 backref->found_inode_ref)) {
2596 list_del(&backref->list);
2603 /* Set nlink to 0 */
2604 key.objectid = rec->ino;
2605 key.type = BTRFS_INODE_ITEM_KEY;
2607 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2614 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2615 struct btrfs_inode_item);
2616 btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2617 btrfs_mark_buffer_dirty(path->nodes[0]);
2618 btrfs_release_path(path);
2621 * Add back the valid inode_ref/dir_item/dir_index entries;
2622 * btrfs_add_link() handles the nlink increment, so the new nlink will be correct.
2624 list_for_each_entry(backref, &rec->backrefs, list) {
2625 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2626 backref->name, backref->namelen,
2627 backref->filetype, &backref->index, 1);
2632 btrfs_release_path(path);
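/*
 * Repair a wrong link count: reset_nlink() drops every backref and re-adds
 * only the valid ones. If no valid backref is left, the inode is linked
 * into a 'lost+found' directory instead, appending a ".<ino>" suffix to
 * the name for as long as it collides with an existing entry.
 */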
2636 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
2637 struct btrfs_root *root,
2638 struct btrfs_path *path,
2639 struct inode_record *rec)
2641 char *dir_name = "lost+found";
2642 char namebuf[BTRFS_NAME_LEN] = {0};
2647 int name_recovered = 0;
2648 int type_recovered = 0;
2652 * Get file name and type first before these invalid inode ref
2653 * are deleted by remove_all_invalid_backref()
2655 name_recovered = !find_file_name(rec, namebuf, &namelen);
2656 type_recovered = !find_file_type(rec, &type);
2658 if (!name_recovered) {
2659 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
2660 rec->ino, rec->ino);
2661 namelen = count_digits(rec->ino);
2662 sprintf(namebuf, "%llu", rec->ino);
2665 if (!type_recovered) {
2666 printf("Can't get file type for inode %llu, using FILE as fallback\n",
2668 type = BTRFS_FT_REG_FILE;
2672 ret = reset_nlink(trans, root, path, rec);
2675 "Failed to reset nlink for inode %llu: %s\n",
2676 rec->ino, strerror(-ret));
2680 if (rec->found_link == 0) {
2681 lost_found_ino = root->highest_inode;
2682 if (lost_found_ino >= BTRFS_LAST_FREE_OBJECTID) {
2687 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
2688 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
2691 fprintf(stderr, "Failed to create '%s' dir: %s\n",
2692 dir_name, strerror(-ret));
2695 ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
2696 namebuf, namelen, type, NULL, 1);
2698 * Add the ".INO" suffix several times to handle the case where
2699 * "FILENAME.INO" is already taken by another file.
2701 while (ret == -EEXIST) {
2703 * Conflicting file name, add ".INO" as a suffix; the +1 accounts for the '.'
2705 if (namelen + count_digits(rec->ino) + 1 >
2710 snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
2712 namelen += count_digits(rec->ino) + 1;
2713 ret = btrfs_add_link(trans, root, rec->ino,
2714 lost_found_ino, namebuf,
2715 namelen, type, NULL, 1);
2719 "Failed to link the inode %llu to %s dir: %s\n",
2720 rec->ino, dir_name, strerror(-ret));
2724 * Just increase found_link, don't actually add the
2725 * backref. This keeps things simpler, and this inode
2726 * record will be freed after the repair is done,
2727 * so fsck will not report a problem for this inode.
2730 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
2731 namelen, namebuf, dir_name);
2733 printf("Fixed the nlink of inode %llu\n", rec->ino);
2736 * Clear the flag anyway, or we will loop forever on the same inode,
2737 * as it would never be removed from the bad inode list.
2740 rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
2741 btrfs_release_path(path);
2746 * Check whether there is any normal (regular or prealloc) file extent for the given inode.
2748 * This is used to determine the file type when neither its dir_index/item nor its
2749 * inode_item exists.
2751 * This will *NOT* report errors; if any error happens, just assume the inode does
2752 * not have any normal file extent.
2754 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
2756 struct btrfs_path *path;
2757 struct btrfs_key key;
2758 struct btrfs_key found_key;
2759 struct btrfs_file_extent_item *fi;
2763 path = btrfs_alloc_path();
2767 key.type = BTRFS_EXTENT_DATA_KEY;
2770 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2775 if (ret && path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2776 ret = btrfs_next_leaf(root, path);
2783 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2785 if (found_key.objectid != ino ||
2786 found_key.type != BTRFS_EXTENT_DATA_KEY)
2788 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
2789 struct btrfs_file_extent_item);
2790 type = btrfs_file_extent_type(path->nodes[0], fi);
2791 if (type != BTRFS_FILE_EXTENT_INLINE) {
2797 btrfs_free_path(path);
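/*
 * Map a BTRFS_FT_* directory entry type to the matching S_IF* mode bits,
 * e.g. BTRFS_FT_DIR -> S_IFDIR. Used when rebuilding an inode item below.
 */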
2801 static u32 btrfs_type_to_imode(u8 type)
2803 static u32 imode_by_btrfs_type[] = {
2804 [BTRFS_FT_REG_FILE] = S_IFREG,
2805 [BTRFS_FT_DIR] = S_IFDIR,
2806 [BTRFS_FT_CHRDEV] = S_IFCHR,
2807 [BTRFS_FT_BLKDEV] = S_IFBLK,
2808 [BTRFS_FT_FIFO] = S_IFIFO,
2809 [BTRFS_FT_SOCK] = S_IFSOCK,
2810 [BTRFS_FT_SYMLINK] = S_IFLNK,
2813 return imode_by_btrfs_type[(type)];
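/*
 * Rebuild a completely missing inode item. The file type is taken from the
 * backrefs when possible, otherwise guessed from what was found (regular
 * file extents or orphan extents -> regular file, dir items -> directory,
 * FILE as the last resort). Only the inode item is recreated here; fixing
 * the link count is left to the nlink repair.
 */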
2816 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
2817 struct btrfs_root *root,
2818 struct btrfs_path *path,
2819 struct inode_record *rec)
2823 int type_recovered = 0;
2826 printf("Trying to rebuild inode: %llu\n", rec->ino);
2828 type_recovered = !find_file_type(rec, &filetype);
2831 * Try to determine the inode type if it was not found.
2833 * A regular file extent found means it must be a FILE.
2834 * A dir_item/index found means it must be a DIR.
2836 * For an undetermined one, use FILE as the fallback.
2839 * 1. If a backref (inode_index/item is already handled) to it is found,
2841 *    Need a new inode-inode ref structure to allow searching for that.
2843 if (!type_recovered) {
2844 if (rec->found_file_extent &&
2845 find_normal_file_extent(root, rec->ino)) {
2847 filetype = BTRFS_FT_REG_FILE;
2848 } else if (rec->found_dir_item) {
2850 filetype = BTRFS_FT_DIR;
2851 } else if (!list_empty(&rec->orphan_extents)) {
2853 filetype = BTRFS_FT_REG_FILE;
2855 printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
2858 filetype = BTRFS_FT_REG_FILE;
2862 ret = btrfs_new_inode(trans, root, rec->ino,
2863 mode | btrfs_type_to_imode(filetype));
2868 * At this point the inode rebuild is done; we only rebuilt the inode item,
2869 * we did not repair the nlink (e.g. move the inode to lost+found).
2870 * That is the job of the nlink repair.
2872 * We just fill the record and return.
2874 rec->found_dir_item = 1;
2875 rec->imode = mode | btrfs_type_to_imode(filetype);
2877 rec->errors &= ~I_ERR_NO_INODE_ITEM;
2878 /* Ensure the inode_nlinks repair function will be called */
2879 rec->errors |= I_ERR_LINK_COUNT_WRONG;
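/*
 * Re-insert the orphan data extents collected during the extent scan as
 * file extents of this inode. Ranges that conflict with existing extents
 * are freed instead of inserted, and the size and hole accounting of the
 * record is updated along the way.
 */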
2884 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
2885 struct btrfs_root *root,
2886 struct btrfs_path *path,
2887 struct inode_record *rec)
2889 struct orphan_data_extent *orphan;
2890 struct orphan_data_extent *tmp;
2893 list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
2895 * Check for conflicting file extents.
2897 * Here we don't know whether the extent is compressed or has a data
2898 * offset, so we can only assume it is neither,
2899 * and use its disk_len as the extent length.
2901 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
2902 orphan->offset, orphan->disk_len, 0);
2903 btrfs_release_path(path);
2908 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
2909 orphan->disk_bytenr, orphan->disk_len);
2910 ret = btrfs_free_extent(trans,
2911 root->fs_info->extent_root,
2912 orphan->disk_bytenr, orphan->disk_len,
2913 0, root->objectid, orphan->objectid,
2918 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
2919 orphan->offset, orphan->disk_bytenr,
2920 orphan->disk_len, orphan->disk_len);
2924 /* Update file size info */
2925 rec->found_size += orphan->disk_len;
2926 if (rec->found_size == rec->nbytes)
2927 rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2929 /* Update the file extent hole info too */
2930 ret = del_file_extent_hole(&rec->holes, orphan->offset,
2934 if (RB_EMPTY_ROOT(&rec->holes))
2935 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2937 list_del(&orphan->list);
2940 rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
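/*
 * Fill the recorded file extent holes ("discount" file extents) by punching
 * hole extents with btrfs_punch_hole(). If the file lost every file extent,
 * punch a single hole covering the whole rounded-up isize.
 */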
2945 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
2946 struct btrfs_root *root,
2947 struct btrfs_path *path,
2948 struct inode_record *rec)
2950 struct rb_node *node;
2951 struct file_extent_hole *hole;
2955 node = rb_first(&rec->holes);
2959 hole = rb_entry(node, struct file_extent_hole, node);
2960 ret = btrfs_punch_hole(trans, root, rec->ino,
2961 hole->start, hole->len);
2964 ret = del_file_extent_hole(&rec->holes, hole->start,
2968 if (RB_EMPTY_ROOT(&rec->holes))
2969 rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
2970 node = rb_first(&rec->holes);
2972 /* Special case for a file that lost all of its file extents */
2974 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
2975 round_up(rec->isize, root->sectorsize));
2979 printf("Fixed discount file extents for inode: %llu in root: %llu\n",
2980 rec->ino, root->objectid);
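/*
 * Dispatch to the individual inode repair helpers according to rec->errors.
 * Everything runs inside one transaction (7 items reserved, see below), and
 * a missing inode item is handled first so the later repairs have an inode
 * item to work on.
 */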
2985 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
2987 struct btrfs_trans_handle *trans;
2988 struct btrfs_path *path;
2991 if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
2992 I_ERR_NO_ORPHAN_ITEM |
2993 I_ERR_LINK_COUNT_WRONG |
2994 I_ERR_NO_INODE_ITEM |
2995 I_ERR_FILE_EXTENT_ORPHAN |
2996 I_ERR_FILE_EXTENT_DISCOUNT|
2997 I_ERR_FILE_NBYTES_WRONG)))
3000 path = btrfs_alloc_path();
3005 * For nlink repair, it may create a dir and add a link, so reserve:
3006 * 2 for the parent (256)'s dir_index and dir_item
3007 * 2 for the lost+found dir's inode_item and inode_ref
3008 * 1 for the new inode_ref of the file
3009 * 2 for the lost+found dir's dir_index and dir_item for the file
3011 trans = btrfs_start_transaction(root, 7);
3012 if (IS_ERR(trans)) {
3013 btrfs_free_path(path);
3014 return PTR_ERR(trans);
3017 if (rec->errors & I_ERR_NO_INODE_ITEM)
3018 ret = repair_inode_no_item(trans, root, path, rec);
3019 if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3020 ret = repair_inode_orphan_extent(trans, root, path, rec);
3021 if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3022 ret = repair_inode_discount_extent(trans, root, path, rec);
3023 if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3024 ret = repair_inode_isize(trans, root, path, rec);
3025 if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3026 ret = repair_inode_orphan_item(trans, root, path, rec);
3027 if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3028 ret = repair_inode_nlinks(trans, root, path, rec);
3029 if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3030 ret = repair_inode_nbytes(trans, root, path, rec);
3031 btrfs_commit_transaction(trans, root);
3032 btrfs_free_path(path);
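/*
 * Verify, and in repair mode fix, all inode records collected for this
 * root: repair the backrefs first (deleting bad dir indexes before adding
 * the correct ones back, as explained below), make sure the root directory
 * exists, then flag and optionally repair the per-inode errors, printing
 * whatever remains unresolved.
 */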
3036 static int check_inode_recs(struct btrfs_root *root,
3037 struct cache_tree *inode_cache)
3039 struct cache_extent *cache;
3040 struct ptr_node *node;
3041 struct inode_record *rec;
3042 struct inode_backref *backref;
3047 u64 root_dirid = btrfs_root_dirid(&root->root_item);
3049 if (btrfs_root_refs(&root->root_item) == 0) {
3050 if (!cache_tree_empty(inode_cache))
3051 fprintf(stderr, "warning line %d\n", __LINE__);
3056 * We need to record the highest inode number for the later 'lost+found'
3058 * We must select an ino not used/referenced by any existing inode, or the
3059 * 'lost+found' ino could collide with a missing ino from a corrupted leaf,
3060 * which may leave the 'lost+found' dir with wrong nlinks.
3062 cache = last_cache_extent(inode_cache);
3064 node = container_of(cache, struct ptr_node, cache);
3066 if (rec->ino > root->highest_inode)
3067 root->highest_inode = rec->ino;
3071 * We need to repair backrefs first because we could change some of the
3072 * errors in the inode recs.
3074 * We also need to go through and delete invalid backrefs first and then
3075 * add the correct ones second. We do this because we may get EEXIST
3076 * when adding back the correct index because we hadn't yet deleted the
3079 * For example, if we were missing a dir index then the directory's
3080 * isize would be wrong, so if we fixed the isize to what we thought it
3081 * should be and then fixed the backref we'd still have an invalid fs, so
3082 * we need to add back the dir index and then check to see if the isize
3087 if (stage == 3 && !err)
3090 cache = search_cache_extent(inode_cache, 0);
3091 while (repair && cache) {
3092 node = container_of(cache, struct ptr_node, cache);
3094 cache = next_cache_extent(cache);
3096 /* Need to free everything up and rescan */
3098 remove_cache_extent(inode_cache, &node->cache);
3100 free_inode_rec(rec);
3104 if (list_empty(&rec->backrefs))
3107 ret = repair_inode_backrefs(root, rec, inode_cache,
3121 rec = get_inode_rec(inode_cache, root_dirid, 0);
3122 BUG_ON(IS_ERR(rec));
3124 ret = check_root_dir(rec);
3126 fprintf(stderr, "root %llu root dir %llu error\n",
3127 (unsigned long long)root->root_key.objectid,
3128 (unsigned long long)root_dirid);
3129 print_inode_error(root, rec);
3134 struct btrfs_trans_handle *trans;
3136 trans = btrfs_start_transaction(root, 1);
3137 if (IS_ERR(trans)) {
3138 err = PTR_ERR(trans);
3143 "root %llu missing its root dir, recreating\n",
3144 (unsigned long long)root->objectid);
3146 ret = btrfs_make_root_dir(trans, root, root_dirid);
3149 btrfs_commit_transaction(trans, root);
3153 fprintf(stderr, "root %llu root dir %llu not found\n",
3154 (unsigned long long)root->root_key.objectid,
3155 (unsigned long long)root_dirid);
3159 cache = search_cache_extent(inode_cache, 0);
3162 node = container_of(cache, struct ptr_node, cache);
3164 remove_cache_extent(inode_cache, &node->cache);
3166 if (rec->ino == root_dirid ||
3167 rec->ino == BTRFS_ORPHAN_OBJECTID) {
3168 free_inode_rec(rec);
3172 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3173 ret = check_orphan_item(root, rec->ino);
3175 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3176 if (can_free_inode_rec(rec)) {
3177 free_inode_rec(rec);
3182 if (!rec->found_inode_item)
3183 rec->errors |= I_ERR_NO_INODE_ITEM;
3184 if (rec->found_link != rec->nlink)
3185 rec->errors |= I_ERR_LINK_COUNT_WRONG;
3187 ret = try_repair_inode(root, rec);
3188 if (ret == 0 && can_free_inode_rec(rec)) {
3189 free_inode_rec(rec);
3195 if (!(repair && ret == 0))
3197 print_inode_error(root, rec);
3198 list_for_each_entry(backref, &rec->backrefs, list) {
3199 if (!backref->found_dir_item)
3200 backref->errors |= REF_ERR_NO_DIR_ITEM;
3201 if (!backref->found_dir_index)
3202 backref->errors |= REF_ERR_NO_DIR_INDEX;
3203 if (!backref->found_inode_ref)
3204 backref->errors |= REF_ERR_NO_INODE_REF;
3205 fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3206 " namelen %u name %s filetype %d errors %x",
3207 (unsigned long long)backref->dir,
3208 (unsigned long long)backref->index,
3209 backref->namelen, backref->name,
3210 backref->filetype, backref->errors);
3211 print_ref_error(backref->errors);
3213 free_inode_rec(rec);
3215 return (error > 0) ? -1 : 0;
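/* Look up the root_record for @objectid in @root_cache, creating it on demand. */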
3218 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3221 struct cache_extent *cache;
3222 struct root_record *rec = NULL;
3225 cache = lookup_cache_extent(root_cache, objectid, 1);
3227 rec = container_of(cache, struct root_record, cache);
3229 rec = calloc(1, sizeof(*rec));
3231 return ERR_PTR(-ENOMEM);
3232 rec->objectid = objectid;
3233 INIT_LIST_HEAD(&rec->backrefs);
3234 rec->cache.start = objectid;
3235 rec->cache.size = 1;
3237 ret = insert_cache_extent(root_cache, &rec->cache);
3239 return ERR_PTR(-EEXIST);
3244 static struct root_backref *get_root_backref(struct root_record *rec,
3245 u64 ref_root, u64 dir, u64 index,
3246 const char *name, int namelen)
3248 struct root_backref *backref;
3250 list_for_each_entry(backref, &rec->backrefs, list) {
3251 if (backref->ref_root != ref_root || backref->dir != dir ||
3252 backref->namelen != namelen)
3254 if (memcmp(name, backref->name, namelen))
3259 backref = calloc(1, sizeof(*backref) + namelen + 1);
3262 backref->ref_root = ref_root;
3264 backref->index = index;
3265 backref->namelen = namelen;
3266 memcpy(backref->name, name, namelen);
3267 backref->name[namelen] = '\0';
3268 list_add_tail(&backref->list, &rec->backrefs);
3272 static void free_root_record(struct cache_extent *cache)
3274 struct root_record *rec;
3275 struct root_backref *backref;
3277 rec = container_of(cache, struct root_record, cache);
3278 while (!list_empty(&rec->backrefs)) {
3279 backref = to_root_backref(rec->backrefs.next);
3280 list_del(&backref->list);
3287 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3289 static int add_root_backref(struct cache_tree *root_cache,
3290 u64 root_id, u64 ref_root, u64 dir, u64 index,
3291 const char *name, int namelen,
3292 int item_type, int errors)
3294 struct root_record *rec;
3295 struct root_backref *backref;
3297 rec = get_root_rec(root_cache, root_id);
3298 BUG_ON(IS_ERR(rec));
3299 backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3302 backref->errors |= errors;
3304 if (item_type != BTRFS_DIR_ITEM_KEY) {
3305 if (backref->found_dir_index || backref->found_back_ref ||
3306 backref->found_forward_ref) {
3307 if (backref->index != index)
3308 backref->errors |= REF_ERR_INDEX_UNMATCH;
3310 backref->index = index;
3314 if (item_type == BTRFS_DIR_ITEM_KEY) {
3315 if (backref->found_forward_ref)
3317 backref->found_dir_item = 1;
3318 } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3319 backref->found_dir_index = 1;
3320 } else if (item_type == BTRFS_ROOT_REF_KEY) {
3321 if (backref->found_forward_ref)
3322 backref->errors |= REF_ERR_DUP_ROOT_REF;
3323 else if (backref->found_dir_item)
3325 backref->found_forward_ref = 1;
3326 } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3327 if (backref->found_back_ref)
3328 backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3329 backref->found_back_ref = 1;
3334 if (backref->found_forward_ref && backref->found_dir_item)
3335 backref->reachable = 1;
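/*
 * Move the subvolume references found while scanning one fs tree into the
 * global root cache: records of the relocation tree are simply dropped,
 * and every record that is a child root gets its dir item/index backrefs
 * re-added to @dst_cache as root backrefs.
 */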
3339 static int merge_root_recs(struct btrfs_root *root,
3340 struct cache_tree *src_cache,
3341 struct cache_tree *dst_cache)
3343 struct cache_extent *cache;
3344 struct ptr_node *node;
3345 struct inode_record *rec;
3346 struct inode_backref *backref;
3349 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3350 free_inode_recs_tree(src_cache);
3355 cache = search_cache_extent(src_cache, 0);
3358 node = container_of(cache, struct ptr_node, cache);
3360 remove_cache_extent(src_cache, &node->cache);
3363 ret = is_child_root(root, root->objectid, rec->ino);
3369 list_for_each_entry(backref, &rec->backrefs, list) {
3370 BUG_ON(backref->found_inode_ref);
3371 if (backref->found_dir_item)
3372 add_root_backref(dst_cache, rec->ino,
3373 root->root_key.objectid, backref->dir,
3374 backref->index, backref->name,
3375 backref->namelen, BTRFS_DIR_ITEM_KEY,
3377 if (backref->found_dir_index)
3378 add_root_backref(dst_cache, rec->ino,
3379 root->root_key.objectid, backref->dir,
3380 backref->index, backref->name,
3381 backref->namelen, BTRFS_DIR_INDEX_KEY,
3385 free_inode_rec(rec);
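/*
 * Check that every fs/subvolume root is reachable from the fs tree.
 * Reachability is computed iteratively: a backref only stays reachable
 * while the root that references it is itself referenced, and the loop
 * repeats until nothing changes. Unreferenced roots that still have a root
 * item are reported as errors.
 */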
3392 static int check_root_refs(struct btrfs_root *root,
3393 struct cache_tree *root_cache)
3395 struct root_record *rec;
3396 struct root_record *ref_root;
3397 struct root_backref *backref;
3398 struct cache_extent *cache;
3404 rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3405 BUG_ON(IS_ERR(rec));
3408 /* fixme: this cannot detect circular references */
3411 cache = search_cache_extent(root_cache, 0);
3415 rec = container_of(cache, struct root_record, cache);
3416 cache = next_cache_extent(cache);
3418 if (rec->found_ref == 0)
3421 list_for_each_entry(backref, &rec->backrefs, list) {
3422 if (!backref->reachable)
3425 ref_root = get_root_rec(root_cache,
3427 BUG_ON(IS_ERR(ref_root));
3428 if (ref_root->found_ref > 0)
3431 backref->reachable = 0;
3433 if (rec->found_ref == 0)
3439 cache = search_cache_extent(root_cache, 0);
3443 rec = container_of(cache, struct root_record, cache);
3444 cache = next_cache_extent(cache);
3446 if (rec->found_ref == 0 &&
3447 rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3448 rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3449 ret = check_orphan_item(root->fs_info->tree_root,
3455 * If we don't have a root item then we likely just have
3456 * a dir item in a snapshot for this root but no actual
3457 * ref key or anything so it's meaningless.
3459 if (!rec->found_root_item)
3462 fprintf(stderr, "fs tree %llu not referenced\n",
3463 (unsigned long long)rec->objectid);
3467 if (rec->found_ref > 0 && !rec->found_root_item)
3469 list_for_each_entry(backref, &rec->backrefs, list) {
3470 if (!backref->found_dir_item)
3471 backref->errors |= REF_ERR_NO_DIR_ITEM;
3472 if (!backref->found_dir_index)
3473 backref->errors |= REF_ERR_NO_DIR_INDEX;
3474 if (!backref->found_back_ref)
3475 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3476 if (!backref->found_forward_ref)
3477 backref->errors |= REF_ERR_NO_ROOT_REF;
3478 if (backref->reachable && backref->errors)
3485 fprintf(stderr, "fs tree %llu refs %u %s\n",
3486 (unsigned long long)rec->objectid, rec->found_ref,
3487 rec->found_root_item ? "" : "not found");
3489 list_for_each_entry(backref, &rec->backrefs, list) {
3490 if (!backref->reachable)
3492 if (!backref->errors && rec->found_root_item)
3494 fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3495 " index %llu namelen %u name %s errors %x\n",
3496 (unsigned long long)backref->ref_root,
3497 (unsigned long long)backref->dir,
3498 (unsigned long long)backref->index,
3499 backref->namelen, backref->name,
3501 print_ref_error(backref->errors);
3504 return errors > 0 ? 1 : 0;
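/*
 * Record a ROOT_REF/ROOT_BACKREF item found in the tree root as a root
 * backref, flagging names that exceed BTRFS_NAME_LEN with
 * REF_ERR_NAME_TOO_LONG.
 */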
3507 static int process_root_ref(struct extent_buffer *eb, int slot,
3508 struct btrfs_key *key,
3509 struct cache_tree *root_cache)
3515 struct btrfs_root_ref *ref;
3516 char namebuf[BTRFS_NAME_LEN];
3519 ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3521 dirid = btrfs_root_ref_dirid(eb, ref);
3522 index = btrfs_root_ref_sequence(eb, ref);
3523 name_len = btrfs_root_ref_name_len(eb, ref);
3525 if (name_len <= BTRFS_NAME_LEN) {
3529 len = BTRFS_NAME_LEN;
3530 error = REF_ERR_NAME_TOO_LONG;
3532 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3534 if (key->type == BTRFS_ROOT_REF_KEY) {
3535 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3536 index, namebuf, len, key->type, error);
3538 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3539 index, namebuf, len, key->type, error);
3544 static void free_corrupt_block(struct cache_extent *cache)
3546 struct btrfs_corrupt_block *corrupt;
3548 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3552 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3555 * Repair the btree of the given root.
3557 * The fix is to remove the node keys recorded in the corrupt_blocks cache_tree
3558 * and rebalance the tree.
3559 * After the fix, the btree should be writable.
3561 static int repair_btree(struct btrfs_root *root,
3562 struct cache_tree *corrupt_blocks)
3564 struct btrfs_trans_handle *trans;
3565 struct btrfs_path *path;
3566 struct btrfs_corrupt_block *corrupt;
3567 struct cache_extent *cache;
3568 struct btrfs_key key;
3573 if (cache_tree_empty(corrupt_blocks))
3576 path = btrfs_alloc_path();
3580 trans = btrfs_start_transaction(root, 1);
3581 if (IS_ERR(trans)) {
3582 ret = PTR_ERR(trans);
3583 fprintf(stderr, "Error starting transaction: %s\n",
3587 cache = first_cache_extent(corrupt_blocks);
3589 corrupt = container_of(cache, struct btrfs_corrupt_block,
3591 level = corrupt->level;
3592 path->lowest_level = level;
3593 key.objectid = corrupt->key.objectid;
3594 key.type = corrupt->key.type;
3595 key.offset = corrupt->key.offset;
3598 * Here we don't want to do any tree balancing, since it may
3599 * trigger a balance involving a corrupted sibling leaf/node,
3600 * so ins_len is set to 0 here.
3601 * Balancing is done after all corrupt nodes/leaves are deleted.
3603 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3606 offset = btrfs_node_blockptr(path->nodes[level],
3607 path->slots[level]);
3609 /* Remove the ptr */
3610 ret = btrfs_del_ptr(trans, root, path, level,
3611 path->slots[level]);
3615 * Remove the corresponding extent;
3616 * the return value is not checked.
3618 btrfs_release_path(path);
3619 ret = btrfs_free_extent(trans, root, offset, root->nodesize,
3620 0, root->root_key.objectid,
3622 cache = next_cache_extent(cache);
3625 /* Balance the btree using btrfs_search_slot() */
3626 cache = first_cache_extent(corrupt_blocks);
3628 corrupt = container_of(cache, struct btrfs_corrupt_block,
3630 memcpy(&key, &corrupt->key, sizeof(key));
3631 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3634 /* The return value will always be > 0 since it won't find the item */
3636 btrfs_release_path(path);
3637 cache = next_cache_extent(cache);
3640 btrfs_commit_transaction(trans, root);
3642 btrfs_free_path(path);
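/*
 * Check one fs/subvolume tree: validate the root node, walk the tree
 * collecting inode and root references, try to repair any corrupted tree
 * blocks that were recorded on the way, then merge the root references and
 * check the collected inode records.
 */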
3646 static int check_fs_root(struct btrfs_root *root,
3647 struct cache_tree *root_cache,
3648 struct walk_control *wc)
3654 struct btrfs_path path;
3655 struct shared_node root_node;
3656 struct root_record *rec;
3657 struct btrfs_root_item *root_item = &root->root_item;
3658 struct cache_tree corrupt_blocks;
3659 struct orphan_data_extent *orphan;
3660 struct orphan_data_extent *tmp;
3661 enum btrfs_tree_block_status status;
3662 struct node_refs nrefs;
3665 * Reuse the corrupt_blocks cache tree to record corrupted tree blocks.
3667 * Unlike its usage in the extent tree check, here we do it on a per
3668 * fs/subvol tree basis.
3670 cache_tree_init(&corrupt_blocks);
3671 root->fs_info->corrupt_blocks = &corrupt_blocks;
3673 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
3674 rec = get_root_rec(root_cache, root->root_key.objectid);
3675 BUG_ON(IS_ERR(rec));
3676 if (btrfs_root_refs(root_item) > 0)
3677 rec->found_root_item = 1;
3680 btrfs_init_path(&path);
3681 memset(&root_node, 0, sizeof(root_node));
3682 cache_tree_init(&root_node.root_cache);
3683 cache_tree_init(&root_node.inode_cache);
3684 memset(&nrefs, 0, sizeof(nrefs));
3686 /* Move the orphan extent record to corresponding inode_record */
3687 list_for_each_entry_safe(orphan, tmp,
3688 &root->orphan_data_extents, list) {
3689 struct inode_record *inode;
3691 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
3693 BUG_ON(IS_ERR(inode));
3694 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
3695 list_move(&orphan->list, &inode->orphan_extents);
3698 level = btrfs_header_level(root->node);
3699 memset(wc->nodes, 0, sizeof(wc->nodes));
3700 wc->nodes[level] = &root_node;
3701 wc->active_node = level;
3702 wc->root_level = level;
3704 /* We may not have checked the root block, let's do that now */
3705 if (btrfs_is_leaf(root->node))
3706 status = btrfs_check_leaf(root, NULL, root->node);
3708 status = btrfs_check_node(root, NULL, root->node);
3709 if (status != BTRFS_TREE_BLOCK_CLEAN)
3712 if (btrfs_root_refs(root_item) > 0 ||
3713 btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3714 path.nodes[level] = root->node;
3715 extent_buffer_get(root->node);
3716 path.slots[level] = 0;
3718 struct btrfs_key key;
3719 struct btrfs_disk_key found_key;
3721 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3722 level = root_item->drop_level;
3723 path.lowest_level = level;
3724 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3727 btrfs_node_key(path.nodes[level], &found_key,
3729 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3730 sizeof(found_key)));
3734 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
3740 wret = walk_up_tree(root, &path, wc, &level);
3747 btrfs_release_path(&path);
3749 if (!cache_tree_empty(&corrupt_blocks)) {
3750 struct cache_extent *cache;
3751 struct btrfs_corrupt_block *corrupt;
3753 printf("The following tree block(s) are corrupted in tree %llu:\n",
3754 root->root_key.objectid);
3755 cache = first_cache_extent(&corrupt_blocks);
3757 corrupt = container_of(cache,
3758 struct btrfs_corrupt_block,
3760 printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
3761 cache->start, corrupt->level,
3762 corrupt->key.objectid, corrupt->key.type,
3763 corrupt->key.offset);
3764 cache = next_cache_extent(cache);
3767 printf("Try to repair the btree for root %llu\n",
3768 root->root_key.objectid);
3769 ret = repair_btree(root, &corrupt_blocks);
3771 fprintf(stderr, "Failed to repair btree: %s\n",
3774 printf("Btree for root %llu is fixed\n",
3775 root->root_key.objectid);
3779 err = merge_root_recs(root, &root_node.root_cache, root_cache);
3783 if (root_node.current) {
3784 root_node.current->checked = 1;
3785 maybe_free_inode_rec(&root_node.inode_cache,
3789 err = check_inode_recs(root, &root_node.inode_cache);
3793 free_corrupt_blocks_tree(&corrupt_blocks);
3794 root->fs_info->corrupt_blocks = NULL;
3795 free_orphan_data_extents(&root->orphan_data_extents);
3799 static int fs_root_objectid(u64 objectid)
3801 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
3802 objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
3804 return is_fstree(objectid);
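/*
 * Iterate over the ROOT_ITEMs in the tree root and run check_fs_root() on
 * every fs tree. If a repair COWs the tree root, or a check returns
 * -EAGAIN, the root records collected so far are thrown away and the scan
 * starts over.
 */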
3807 static int check_fs_roots(struct btrfs_root *root,
3808 struct cache_tree *root_cache)
3810 struct btrfs_path path;
3811 struct btrfs_key key;
3812 struct walk_control wc;
3813 struct extent_buffer *leaf, *tree_node;
3814 struct btrfs_root *tmp_root;
3815 struct btrfs_root *tree_root = root->fs_info->tree_root;
3819 if (ctx.progress_enabled) {
3820 ctx.tp = TASK_FS_ROOTS;
3821 task_start(ctx.info);
3825 * Just in case we made any changes to the extent tree that weren't
3826 * reflected into the free space cache yet.
3829 reset_cached_block_groups(root->fs_info);
3830 memset(&wc, 0, sizeof(wc));
3831 cache_tree_init(&wc.shared);
3832 btrfs_init_path(&path);
3837 key.type = BTRFS_ROOT_ITEM_KEY;
3838 ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
3843 tree_node = tree_root->node;
3845 if (tree_node != tree_root->node) {
3846 free_root_recs_tree(root_cache);
3847 btrfs_release_path(&path);
3850 leaf = path.nodes[0];
3851 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
3852 ret = btrfs_next_leaf(tree_root, &path);
3858 leaf = path.nodes[0];
3860 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
3861 if (key.type == BTRFS_ROOT_ITEM_KEY &&
3862 fs_root_objectid(key.objectid)) {
3863 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3864 tmp_root = btrfs_read_fs_root_no_cache(
3865 root->fs_info, &key);
3867 key.offset = (u64)-1;
3868 tmp_root = btrfs_read_fs_root(
3869 root->fs_info, &key);
3871 if (IS_ERR(tmp_root)) {
3875 ret = check_fs_root(tmp_root, root_cache, &wc);
3876 if (ret == -EAGAIN) {
3877 free_root_recs_tree(root_cache);
3878 btrfs_release_path(&path);
3883 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
3884 btrfs_free_fs_root(tmp_root);
3885 } else if (key.type == BTRFS_ROOT_REF_KEY ||
3886 key.type == BTRFS_ROOT_BACKREF_KEY) {
3887 process_root_ref(leaf, path.slots[0], &key,
3894 btrfs_release_path(&path);
3896 free_extent_cache_tree(&wc.shared);
3897 if (!cache_tree_empty(&wc.shared))
3898 fprintf(stderr, "warning line %d\n", __LINE__);
3900 task_stop(ctx.info);
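/*
 * Verify that every backref of an extent record was seen both in the
 * extent tree and as an actual reference, and that the local and global
 * reference counts agree. Returns non-zero (optionally printing the
 * details) if anything is missing or mismatched.
 */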
3905 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
3908 struct extent_backref *back;
3909 struct tree_backref *tback;
3910 struct data_backref *dback;
3914 for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
3915 back = rb_node_to_extent_backref(n);
3916 if (!back->found_extent_tree) {
3920 if (back->is_data) {
3921 dback = to_data_backref(back);
3922 fprintf(stderr, "Backref %llu %s %llu"
3923 " owner %llu offset %llu num_refs %lu"
3924 " not found in extent tree\n",
3925 (unsigned long long)rec->start,
3926 back->full_backref ?
3928 back->full_backref ?
3929 (unsigned long long)dback->parent:
3930 (unsigned long long)dback->root,
3931 (unsigned long long)dback->owner,
3932 (unsigned long long)dback->offset,
3933 (unsigned long)dback->num_refs);
3935 tback = to_tree_backref(back);
3936 fprintf(stderr, "Backref %llu parent %llu"
3937 " root %llu not found in extent tree\n",
3938 (unsigned long long)rec->start,
3939 (unsigned long long)tback->parent,
3940 (unsigned long long)tback->root);
3943 if (!back->is_data && !back->found_ref) {
3947 tback = to_tree_backref(back);
3948 fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
3949 (unsigned long long)rec->start,
3950 back->full_backref ? "parent" : "root",
3951 back->full_backref ?
3952 (unsigned long long)tback->parent :
3953 (unsigned long long)tback->root, back);
3955 if (back->is_data) {
3956 dback = to_data_backref(back);
3957 if (dback->found_ref != dback->num_refs) {
3961 fprintf(stderr, "Incorrect local backref count"
3962 " on %llu %s %llu owner %llu"
3963 " offset %llu found %u wanted %u back %p\n",
3964 (unsigned long long)rec->start,
3965 back->full_backref ?
3967 back->full_backref ?
3968 (unsigned long long)dback->parent:
3969 (unsigned long long)dback->root,
3970 (unsigned long long)dback->owner,
3971 (unsigned long long)dback->offset,
3972 dback->found_ref, dback->num_refs, back);
3974 if (dback->disk_bytenr != rec->start) {
3978 fprintf(stderr, "Backref disk bytenr does not"
3979 " match extent record, bytenr=%llu, "
3980 "ref bytenr=%llu\n",
3981 (unsigned long long)rec->start,
3982 (unsigned long long)dback->disk_bytenr);
3985 if (dback->bytes != rec->nr) {
3989 fprintf(stderr, "Backref bytes do not match "
3990 "extent backref, bytenr=%llu, ref "
3991 "bytes=%llu, backref bytes=%llu\n",
3992 (unsigned long long)rec->start,
3993 (unsigned long long)rec->nr,
3994 (unsigned long long)dback->bytes);
3997 if (!back->is_data) {
4000 dback = to_data_backref(back);
4001 found += dback->found_ref;
4004 if (found != rec->refs) {
4008 fprintf(stderr, "Incorrect global backref count "
4009 "on %llu found %llu wanted %llu\n",
4010 (unsigned long long)rec->start,
4011 (unsigned long long)found,
4012 (unsigned long long)rec->refs);
4018 static void __free_one_backref(struct rb_node *node)
4020 struct extent_backref *back = rb_node_to_extent_backref(node);
4025 static void free_all_extent_backrefs(struct extent_record *rec)
4027 rb_free_nodes(&rec->backref_tree, __free_one_backref);
4030 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
4031 struct cache_tree *extent_cache)
4033 struct cache_extent *cache;
4034 struct extent_record *rec;
4037 cache = first_cache_extent(extent_cache);
4040 rec = container_of(cache, struct extent_record, cache);
4041 remove_cache_extent(extent_cache, cache);
4042 free_all_extent_backrefs(rec);
4047 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
4048 struct extent_record *rec)
4050 if (rec->content_checked && rec->owner_ref_checked &&
4051 rec->extent_item_refs == rec->refs && rec->refs > 0 &&
4052 rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
4053 !rec->bad_full_backref && !rec->crossing_stripes &&
4054 !rec->wrong_chunk_type) {
4055 remove_cache_extent(extent_cache, &rec->cache);
4056 free_all_extent_backrefs(rec);
4057 list_del_init(&rec->list);
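/*
 * Check that the owner stored in a tree block's header really references
 * the block: either a matching non-full backref exists, or the block is
 * found at the expected slot when searching the owner's tree one level
 * above it. Returns 0 when the owner reference checks out.
 */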
4063 static int check_owner_ref(struct btrfs_root *root,
4064 struct extent_record *rec,
4065 struct extent_buffer *buf)
4067 struct extent_backref *node, *tmp;
4068 struct tree_backref *back;
4069 struct btrfs_root *ref_root;
4070 struct btrfs_key key;
4071 struct btrfs_path path;
4072 struct extent_buffer *parent;
4077 rbtree_postorder_for_each_entry_safe(node, tmp,
4078 &rec->backref_tree, node) {
4081 if (!node->found_ref)
4083 if (node->full_backref)
4085 back = to_tree_backref(node);
4086 if (btrfs_header_owner(buf) == back->root)
4089 BUG_ON(rec->is_root);
4091 /* Try to find the block by searching the corresponding fs tree */
4092 key.objectid = btrfs_header_owner(buf);
4093 key.type = BTRFS_ROOT_ITEM_KEY;
4094 key.offset = (u64)-1;
4096 ref_root = btrfs_read_fs_root(root->fs_info, &key);
4097 if (IS_ERR(ref_root))
4100 level = btrfs_header_level(buf);
4102 btrfs_item_key_to_cpu(buf, &key, 0);
4104 btrfs_node_key_to_cpu(buf, &key, 0);
4106 btrfs_init_path(&path);
4107 path.lowest_level = level + 1;
4108 ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
4112 parent = path.nodes[level + 1];
4113 if (parent && buf->start == btrfs_node_blockptr(parent,
4114 path.slots[level + 1]))
4117 btrfs_release_path(&path);
4118 return found ? 0 : 1;
4121 static int is_extent_tree_record(struct extent_record *rec)
4123 struct extent_backref *ref, *tmp;
4124 struct tree_backref *back;
4127 rbtree_postorder_for_each_entry_safe(ref, tmp,
4128 &rec->backref_tree, node) {
4131 back = to_tree_backref(ref);
4132 if (ref->full_backref)
4134 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
4141 static int record_bad_block_io(struct btrfs_fs_info *info,
4142 struct cache_tree *extent_cache,
4145 struct extent_record *rec;
4146 struct cache_extent *cache;
4147 struct btrfs_key key;
4149 cache = lookup_cache_extent(extent_cache, start, len);
4153 rec = container_of(cache, struct extent_record, cache);
4154 if (!is_extent_tree_record(rec))
4157 btrfs_disk_key_to_cpu(&key, &rec->parent_key);
4158 return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
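/*
 * Swap slot and slot + 1 of a node or leaf. For nodes the key pointers are
 * swapped directly; for leaves the item data is swapped as well and the
 * item keys are updated. Used by fix_key_order() to sort out-of-order keys.
 */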
4161 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
4162 struct extent_buffer *buf, int slot)
4164 if (btrfs_header_level(buf)) {
4165 struct btrfs_key_ptr ptr1, ptr2;
4167 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
4168 sizeof(struct btrfs_key_ptr));
4169 read_extent_buffer(buf, &ptr2,
4170 btrfs_node_key_ptr_offset(slot + 1),
4171 sizeof(struct btrfs_key_ptr));
4172 write_extent_buffer(buf, &ptr1,
4173 btrfs_node_key_ptr_offset(slot + 1),
4174 sizeof(struct btrfs_key_ptr));
4175 write_extent_buffer(buf, &ptr2,
4176 btrfs_node_key_ptr_offset(slot),
4177 sizeof(struct btrfs_key_ptr));
4179 struct btrfs_disk_key key;
4180 btrfs_node_key(buf, &key, 0);
4181 btrfs_fixup_low_keys(root, path, &key,
4182 btrfs_header_level(buf) + 1);
4185 struct btrfs_item *item1, *item2;
4186 struct btrfs_key k1, k2;
4187 char *item1_data, *item2_data;
4188 u32 item1_offset, item2_offset, item1_size, item2_size;
4190 item1 = btrfs_item_nr(slot);
4191 item2 = btrfs_item_nr(slot + 1);
4192 btrfs_item_key_to_cpu(buf, &k1, slot);
4193 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
4194 item1_offset = btrfs_item_offset(buf, item1);
4195 item2_offset = btrfs_item_offset(buf, item2);
4196 item1_size = btrfs_item_size(buf, item1);
4197 item2_size = btrfs_item_size(buf, item2);
4199 item1_data = malloc(item1_size);
4202 item2_data = malloc(item2_size);
4208 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
4209 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
4211 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
4212 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
4216 btrfs_set_item_offset(buf, item1, item2_offset);
4217 btrfs_set_item_offset(buf, item2, item1_offset);
4218 btrfs_set_item_size(buf, item1, item2_size);
4219 btrfs_set_item_size(buf, item2, item1_size);
4221 path->slots[0] = slot;
4222 btrfs_set_item_key_unsafe(root, path, &k2);
4223 path->slots[0] = slot + 1;
4224 btrfs_set_item_key_unsafe(root, path, &k1);
4229 static int fix_key_order(struct btrfs_trans_handle *trans,
4230 struct btrfs_root *root,
4231 struct btrfs_path *path)
4233 struct extent_buffer *buf;
4234 struct btrfs_key k1, k2;
4236 int level = path->lowest_level;
4239 buf = path->nodes[level];
4240 for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
4242 btrfs_node_key_to_cpu(buf, &k1, i);
4243 btrfs_node_key_to_cpu(buf, &k2, i + 1);
4245 btrfs_item_key_to_cpu(buf, &k1, i);
4246 btrfs_item_key_to_cpu(buf, &k2, i + 1);
4248 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
4250 ret = swap_values(root, path, buf, i);
4253 btrfs_mark_buffer_dirty(buf);
4259 static int delete_bogus_item(struct btrfs_trans_handle *trans,
4260 struct btrfs_root *root,
4261 struct btrfs_path *path,
4262 struct extent_buffer *buf, int slot)
4264 struct btrfs_key key;
4265 int nritems = btrfs_header_nritems(buf);
4267 btrfs_item_key_to_cpu(buf, &key, slot);
4269 /* These are all the keys we can deal with missing. */
4270 if (key.type != BTRFS_DIR_INDEX_KEY &&
4271 key.type != BTRFS_EXTENT_ITEM_KEY &&
4272 key.type != BTRFS_METADATA_ITEM_KEY &&
4273 key.type != BTRFS_TREE_BLOCK_REF_KEY &&
4274 key.type != BTRFS_EXTENT_DATA_REF_KEY)
4277 printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
4278 (unsigned long long)key.objectid, key.type,
4279 (unsigned long long)key.offset, slot, buf->start);
4280 memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
4281 btrfs_item_nr_offset(slot + 1),
4282 sizeof(struct btrfs_item) *
4283 (nritems - slot - 1));
4284 btrfs_set_header_nritems(buf, nritems - 1);
4286 struct btrfs_disk_key disk_key;
4288 btrfs_item_key(buf, &disk_key, 0);
4289 btrfs_fixup_low_keys(root, path, &disk_key, 1);
4291 btrfs_mark_buffer_dirty(buf);
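/*
 * Re-pack a leaf whose item offsets are inconsistent: each item's data has
 * to end where the previous item's data starts (or at the end of the leaf
 * for item 0). Items whose data would overlap are deleted as bogus, the
 * remaining ones are shifted into place.
 */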
4295 static int fix_item_offset(struct btrfs_trans_handle *trans,
4296 struct btrfs_root *root,
4297 struct btrfs_path *path)
4299 struct extent_buffer *buf;
4303 /* We should only get this for leaves */
4304 BUG_ON(path->lowest_level);
4305 buf = path->nodes[0];
4307 for (i = 0; i < btrfs_header_nritems(buf); i++) {
4308 unsigned int shift = 0, offset;
4310 if (i == 0 && btrfs_item_end_nr(buf, i) !=
4311 BTRFS_LEAF_DATA_SIZE(root)) {
4312 if (btrfs_item_end_nr(buf, i) >
4313 BTRFS_LEAF_DATA_SIZE(root)) {
4314 ret = delete_bogus_item(trans, root, path,
4318 fprintf(stderr, "item is off the end of the "
4319 "leaf, can't fix\n");
4323 shift = BTRFS_LEAF_DATA_SIZE(root) -
4324 btrfs_item_end_nr(buf, i);
4325 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
4326 btrfs_item_offset_nr(buf, i - 1)) {
4327 if (btrfs_item_end_nr(buf, i) >
4328 btrfs_item_offset_nr(buf, i - 1)) {
4329 ret = delete_bogus_item(trans, root, path,
4333 fprintf(stderr, "items overlap, can't fix\n");
4337 shift = btrfs_item_offset_nr(buf, i - 1) -
4338 btrfs_item_end_nr(buf, i);
4343 printf("Shifting item nr %d by %u bytes in block %llu\n",
4344 i, shift, (unsigned long long)buf->start);
4345 offset = btrfs_item_offset_nr(buf, i);
4346 memmove_extent_buffer(buf,
4347 btrfs_leaf_data(buf) + offset + shift,
4348 btrfs_leaf_data(buf) + offset,
4349 btrfs_item_size_nr(buf, i));
4350 btrfs_set_item_offset(buf, btrfs_item_nr(i),
4352 btrfs_mark_buffer_dirty(buf);
4356 * We may have moved things, in which case we want to exit so we don't
4357 * write those changes out. Once we have proper abort functionality in
4358 * progs this can be changed to something nicer.
4365 * Attempt to fix basic block failures. If we can't fix it for whatever reason
4366 * then just return -EIO.
4368 static int try_to_fix_bad_block(struct btrfs_root *root,
4369 struct extent_buffer *buf,
4370 enum btrfs_tree_block_status status)
4372 struct btrfs_trans_handle *trans;
4373 struct ulist *roots;
4374 struct ulist_node *node;
4375 struct btrfs_root *search_root;
4376 struct btrfs_path *path;
4377 struct ulist_iterator iter;
4378 struct btrfs_key root_key, key;
4381 if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
4382 status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4385 path = btrfs_alloc_path();
4389 ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start,
4392 btrfs_free_path(path);
4396 ULIST_ITER_INIT(&iter);
4397 while ((node = ulist_next(roots, &iter))) {
4398 root_key.objectid = node->val;
4399 root_key.type = BTRFS_ROOT_ITEM_KEY;
4400 root_key.offset = (u64)-1;
4402 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
4409 trans = btrfs_start_transaction(search_root, 0);
4410 if (IS_ERR(trans)) {
4411 ret = PTR_ERR(trans);
4415 path->lowest_level = btrfs_header_level(buf);
4416 path->skip_check_block = 1;
4417 if (path->lowest_level)
4418 btrfs_node_key_to_cpu(buf, &key, 0);
4420 btrfs_item_key_to_cpu(buf, &key, 0);
4421 ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1);
4424 btrfs_commit_transaction(trans, search_root);
4427 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
4428 ret = fix_key_order(trans, search_root, path);
4429 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
4430 ret = fix_item_offset(trans, search_root, path);
4432 btrfs_commit_transaction(trans, search_root);
4435 btrfs_release_path(path);
4436 btrfs_commit_transaction(trans, search_root);
4439 btrfs_free_path(path);
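/*
 * Validate a single tree block against its extent record: run the leaf or
 * node sanity checks (attempting try_to_fix_bad_block() in repair mode),
 * then mark the record as content/owner checked so it can be dropped from
 * the cache once it is complete.
 */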
4443 static int check_block(struct btrfs_root *root,
4444 struct cache_tree *extent_cache,
4445 struct extent_buffer *buf, u64 flags)
4447 struct extent_record *rec;
4448 struct cache_extent *cache;
4449 struct btrfs_key key;
4450 enum btrfs_tree_block_status status;
4454 cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
4457 rec = container_of(cache, struct extent_record, cache);
4458 rec->generation = btrfs_header_generation(buf);
4460 level = btrfs_header_level(buf);
4461 if (btrfs_header_nritems(buf) > 0) {
4464 btrfs_item_key_to_cpu(buf, &key, 0);
4466 btrfs_node_key_to_cpu(buf, &key, 0);
4468 rec->info_objectid = key.objectid;
4470 rec->info_level = level;
4472 if (btrfs_is_leaf(buf))
4473 status = btrfs_check_leaf(root, &rec->parent_key, buf);
4475 status = btrfs_check_node(root, &rec->parent_key, buf);
4477 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4479 status = try_to_fix_bad_block(root, buf, status);
4480 if (status != BTRFS_TREE_BLOCK_CLEAN) {
4482 fprintf(stderr, "bad block %llu\n",
4483 (unsigned long long)buf->start);
4486 * Signal to callers we need to start the scan over
4487 * again since we'll have cowed blocks.
4492 rec->content_checked = 1;
4493 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4494 rec->owner_ref_checked = 1;
4496 ret = check_owner_ref(root, rec, buf);
4498 rec->owner_ref_checked = 1;
4502 maybe_free_extent_rec(extent_cache, rec);
4507 static struct tree_backref *find_tree_backref(struct extent_record *rec,
4508 u64 parent, u64 root)
4510 struct rb_node *node;
4511 struct tree_backref *back = NULL;
4512 struct tree_backref match = {
4519 match.parent = parent;
4520 match.node.full_backref = 1;
4525 node = rb_search(&rec->backref_tree, &match.node.node,
4526 (rb_compare_keys)compare_extent_backref, NULL);
4528 back = to_tree_backref(rb_node_to_extent_backref(node));
4533 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
4534 u64 parent, u64 root)
4536 struct tree_backref *ref = malloc(sizeof(*ref));
4540 memset(&ref->node, 0, sizeof(ref->node));
4542 ref->parent = parent;
4543 ref->node.full_backref = 1;
4546 ref->node.full_backref = 0;
4548 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4553 static struct data_backref *find_data_backref(struct extent_record *rec,
4554 u64 parent, u64 root,
4555 u64 owner, u64 offset,
4557 u64 disk_bytenr, u64 bytes)
4559 struct rb_node *node;
4560 struct data_backref *back = NULL;
4561 struct data_backref match = {
4568 .found_ref = found_ref,
4569 .disk_bytenr = disk_bytenr,
4573 match.parent = parent;
4574 match.node.full_backref = 1;
4579 node = rb_search(&rec->backref_tree, &match.node.node,
4580 (rb_compare_keys)compare_extent_backref, NULL);
4582 back = to_data_backref(rb_node_to_extent_backref(node));
4587 static struct data_backref *alloc_data_backref(struct extent_record *rec,
4588 u64 parent, u64 root,
4589 u64 owner, u64 offset,
4592 struct data_backref *ref = malloc(sizeof(*ref));
4596 memset(&ref->node, 0, sizeof(ref->node));
4597 ref->node.is_data = 1;
4600 ref->parent = parent;
4603 ref->node.full_backref = 1;
4607 ref->offset = offset;
4608 ref->node.full_backref = 0;
4610 ref->bytes = max_size;
4613 rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
4614 if (max_size > rec->max_size)
4615 rec->max_size = max_size;
4619 /* Check if the type of extent matches with its chunk */
4620 static void check_extent_type(struct extent_record *rec)
4622 struct btrfs_block_group_cache *bg_cache;
4624 bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
4628 /* Data extent, check the chunk directly */
4629 if (!rec->metadata) {
4630 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
4631 rec->wrong_chunk_type = 1;
4635 /* metadata extent, check the obvious case first */
4636 if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
4637 BTRFS_BLOCK_GROUP_METADATA))) {
4638 rec->wrong_chunk_type = 1;
4643 * Check for a SYSTEM extent: since it is also marked as metadata, we can
4644 * only tell it is a SYSTEM extent from its backref.
4646 if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
4647 struct extent_backref *node;
4648 struct tree_backref *tback;
4651 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
4652 if (node->is_data) {
4653 /* A tree block shouldn't have a data backref */
4654 rec->wrong_chunk_type = 1;
4657 tback = container_of(node, struct tree_backref, node);
4659 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
4660 bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
4662 bg_type = BTRFS_BLOCK_GROUP_METADATA;
4663 if (!(bg_cache->flags & bg_type))
4664 rec->wrong_chunk_type = 1;
4669 * Allocate a new extent record, fill in default values from @tmpl and insert it into
4670 * @extent_cache. The caller is supposed to make sure [start, nr) is not already in
4671 * the cache, otherwise this fails.
4673 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
4674 struct extent_record *tmpl)
4676 struct extent_record *rec;
4679 rec = malloc(sizeof(*rec));
4682 rec->start = tmpl->start;
4683 rec->max_size = tmpl->max_size;
4684 rec->nr = max(tmpl->nr, tmpl->max_size);
4685 rec->found_rec = tmpl->found_rec;
4686 rec->content_checked = tmpl->content_checked;
4687 rec->owner_ref_checked = tmpl->owner_ref_checked;
4688 rec->num_duplicates = 0;
4689 rec->metadata = tmpl->metadata;
4690 rec->flag_block_full_backref = FLAG_UNSET;
4691 rec->bad_full_backref = 0;
4692 rec->crossing_stripes = 0;
4693 rec->wrong_chunk_type = 0;
4694 rec->is_root = tmpl->is_root;
4695 rec->refs = tmpl->refs;
4696 rec->extent_item_refs = tmpl->extent_item_refs;
4697 rec->parent_generation = tmpl->parent_generation;
4698 INIT_LIST_HEAD(&rec->backrefs);
4699 INIT_LIST_HEAD(&rec->dups);
4700 INIT_LIST_HEAD(&rec->list);
4701 rec->backref_tree = RB_ROOT;
4702 memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
4703 rec->cache.start = tmpl->start;
4704 rec->cache.size = tmpl->nr;
4705 ret = insert_cache_extent(extent_cache, &rec->cache);
4707 bytes_used += rec->nr;
4710 rec->crossing_stripes = check_crossing_stripes(rec->start,
4711 global_info->tree_root->nodesize);
4712 check_extent_type(rec);
4717 * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
4719 * - refs - if found, increase refs
4720 * - is_root - if found, set
4721 * - content_checked - if found, set
4722 * - owner_ref_checked - if found, set
4724 * If not found, create a new one, initialize and insert.
4726 static int add_extent_rec(struct cache_tree *extent_cache,
4727 struct extent_record *tmpl)
4729 struct extent_record *rec;
4730 struct cache_extent *cache;
4734 cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
4736 rec = container_of(cache, struct extent_record, cache);
4740 rec->nr = max(tmpl->nr, tmpl->max_size);
4743 * We need to make sure to reset nr to whatever the extent
4744 * record says was the real size, this way we can compare it to
4747 if (tmpl->found_rec) {
4748 if (tmpl->start != rec->start || rec->found_rec) {
4749 struct extent_record *tmp;
4752 if (list_empty(&rec->list))
4753 list_add_tail(&rec->list,
4754 &duplicate_extents);
4757 * We have to do this song and dance in case we
4758 * find an extent record that falls inside of
4759 * our current extent record but does not have
4760 * the same objectid.
4762 tmp = malloc(sizeof(*tmp));
4765 tmp->start = tmpl->start;
4766 tmp->max_size = tmpl->max_size;
4769 tmp->metadata = tmpl->metadata;
4770 tmp->extent_item_refs = tmpl->extent_item_refs;
4771 INIT_LIST_HEAD(&tmp->list);
4772 list_add_tail(&tmp->list, &rec->dups);
4773 rec->num_duplicates++;
4780 if (tmpl->extent_item_refs && !dup) {
4781 if (rec->extent_item_refs) {
4782 fprintf(stderr, "block %llu rec "
4783 "extent_item_refs %llu, passed %llu\n",
4784 (unsigned long long)tmpl->start,
4785 (unsigned long long)
4786 rec->extent_item_refs,
4787 (unsigned long long)tmpl->extent_item_refs);
4789 rec->extent_item_refs = tmpl->extent_item_refs;
4793 if (tmpl->content_checked)
4794 rec->content_checked = 1;
4795 if (tmpl->owner_ref_checked)
4796 rec->owner_ref_checked = 1;
4797 memcpy(&rec->parent_key, &tmpl->parent_key,
4798 sizeof(tmpl->parent_key));
4799 if (tmpl->parent_generation)
4800 rec->parent_generation = tmpl->parent_generation;
4801 if (rec->max_size < tmpl->max_size)
4802 rec->max_size = tmpl->max_size;
4805 * A metadata extent can't cross a stripe_len boundary, otherwise
4806 * kernel scrub won't be able to handle it.
4807 * As stripe_len is currently fixed to BTRFS_STRIPE_LEN, just check
4811 rec->crossing_stripes = check_crossing_stripes(
4812 rec->start, global_info->tree_root->nodesize);
4813 check_extent_type(rec);
4814 maybe_free_extent_rec(extent_cache, rec);
4818 ret = add_extent_rec_nolookup(extent_cache, tmpl);
4823 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
4824 u64 parent, u64 root, int found_ref)
4826 struct extent_record *rec;
4827 struct tree_backref *back;
4828 struct cache_extent *cache;
4830 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4832 struct extent_record tmpl;
4834 memset(&tmpl, 0, sizeof(tmpl));
4835 tmpl.start = bytenr;
4839 add_extent_rec_nolookup(extent_cache, &tmpl);
4841 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4846 rec = container_of(cache, struct extent_record, cache);
4847 if (rec->start != bytenr) {
4851 back = find_tree_backref(rec, parent, root);
4853 back = alloc_tree_backref(rec, parent, root);
4858 if (back->node.found_ref) {
4859 fprintf(stderr, "Extent back ref already exists "
4860 "for %llu parent %llu root %llu \n",
4861 (unsigned long long)bytenr,
4862 (unsigned long long)parent,
4863 (unsigned long long)root);
4865 back->node.found_ref = 1;
4867 if (back->node.found_extent_tree) {
4868 fprintf(stderr, "Extent back ref already exists "
4869 "for %llu parent %llu root %llu \n",
4870 (unsigned long long)bytenr,
4871 (unsigned long long)parent,
4872 (unsigned long long)root);
4874 back->node.found_extent_tree = 1;
4876 check_extent_type(rec);
4877 maybe_free_extent_rec(extent_cache, rec);
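/*
 * Record a data extent backref. When @found_ref is set the ref was seen as a
 * file extent item during the tree walk and @max_size is its real length;
 * otherwise the ref comes from the extent tree and @num_refs carries the
 * reference count stored there.
 */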
4881 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
4882 u64 parent, u64 root, u64 owner, u64 offset,
4883 u32 num_refs, int found_ref, u64 max_size)
4885 struct extent_record *rec;
4886 struct data_backref *back;
4887 struct cache_extent *cache;
4889 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4891 struct extent_record tmpl;
4893 memset(&tmpl, 0, sizeof(tmpl));
4894 tmpl.start = bytenr;
4896 tmpl.max_size = max_size;
4898 add_extent_rec_nolookup(extent_cache, &tmpl);
4900 cache = lookup_cache_extent(extent_cache, bytenr, 1);
4905 rec = container_of(cache, struct extent_record, cache);
4906 if (rec->max_size < max_size)
4907 rec->max_size = max_size;
4910 * If found_ref is set then max_size is the real size and must match the
4911 * existing refs. So if we have already found a ref then we need to
4912 * make sure that this ref matches the existing one, otherwise we need
4913 * to add a new backref so we can notice that the backrefs don't match
4914 * and we need to figure out who is telling the truth. This is to
4915 * account for that awful fsync bug I introduced where we'd end up with
4916 * a btrfs_file_extent_item that would have its length include multiple
4917 * prealloc extents or point inside of a prealloc extent.
4919 back = find_data_backref(rec, parent, root, owner, offset, found_ref,
4922 back = alloc_data_backref(rec, parent, root, owner, offset,
4928 BUG_ON(num_refs != 1);
4929 if (back->node.found_ref)
4930 BUG_ON(back->bytes != max_size);
4931 back->node.found_ref = 1;
4932 back->found_ref += 1;
4933 back->bytes = max_size;
4934 back->disk_bytenr = bytenr;
4936 rec->content_checked = 1;
4937 rec->owner_ref_checked = 1;
4939 if (back->node.found_extent_tree) {
4940 fprintf(stderr, "Extent back ref already exists "
4941 "for %llu parent %llu root %llu "
4942 "owner %llu offset %llu num_refs %lu\n",
4943 (unsigned long long)bytenr,
4944 (unsigned long long)parent,
4945 (unsigned long long)root,
4946 (unsigned long long)owner,
4947 (unsigned long long)offset,
4948 (unsigned long)num_refs);
4950 back->num_refs = num_refs;
4951 back->node.found_extent_tree = 1;
4953 maybe_free_extent_rec(extent_cache, rec);
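/*
 * Queue a block for later processing; the @seen tree makes sure the same
 * range is only queued once.
 */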
4957 static int add_pending(struct cache_tree *pending,
4958 struct cache_tree *seen, u64 bytenr, u32 size)
4961 ret = add_cache_extent(seen, bytenr, size);
4964 add_cache_extent(pending, bytenr, size);
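/*
 * Fill @bits with up to @bits_nr blocks to read next: readahead candidates
 * first, then tree nodes near the last position, then whatever is pending.
 */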
4968 static int pick_next_pending(struct cache_tree *pending,
4969 struct cache_tree *reada,
4970 struct cache_tree *nodes,
4971 u64 last, struct block_info *bits, int bits_nr,
4974 unsigned long node_start = last;
4975 struct cache_extent *cache;
4978 cache = search_cache_extent(reada, 0);
4980 bits[0].start = cache->start;
4981 bits[0].size = cache->size;
4986 if (node_start > 32768)
4987 node_start -= 32768;
4989 cache = search_cache_extent(nodes, node_start);
4991 cache = search_cache_extent(nodes, 0);
4994 cache = search_cache_extent(pending, 0);
4999 bits[ret].start = cache->start;
5000 bits[ret].size = cache->size;
5001 cache = next_cache_extent(cache);
5003 } while (cache && ret < bits_nr);
5009 bits[ret].start = cache->start;
5010 bits[ret].size = cache->size;
5011 cache = next_cache_extent(cache);
5013 } while (cache && ret < bits_nr);
5015 if (bits_nr - ret > 8) {
5016 u64 lookup = bits[0].start + bits[0].size;
5017 struct cache_extent *next;
5018 next = search_cache_extent(pending, lookup);
5020 if (next->start - lookup > 32768)
5022 bits[ret].start = next->start;
5023 bits[ret].size = next->size;
5024 lookup = next->start + next->size;
5028 next = next_cache_extent(next);
5036 static void free_chunk_record(struct cache_extent *cache)
5038 struct chunk_record *rec;
5040 rec = container_of(cache, struct chunk_record, cache);
5041 list_del_init(&rec->list);
5042 list_del_init(&rec->dextents);
5046 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
5048 cache_tree_free_extents(chunk_cache, free_chunk_record);
5051 static void free_device_record(struct rb_node *node)
5053 struct device_record *rec;
5055 rec = container_of(node, struct device_record, node);
5059 FREE_RB_BASED_TREE(device_cache, free_device_record);
5061 int insert_block_group_record(struct block_group_tree *tree,
5062 struct block_group_record *bg_rec)
5066 ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
5070 list_add_tail(&bg_rec->list, &tree->block_groups);
5074 static void free_block_group_record(struct cache_extent *cache)
5076 struct block_group_record *rec;
5078 rec = container_of(cache, struct block_group_record, cache);
5079 list_del_init(&rec->list);
5083 void free_block_group_tree(struct block_group_tree *tree)
5085 cache_tree_free_extents(&tree->tree, free_block_group_record);
5088 int insert_device_extent_record(struct device_extent_tree *tree,
5089 struct device_extent_record *de_rec)
5094 * Device extent is a bit different from the other extents, because
5095 * the extents which belong to the different devices may have the
5096 * same start and size, so we need to use the special extent cache
5097 * search/insert functions.
5099 ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
5103 list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
5104 list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
5108 static void free_device_extent_record(struct cache_extent *cache)
5110 struct device_extent_record *rec;
5112 rec = container_of(cache, struct device_extent_record, cache);
5113 if (!list_empty(&rec->chunk_list))
5114 list_del_init(&rec->chunk_list);
5115 if (!list_empty(&rec->device_list))
5116 list_del_init(&rec->device_list);
5120 void free_device_extent_tree(struct device_extent_tree *tree)
5122 cache_tree_free_extents(&tree->tree, free_device_extent_record);
5125 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5126 static int process_extent_ref_v0(struct cache_tree *extent_cache,
5127 struct extent_buffer *leaf, int slot)
5129 struct btrfs_extent_ref_v0 *ref0;
5130 struct btrfs_key key;
5132 btrfs_item_key_to_cpu(leaf, &key, slot);
5133 ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
5134 if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) {
5135 add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
5137 add_data_backref(extent_cache, key.objectid, key.offset, 0,
5138 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
5144 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
5145 struct btrfs_key *key,
5148 struct btrfs_chunk *ptr;
5149 struct chunk_record *rec;
5152 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
5153 num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
5155 rec = calloc(1, btrfs_chunk_record_size(num_stripes));
5157 fprintf(stderr, "memory allocation failed\n");
5161 INIT_LIST_HEAD(&rec->list);
5162 INIT_LIST_HEAD(&rec->dextents);
5165 rec->cache.start = key->offset;
5166 rec->cache.size = btrfs_chunk_length(leaf, ptr);
5168 rec->generation = btrfs_header_generation(leaf);
5170 rec->objectid = key->objectid;
5171 rec->type = key->type;
5172 rec->offset = key->offset;
5174 rec->length = rec->cache.size;
5175 rec->owner = btrfs_chunk_owner(leaf, ptr);
5176 rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
5177 rec->type_flags = btrfs_chunk_type(leaf, ptr);
5178 rec->io_width = btrfs_chunk_io_width(leaf, ptr);
5179 rec->io_align = btrfs_chunk_io_align(leaf, ptr);
5180 rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
5181 rec->num_stripes = num_stripes;
5182 rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
5184 for (i = 0; i < rec->num_stripes; ++i) {
5185 rec->stripes[i].devid =
5186 btrfs_stripe_devid_nr(leaf, ptr, i);
5187 rec->stripes[i].offset =
5188 btrfs_stripe_offset_nr(leaf, ptr, i);
5189 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
5190 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
5197 static int process_chunk_item(struct cache_tree *chunk_cache,
5198 struct btrfs_key *key, struct extent_buffer *eb,
5201 struct chunk_record *rec;
5204 rec = btrfs_new_chunk_record(eb, key, slot);
5205 ret = insert_cache_extent(chunk_cache, &rec->cache);
5207 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
5208 rec->offset, rec->length);
5215 static int process_device_item(struct rb_root *dev_cache,
5216 struct btrfs_key *key, struct extent_buffer *eb, int slot)
5218 struct btrfs_dev_item *ptr;
5219 struct device_record *rec;
5222 ptr = btrfs_item_ptr(eb,
5223 slot, struct btrfs_dev_item);
5225 rec = malloc(sizeof(*rec));
5227 fprintf(stderr, "memory allocation failed\n");
5231 rec->devid = key->offset;
5232 rec->generation = btrfs_header_generation(eb);
5234 rec->objectid = key->objectid;
5235 rec->type = key->type;
5236 rec->offset = key->offset;
5238 rec->devid = btrfs_device_id(eb, ptr);
5239 rec->total_byte = btrfs_device_total_bytes(eb, ptr);
5240 rec->byte_used = btrfs_device_bytes_used(eb, ptr);
5242 ret = rb_insert(dev_cache, &rec->node, device_record_compare);
5244 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
5251 struct block_group_record *
5252 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
5255 struct btrfs_block_group_item *ptr;
5256 struct block_group_record *rec;
5258 rec = calloc(1, sizeof(*rec));
5260 fprintf(stderr, "memory allocation failed\n");
5264 rec->cache.start = key->objectid;
5265 rec->cache.size = key->offset;
5267 rec->generation = btrfs_header_generation(leaf);
5269 rec->objectid = key->objectid;
5270 rec->type = key->type;
5271 rec->offset = key->offset;
5273 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
5274 rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
5276 INIT_LIST_HEAD(&rec->list);
5281 static int process_block_group_item(struct block_group_tree *block_group_cache,
5282 struct btrfs_key *key,
5283 struct extent_buffer *eb, int slot)
5285 struct block_group_record *rec;
5288 rec = btrfs_new_block_group_record(eb, key, slot);
5289 ret = insert_block_group_record(block_group_cache, rec);
5291 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
5292 rec->objectid, rec->offset);
5299 struct device_extent_record *
5300 btrfs_new_device_extent_record(struct extent_buffer *leaf,
5301 struct btrfs_key *key, int slot)
5303 struct device_extent_record *rec;
5304 struct btrfs_dev_extent *ptr;
5306 rec = calloc(1, sizeof(*rec));
5308 fprintf(stderr, "memory allocation failed\n");
5312 rec->cache.objectid = key->objectid;
5313 rec->cache.start = key->offset;
5315 rec->generation = btrfs_header_generation(leaf);
5317 rec->objectid = key->objectid;
5318 rec->type = key->type;
5319 rec->offset = key->offset;
5321 ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
5322 rec->chunk_objecteid =
5323 btrfs_dev_extent_chunk_objectid(leaf, ptr);
5325 btrfs_dev_extent_chunk_offset(leaf, ptr);
5326 rec->length = btrfs_dev_extent_length(leaf, ptr);
5327 rec->cache.size = rec->length;
5329 INIT_LIST_HEAD(&rec->chunk_list);
5330 INIT_LIST_HEAD(&rec->device_list);
5336 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
5337 struct btrfs_key *key, struct extent_buffer *eb,
5340 struct device_extent_record *rec;
5343 rec = btrfs_new_device_extent_record(eb, key, slot);
5344 ret = insert_device_extent_record(dev_extent_cache, rec);
5347 "Device extent[%llu, %llu, %llu] existed.\n",
5348 rec->objectid, rec->offset, rec->length);
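/*
 * Turn an EXTENT_ITEM or METADATA_ITEM into an extent record and register
 * every inline backref stored after it.
 */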
5355 static int process_extent_item(struct btrfs_root *root,
5356 struct cache_tree *extent_cache,
5357 struct extent_buffer *eb, int slot)
5359 struct btrfs_extent_item *ei;
5360 struct btrfs_extent_inline_ref *iref;
5361 struct btrfs_extent_data_ref *dref;
5362 struct btrfs_shared_data_ref *sref;
5363 struct btrfs_key key;
5364 struct extent_record tmpl;
5368 u32 item_size = btrfs_item_size_nr(eb, slot);
5374 btrfs_item_key_to_cpu(eb, &key, slot);
5376 if (key.type == BTRFS_METADATA_ITEM_KEY) {
5378 num_bytes = root->nodesize;
5380 num_bytes = key.offset;
5383 if (item_size < sizeof(*ei)) {
5384 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
5385 struct btrfs_extent_item_v0 *ei0;
5386 BUG_ON(item_size != sizeof(*ei0));
5387 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
5388 refs = btrfs_extent_refs_v0(eb, ei0);
5392 memset(&tmpl, 0, sizeof(tmpl));
5393 tmpl.start = key.objectid;
5394 tmpl.nr = num_bytes;
5395 tmpl.extent_item_refs = refs;
5396 tmpl.metadata = metadata;
5398 tmpl.max_size = num_bytes;
5400 return add_extent_rec(extent_cache, &tmpl);
5403 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
5404 refs = btrfs_extent_refs(eb, ei);
5405 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
5410 memset(&tmpl, 0, sizeof(tmpl));
5411 tmpl.start = key.objectid;
5412 tmpl.nr = num_bytes;
5413 tmpl.extent_item_refs = refs;
5414 tmpl.metadata = metadata;
5416 tmpl.max_size = num_bytes;
5417 add_extent_rec(extent_cache, &tmpl);
5419 ptr = (unsigned long)(ei + 1);
5420 if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
5421 key.type == BTRFS_EXTENT_ITEM_KEY)
5422 ptr += sizeof(struct btrfs_tree_block_info);
5424 end = (unsigned long)ei + item_size;
5426 iref = (struct btrfs_extent_inline_ref *)ptr;
5427 type = btrfs_extent_inline_ref_type(eb, iref);
5428 offset = btrfs_extent_inline_ref_offset(eb, iref);
5430 case BTRFS_TREE_BLOCK_REF_KEY:
5431 add_tree_backref(extent_cache, key.objectid,
5434 case BTRFS_SHARED_BLOCK_REF_KEY:
5435 add_tree_backref(extent_cache, key.objectid,
5438 case BTRFS_EXTENT_DATA_REF_KEY:
5439 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
5440 add_data_backref(extent_cache, key.objectid, 0,
5441 btrfs_extent_data_ref_root(eb, dref),
5442 btrfs_extent_data_ref_objectid(eb,
5444 btrfs_extent_data_ref_offset(eb, dref),
5445 btrfs_extent_data_ref_count(eb, dref),
5448 case BTRFS_SHARED_DATA_REF_KEY:
5449 sref = (struct btrfs_shared_data_ref *)(iref + 1);
5450 add_data_backref(extent_cache, key.objectid, offset,
5452 btrfs_shared_data_ref_count(eb, sref),
5456 fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
5457 key.objectid, key.type, num_bytes);
5460 ptr += btrfs_extent_inline_ref_size(type);
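/*
 * Check that the free space cache has an entry exactly covering
 * [offset, offset + bytes), skipping superblock mirrors that land inside
 * the range.
 */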
5467 static int check_cache_range(struct btrfs_root *root,
5468 struct btrfs_block_group_cache *cache,
5469 u64 offset, u64 bytes)
5471 struct btrfs_free_space *entry;
5477 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
5478 bytenr = btrfs_sb_offset(i);
5479 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
5480 cache->key.objectid, bytenr, 0,
5481 &logical, &nr, &stripe_len);
5486 if (logical[nr] + stripe_len <= offset)
5488 if (offset + bytes <= logical[nr])
5490 if (logical[nr] == offset) {
5491 if (stripe_len >= bytes) {
5495 bytes -= stripe_len;
5496 offset += stripe_len;
5497 } else if (logical[nr] < offset) {
5498 if (logical[nr] + stripe_len >=
5503 bytes = (offset + bytes) -
5504 (logical[nr] + stripe_len);
5505 offset = logical[nr] + stripe_len;
5508 * Could be tricky, the super may land in the
5509 * middle of the area we're checking. First
5510 * check the easiest case, it's at the end.
5512 if (logical[nr] + stripe_len >=
5514 bytes = logical[nr] - offset;
5518 /* Check the left side */
5519 ret = check_cache_range(root, cache,
5521 logical[nr] - offset);
5527 /* Now we continue with the right side */
5528 bytes = (offset + bytes) -
5529 (logical[nr] + stripe_len);
5530 offset = logical[nr] + stripe_len;
5537 entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
5539 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
5540 offset, offset+bytes);
5544 if (entry->offset != offset) {
5545 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
5550 if (entry->bytes != bytes) {
5551 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
5552 bytes, entry->bytes, offset);
5556 unlink_free_space(cache->free_space_ctl, entry);
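/*
 * Walk the extent tree for one block group and verify that every gap
 * between allocated extents is present in the free space cache.
 */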
5561 static int verify_space_cache(struct btrfs_root *root,
5562 struct btrfs_block_group_cache *cache)
5564 struct btrfs_path *path;
5565 struct extent_buffer *leaf;
5566 struct btrfs_key key;
5570 path = btrfs_alloc_path();
5574 root = root->fs_info->extent_root;
5576 last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
5578 key.objectid = last;
5580 key.type = BTRFS_EXTENT_ITEM_KEY;
5582 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5587 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5588 ret = btrfs_next_leaf(root, path);
5596 leaf = path->nodes[0];
5597 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5598 if (key.objectid >= cache->key.offset + cache->key.objectid)
5600 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
5601 key.type != BTRFS_METADATA_ITEM_KEY) {
5606 if (last == key.objectid) {
5607 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5608 last = key.objectid + key.offset;
5610 last = key.objectid + root->nodesize;
5615 ret = check_cache_range(root, cache, last,
5616 key.objectid - last);
5619 if (key.type == BTRFS_EXTENT_ITEM_KEY)
5620 last = key.objectid + key.offset;
5622 last = key.objectid + root->nodesize;
5626 if (last < cache->key.objectid + cache->key.offset)
5627 ret = check_cache_range(root, cache, last,
5628 cache->key.objectid +
5629 cache->key.offset - last);
5632 btrfs_free_path(path);
5635 !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
5636 fprintf(stderr, "There are still entries left in the space "
5644 static int check_space_cache(struct btrfs_root *root)
5646 struct btrfs_block_group_cache *cache;
5647 u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
5651 if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
5652 btrfs_super_generation(root->fs_info->super_copy) !=
5653 btrfs_super_cache_generation(root->fs_info->super_copy)) {
5654 printf("cache and super generation don't match, space cache "
5655 "will be invalidated\n");
5659 if (ctx.progress_enabled) {
5660 ctx.tp = TASK_FREE_SPACE;
5661 task_start(ctx.info);
5665 cache = btrfs_lookup_first_block_group(root->fs_info, start);
5669 start = cache->key.objectid + cache->key.offset;
5670 if (!cache->free_space_ctl) {
5671 if (btrfs_init_free_space_ctl(cache,
5672 root->sectorsize)) {
5677 btrfs_remove_free_space_cache(cache);
5680 if (btrfs_fs_compat_ro(root->fs_info,
5681 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)) {
5682 ret = exclude_super_stripes(root, cache);
5684 fprintf(stderr, "could not exclude super stripes: %s\n",
5689 ret = load_free_space_tree(root->fs_info, cache);
5690 free_excluded_extents(root, cache);
5692 fprintf(stderr, "could not load free space tree: %s\n",
5699 ret = load_free_space_cache(root->fs_info, cache);
5704 ret = verify_space_cache(root, cache);
5706 fprintf(stderr, "cache appears valid but isn't %Lu\n",
5707 cache->key.objectid);
5712 task_stop(ctx.info);
5714 return error ? -EINVAL : 0;
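/*
 * Read [bytenr, bytenr + num_bytes) and verify each sector against the
 * checksums stored at @leaf_offset in @eb, trying other mirrors on a
 * mismatch.
 */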
5717 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
5718 u64 num_bytes, unsigned long leaf_offset,
5719 struct extent_buffer *eb) {
5722 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5724 unsigned long csum_offset;
5728 u64 data_checked = 0;
5734 if (num_bytes % root->sectorsize)
5737 data = malloc(num_bytes);
5741 while (offset < num_bytes) {
5744 read_len = num_bytes - offset;
5745 /* read as much data at a time as possible */
5746 ret = read_extent_data(root, data + offset,
5747 bytenr + offset, &read_len, mirror);
5751 /* verify the checksum of each sector-sized block */
5752 while (data_checked < read_len) {
5754 tmp = offset + data_checked;
5756 csum = btrfs_csum_data(NULL, (char *)data + tmp,
5757 csum, root->sectorsize);
5758 btrfs_csum_final(csum, (char *)&csum);
5760 csum_offset = leaf_offset +
5761 tmp / root->sectorsize * csum_size;
5762 read_extent_buffer(eb, (char *)&csum_expected,
5763 csum_offset, csum_size);
5764 /* try another mirror */
5765 if (csum != csum_expected) {
5766 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
5767 mirror, bytenr + tmp,
5768 csum, csum_expected);
5769 num_copies = btrfs_num_copies(
5770 &root->fs_info->mapping_tree,
5772 if (mirror < num_copies - 1) {
5777 data_checked += root->sectorsize;
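/*
 * Make sure the whole range [bytenr, bytenr + num_bytes) is covered by
 * extent items; used to detect csums that point at unallocated space.
 */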
5786 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
5789 struct btrfs_path *path;
5790 struct extent_buffer *leaf;
5791 struct btrfs_key key;
5794 path = btrfs_alloc_path();
5796 fprintf(stderr, "Error allocating path\n");
5800 key.objectid = bytenr;
5801 key.type = BTRFS_EXTENT_ITEM_KEY;
5802 key.offset = (u64)-1;
5805 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
5808 fprintf(stderr, "Error looking up extent record %d\n", ret);
5809 btrfs_free_path(path);
5812 if (path->slots[0] > 0) {
5815 ret = btrfs_prev_leaf(root, path);
5818 } else if (ret > 0) {
5825 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5828 * Block group items come before extent items if they have the same
5829 * bytenr, so walk back one more just in case. Dear future traveller,
5830 * first congrats on mastering time travel. Now if it's not too much
5831 * trouble could you go back to 2006 and tell Chris to make the
5832 * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
5833 * EXTENT_ITEM_KEY please?
5835 while (key.type > BTRFS_EXTENT_ITEM_KEY) {
5836 if (path->slots[0] > 0) {
5839 ret = btrfs_prev_leaf(root, path);
5842 } else if (ret > 0) {
5847 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5851 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5852 ret = btrfs_next_leaf(root, path);
5854 fprintf(stderr, "Error going to next leaf "
5856 btrfs_free_path(path);
5862 leaf = path->nodes[0];
5863 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5864 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
5868 if (key.objectid + key.offset < bytenr) {
5872 if (key.objectid > bytenr + num_bytes)
5875 if (key.objectid == bytenr) {
5876 if (key.offset >= num_bytes) {
5880 num_bytes -= key.offset;
5881 bytenr += key.offset;
5882 } else if (key.objectid < bytenr) {
5883 if (key.objectid + key.offset >= bytenr + num_bytes) {
5887 num_bytes = (bytenr + num_bytes) -
5888 (key.objectid + key.offset);
5889 bytenr = key.objectid + key.offset;
5891 if (key.objectid + key.offset < bytenr + num_bytes) {
5892 u64 new_start = key.objectid + key.offset;
5893 u64 new_bytes = bytenr + num_bytes - new_start;
5896 * Weird case, the extent is in the middle of
5897 * our range, we'll have to search one side
5898 * and then the other. Not sure if this happens
5899 * in real life, but no harm in coding it up
5900 * anyway just in case.
5902 btrfs_release_path(path);
5903 ret = check_extent_exists(root, new_start,
5906 fprintf(stderr, "Right section didn't "
5910 num_bytes = key.objectid - bytenr;
5913 num_bytes = key.objectid - bytenr;
5920 if (num_bytes && !ret) {
5921 fprintf(stderr, "There are no extents for csum range "
5922 "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
5926 btrfs_free_path(path);
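/*
 * Walk the csum tree and verify every checksum range is backed by an extent
 * item; if check_data_csum is set, also read the data and verify the sums.
 */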
5930 static int check_csums(struct btrfs_root *root)
5932 struct btrfs_path *path;
5933 struct extent_buffer *leaf;
5934 struct btrfs_key key;
5935 u64 offset = 0, num_bytes = 0;
5936 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
5940 unsigned long leaf_offset;
5942 root = root->fs_info->csum_root;
5943 if (!extent_buffer_uptodate(root->node)) {
5944 fprintf(stderr, "No valid csum tree found\n");
5948 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
5949 key.type = BTRFS_EXTENT_CSUM_KEY;
5952 path = btrfs_alloc_path();
5956 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5958 fprintf(stderr, "Error searching csum tree %d\n", ret);
5959 btrfs_free_path(path);
5963 if (ret > 0 && path->slots[0])
5968 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
5969 ret = btrfs_next_leaf(root, path);
5971 fprintf(stderr, "Error going to next leaf "
5978 leaf = path->nodes[0];
5980 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5981 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
5986 data_len = (btrfs_item_size_nr(leaf, path->slots[0]) /
5987 csum_size) * root->sectorsize;
5988 if (!check_data_csum)
5989 goto skip_csum_check;
5990 leaf_offset = btrfs_item_ptr_offset(leaf, path->slots[0]);
5991 ret = check_extent_csums(root, key.offset, data_len,
5997 offset = key.offset;
5998 } else if (key.offset != offset + num_bytes) {
5999 ret = check_extent_exists(root, offset, num_bytes);
6001 fprintf(stderr, "Csum exists for %Lu-%Lu but "
6002 "there is no extent record\n",
6003 offset, offset+num_bytes);
6006 offset = key.offset;
6009 num_bytes += data_len;
6013 btrfs_free_path(path);
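/*
 * Return 1 if @key sorts before @drop_key, i.e. it lies in the part of a
 * partially deleted subtree that has already been dropped.
 */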
6017 static int is_dropped_key(struct btrfs_key *key,
6018 struct btrfs_key *drop_key) {
6019 if (key->objectid < drop_key->objectid)
6021 else if (key->objectid == drop_key->objectid) {
6022 if (key->type < drop_key->type)
6024 else if (key->type == drop_key->type) {
6025 if (key->offset < drop_key->offset)
6033 * Here are the rules for FULL_BACKREF.
6035 * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
6036 * 2) If btrfs_header_owner(buf) no longer points to buf then we have
6038 * 3) We cowed the block walking down a reloc tree. This is impossible to tell
6039 * if it happened after the relocation occurred since we'll have dropped the
6040 * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
6041 * have no real way to know for sure.
6043 * We process the blocks one root at a time, and we start from the lowest root
6044 * objectid and go to the highest. So we can just lookup the owner backref for
6045 * the record and if we don't find it then we know it doesn't exist and we have
6048 * FIXME: if we ever start reclaiming root objectid's then we need to fix this
6049 * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
6050 * be set or not and then we can check later once we've gathered all the refs.
6052 static int calc_extent_flag(struct btrfs_root *root,
6053 struct cache_tree *extent_cache,
6054 struct extent_buffer *buf,
6055 struct root_item_record *ri,
6058 struct extent_record *rec;
6059 struct cache_extent *cache;
6060 struct tree_backref *tback;
6063 cache = lookup_cache_extent(extent_cache, buf->start, 1);
6064 /* we have added this extent before */
6066 rec = container_of(cache, struct extent_record, cache);
6069 * Except for file/reloc trees, we cannot have
6072 if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
6077 if (buf->start == ri->bytenr)
6080 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6083 owner = btrfs_header_owner(buf);
6084 if (owner == ri->objectid)
6087 tback = find_tree_backref(rec, 0, owner);
6092 if (rec->flag_block_full_backref != FLAG_UNSET &&
6093 rec->flag_block_full_backref != 0)
6094 rec->bad_full_backref = 1;
6097 *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6098 if (rec->flag_block_full_backref != FLAG_UNSET &&
6099 rec->flag_block_full_backref != 1)
6100 rec->bad_full_backref = 1;
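/*
 * Process one tree block: pick it from the pending/node queues, read it,
 * work out its FULL_BACKREF state, record every item of interest (chunks,
 * devices, block groups, device extents, extent items and backrefs) and
 * queue any child nodes for later.
 */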
6104 static int run_next_block(struct btrfs_root *root,
6105 struct block_info *bits,
6108 struct cache_tree *pending,
6109 struct cache_tree *seen,
6110 struct cache_tree *reada,
6111 struct cache_tree *nodes,
6112 struct cache_tree *extent_cache,
6113 struct cache_tree *chunk_cache,
6114 struct rb_root *dev_cache,
6115 struct block_group_tree *block_group_cache,
6116 struct device_extent_tree *dev_extent_cache,
6117 struct root_item_record *ri)
6119 struct extent_buffer *buf;
6120 struct extent_record *rec = NULL;
6131 struct btrfs_key key;
6132 struct cache_extent *cache;
6135 nritems = pick_next_pending(pending, reada, nodes, *last, bits,
6136 bits_nr, &reada_bits);
6141 for(i = 0; i < nritems; i++) {
6142 ret = add_cache_extent(reada, bits[i].start,
6147 /* fixme, get the parent transid */
6148 readahead_tree_block(root, bits[i].start,
6152 *last = bits[0].start;
6153 bytenr = bits[0].start;
6154 size = bits[0].size;
6156 cache = lookup_cache_extent(pending, bytenr, size);
6158 remove_cache_extent(pending, cache);
6161 cache = lookup_cache_extent(reada, bytenr, size);
6163 remove_cache_extent(reada, cache);
6166 cache = lookup_cache_extent(nodes, bytenr, size);
6168 remove_cache_extent(nodes, cache);
6171 cache = lookup_cache_extent(extent_cache, bytenr, size);
6173 rec = container_of(cache, struct extent_record, cache);
6174 gen = rec->parent_generation;
6177 /* fixme, get the real parent transid */
6178 buf = read_tree_block(root, bytenr, size, gen);
6179 if (!extent_buffer_uptodate(buf)) {
6180 record_bad_block_io(root->fs_info,
6181 extent_cache, bytenr, size);
6185 nritems = btrfs_header_nritems(buf);
6188 if (!init_extent_tree) {
6189 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
6190 btrfs_header_level(buf), 1, NULL,
6193 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6195 fprintf(stderr, "Couldn't calc extent flags\n");
6196 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6201 ret = calc_extent_flag(root, extent_cache, buf, ri, &flags);
6203 fprintf(stderr, "Couldn't calc extent flags\n");
6204 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6208 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6210 ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
6211 ri->objectid == btrfs_header_owner(buf)) {
6213 * Ok we got to this block from its original owner and
6214 * we have FULL_BACKREF set. Relocation can leave
6215 * converted blocks over so this is altogether possible,
6216 * however it's not possible if the generation > the
6217 * last snapshot, so check for this case.
6219 if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
6220 btrfs_header_generation(buf) > ri->last_snapshot) {
6221 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
6222 rec->bad_full_backref = 1;
6227 (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
6228 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
6229 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
6230 rec->bad_full_backref = 1;
6234 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6235 rec->flag_block_full_backref = 1;
6239 rec->flag_block_full_backref = 0;
6241 owner = btrfs_header_owner(buf);
6244 ret = check_block(root, extent_cache, buf, flags);
6248 if (btrfs_is_leaf(buf)) {
6249 btree_space_waste += btrfs_leaf_free_space(root, buf);
6250 for (i = 0; i < nritems; i++) {
6251 struct btrfs_file_extent_item *fi;
6252 btrfs_item_key_to_cpu(buf, &key, i);
6253 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
6254 process_extent_item(root, extent_cache, buf,
6258 if (key.type == BTRFS_METADATA_ITEM_KEY) {
6259 process_extent_item(root, extent_cache, buf,
6263 if (key.type == BTRFS_EXTENT_CSUM_KEY) {
6265 btrfs_item_size_nr(buf, i);
6268 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6269 process_chunk_item(chunk_cache, &key, buf, i);
6272 if (key.type == BTRFS_DEV_ITEM_KEY) {
6273 process_device_item(dev_cache, &key, buf, i);
6276 if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
6277 process_block_group_item(block_group_cache,
6281 if (key.type == BTRFS_DEV_EXTENT_KEY) {
6282 process_device_extent_item(dev_extent_cache,
6287 if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
6288 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
6289 process_extent_ref_v0(extent_cache, buf, i);
6296 if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
6297 add_tree_backref(extent_cache, key.objectid, 0,
6301 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
6302 add_tree_backref(extent_cache, key.objectid,
6306 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6307 struct btrfs_extent_data_ref *ref;
6308 ref = btrfs_item_ptr(buf, i,
6309 struct btrfs_extent_data_ref);
6310 add_data_backref(extent_cache,
6312 btrfs_extent_data_ref_root(buf, ref),
6313 btrfs_extent_data_ref_objectid(buf,
6315 btrfs_extent_data_ref_offset(buf, ref),
6316 btrfs_extent_data_ref_count(buf, ref),
6317 0, root->sectorsize);
6320 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6321 struct btrfs_shared_data_ref *ref;
6322 ref = btrfs_item_ptr(buf, i,
6323 struct btrfs_shared_data_ref);
6324 add_data_backref(extent_cache,
6325 key.objectid, key.offset, 0, 0, 0,
6326 btrfs_shared_data_ref_count(buf, ref),
6327 0, root->sectorsize);
6330 if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
6331 struct bad_item *bad;
6333 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
6337 bad = malloc(sizeof(struct bad_item));
6340 INIT_LIST_HEAD(&bad->list);
6341 memcpy(&bad->key, &key,
6342 sizeof(struct btrfs_key));
6343 bad->root_id = owner;
6344 list_add_tail(&bad->list, &delete_items);
6347 if (key.type != BTRFS_EXTENT_DATA_KEY)
6349 fi = btrfs_item_ptr(buf, i,
6350 struct btrfs_file_extent_item);
6351 if (btrfs_file_extent_type(buf, fi) ==
6352 BTRFS_FILE_EXTENT_INLINE)
6354 if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
6357 data_bytes_allocated +=
6358 btrfs_file_extent_disk_num_bytes(buf, fi);
6359 if (data_bytes_allocated < root->sectorsize) {
6362 data_bytes_referenced +=
6363 btrfs_file_extent_num_bytes(buf, fi);
6364 add_data_backref(extent_cache,
6365 btrfs_file_extent_disk_bytenr(buf, fi),
6366 parent, owner, key.objectid, key.offset -
6367 btrfs_file_extent_offset(buf, fi), 1, 1,
6368 btrfs_file_extent_disk_num_bytes(buf, fi));
6372 struct btrfs_key first_key;
6374 first_key.objectid = 0;
6377 btrfs_item_key_to_cpu(buf, &first_key, 0);
6378 level = btrfs_header_level(buf);
6379 for (i = 0; i < nritems; i++) {
6380 struct extent_record tmpl;
6382 ptr = btrfs_node_blockptr(buf, i);
6383 size = root->nodesize;
6384 btrfs_node_key_to_cpu(buf, &key, i);
6386 if ((level == ri->drop_level)
6387 && is_dropped_key(&key, &ri->drop_key)) {
6392 memset(&tmpl, 0, sizeof(tmpl));
6393 btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
6394 tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
6399 tmpl.max_size = size;
6400 ret = add_extent_rec(extent_cache, &tmpl);
6403 add_tree_backref(extent_cache, ptr, parent, owner, 1);
6406 add_pending(nodes, seen, ptr, size);
6408 add_pending(pending, seen, ptr, size);
6411 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
6412 nritems) * sizeof(struct btrfs_key_ptr);
6414 total_btree_bytes += buf->len;
6415 if (fs_root_objectid(btrfs_header_owner(buf)))
6416 total_fs_tree_bytes += buf->len;
6417 if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
6418 total_extent_tree_bytes += buf->len;
6419 if (!found_old_backref &&
6420 btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
6421 btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
6422 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
6423 found_old_backref = 1;
6425 free_extent_buffer(buf);
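/*
 * Seed the scan with a tree root: queue its node for reading and insert the
 * extent record and tree backref implied by the root item.
 */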
6429 static int add_root_to_pending(struct extent_buffer *buf,
6430 struct cache_tree *extent_cache,
6431 struct cache_tree *pending,
6432 struct cache_tree *seen,
6433 struct cache_tree *nodes,
6436 struct extent_record tmpl;
6438 if (btrfs_header_level(buf) > 0)
6439 add_pending(nodes, seen, buf->start, buf->len);
6441 add_pending(pending, seen, buf->start, buf->len);
6443 memset(&tmpl, 0, sizeof(tmpl));
6444 tmpl.start = buf->start;
6449 tmpl.max_size = buf->len;
6450 add_extent_rec(extent_cache, &tmpl);
6452 if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
6453 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
6454 add_tree_backref(extent_cache, buf->start, buf->start,
6457 add_tree_backref(extent_cache, buf->start, 0, objectid, 1);
6461 /* as we fix the tree, we might be deleting blocks that
6462 * we're tracking for repair. This hook makes sure we
6463 * remove any backrefs for blocks as we are fixing them.
6465 static int free_extent_hook(struct btrfs_trans_handle *trans,
6466 struct btrfs_root *root,
6467 u64 bytenr, u64 num_bytes, u64 parent,
6468 u64 root_objectid, u64 owner, u64 offset,
6471 struct extent_record *rec;
6472 struct cache_extent *cache;
6474 struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
6476 is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
6477 cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
6481 rec = container_of(cache, struct extent_record, cache);
6483 struct data_backref *back;
6484 back = find_data_backref(rec, parent, root_objectid, owner,
6485 offset, 1, bytenr, num_bytes);
6488 if (back->node.found_ref) {
6489 back->found_ref -= refs_to_drop;
6491 rec->refs -= refs_to_drop;
6493 if (back->node.found_extent_tree) {
6494 back->num_refs -= refs_to_drop;
6495 if (rec->extent_item_refs)
6496 rec->extent_item_refs -= refs_to_drop;
6498 if (back->found_ref == 0)
6499 back->node.found_ref = 0;
6500 if (back->num_refs == 0)
6501 back->node.found_extent_tree = 0;
6503 if (!back->node.found_extent_tree && back->node.found_ref) {
6504 rb_erase(&back->node.node, &rec->backref_tree);
6508 struct tree_backref *back;
6509 back = find_tree_backref(rec, parent, root_objectid);
6512 if (back->node.found_ref) {
6515 back->node.found_ref = 0;
6517 if (back->node.found_extent_tree) {
6518 if (rec->extent_item_refs)
6519 rec->extent_item_refs--;
6520 back->node.found_extent_tree = 0;
6522 if (!back->node.found_extent_tree && back->node.found_ref) {
6523 rb_erase(&back->node.node, &rec->backref_tree);
6527 maybe_free_extent_rec(extent_cache, rec);
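/*
 * Delete every extent tree item (extent/metadata items and all backref item
 * types) for @bytenr, updating block group accounting for the items that
 * carry a size.
 */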
6532 static int delete_extent_records(struct btrfs_trans_handle *trans,
6533 struct btrfs_root *root,
6534 struct btrfs_path *path,
6535 u64 bytenr, u64 new_len)
6537 struct btrfs_key key;
6538 struct btrfs_key found_key;
6539 struct extent_buffer *leaf;
6544 key.objectid = bytenr;
6546 key.offset = (u64)-1;
6549 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
6556 if (path->slots[0] == 0)
6562 leaf = path->nodes[0];
6563 slot = path->slots[0];
6565 btrfs_item_key_to_cpu(leaf, &found_key, slot);
6566 if (found_key.objectid != bytenr)
6569 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
6570 found_key.type != BTRFS_METADATA_ITEM_KEY &&
6571 found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6572 found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
6573 found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
6574 found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
6575 found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
6576 btrfs_release_path(path);
6577 if (found_key.type == 0) {
6578 if (found_key.offset == 0)
6580 key.offset = found_key.offset - 1;
6581 key.type = found_key.type;
6583 key.type = found_key.type - 1;
6584 key.offset = (u64)-1;
6588 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
6589 found_key.objectid, found_key.type, found_key.offset);
6591 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
6594 btrfs_release_path(path);
6596 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
6597 found_key.type == BTRFS_METADATA_ITEM_KEY) {
6598 u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
6599 found_key.offset : root->nodesize;
6601 ret = btrfs_update_block_group(trans, root, bytenr,
6608 btrfs_release_path(path);
6613 * for a single backref, this will allocate a new extent
6614 * and add the backref to it.
6616 static int record_extent(struct btrfs_trans_handle *trans,
6617 struct btrfs_fs_info *info,
6618 struct btrfs_path *path,
6619 struct extent_record *rec,
6620 struct extent_backref *back,
6621 int allocated, u64 flags)
6624 struct btrfs_root *extent_root = info->extent_root;
6625 struct extent_buffer *leaf;
6626 struct btrfs_key ins_key;
6627 struct btrfs_extent_item *ei;
6628 struct tree_backref *tback;
6629 struct data_backref *dback;
6630 struct btrfs_tree_block_info *bi;
6633 rec->max_size = max_t(u64, rec->max_size,
6634 info->extent_root->nodesize);
6637 u32 item_size = sizeof(*ei);
6640 item_size += sizeof(*bi);
6642 ins_key.objectid = rec->start;
6643 ins_key.offset = rec->max_size;
6644 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
6646 ret = btrfs_insert_empty_item(trans, extent_root, path,
6647 &ins_key, item_size);
6651 leaf = path->nodes[0];
6652 ei = btrfs_item_ptr(leaf, path->slots[0],
6653 struct btrfs_extent_item);
6655 btrfs_set_extent_refs(leaf, ei, 0);
6656 btrfs_set_extent_generation(leaf, ei, rec->generation);
6658 if (back->is_data) {
6659 btrfs_set_extent_flags(leaf, ei,
6660 BTRFS_EXTENT_FLAG_DATA);
6662 struct btrfs_disk_key copy_key;
6664 tback = to_tree_backref(back);
6665 bi = (struct btrfs_tree_block_info *)(ei + 1);
6666 memset_extent_buffer(leaf, 0, (unsigned long)bi,
6669 btrfs_set_disk_key_objectid(&copy_key,
6670 rec->info_objectid);
6671 btrfs_set_disk_key_type(&copy_key, 0);
6672 btrfs_set_disk_key_offset(&copy_key, 0);
6674 btrfs_set_tree_block_level(leaf, bi, rec->info_level);
6675 btrfs_set_tree_block_key(leaf, bi, &copy_key);
6677 btrfs_set_extent_flags(leaf, ei,
6678 BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
6681 btrfs_mark_buffer_dirty(leaf);
6682 ret = btrfs_update_block_group(trans, extent_root, rec->start,
6683 rec->max_size, 1, 0);
6686 btrfs_release_path(path);
6689 if (back->is_data) {
6693 dback = to_data_backref(back);
6694 if (back->full_backref)
6695 parent = dback->parent;
6699 for (i = 0; i < dback->found_ref; i++) {
6700 /* if parent != 0, we're doing a full backref
6701 * passing BTRFS_FIRST_FREE_OBJECTID as the owner
6702 * just makes the backref allocator create a data
6705 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6706 rec->start, rec->max_size,
6710 BTRFS_FIRST_FREE_OBJECTID :
6716 fprintf(stderr, "adding new data backref"
6717 " on %llu %s %llu owner %llu"
6718 " offset %llu found %d\n",
6719 (unsigned long long)rec->start,
6720 back->full_backref ?
6722 back->full_backref ?
6723 (unsigned long long)parent :
6724 (unsigned long long)dback->root,
6725 (unsigned long long)dback->owner,
6726 (unsigned long long)dback->offset,
6731 tback = to_tree_backref(back);
6732 if (back->full_backref)
6733 parent = tback->parent;
6737 ret = btrfs_inc_extent_ref(trans, info->extent_root,
6738 rec->start, rec->max_size,
6739 parent, tback->root, 0, 0);
6740 fprintf(stderr, "adding new tree backref on "
6741 "start %llu len %llu parent %llu root %llu\n",
6742 rec->start, rec->max_size, parent, tback->root);
6745 btrfs_release_path(path);
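/*
 * Helpers for verify_backrefs(): each extent_entry counts how many backrefs
 * reported a given (bytenr, bytes) pair so we can pick the value the
 * majority agrees on.
 */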
6749 static struct extent_entry *find_entry(struct list_head *entries,
6750 u64 bytenr, u64 bytes)
6752 struct extent_entry *entry = NULL;
6754 list_for_each_entry(entry, entries, list) {
6755 if (entry->bytenr == bytenr && entry->bytes == bytes)
6762 static struct extent_entry *find_most_right_entry(struct list_head *entries)
6764 struct extent_entry *entry, *best = NULL, *prev = NULL;
6766 list_for_each_entry(entry, entries, list) {
6773 * If there are as many broken entries as entries then we know
6774 * not to trust this particular entry.
6776 if (entry->broken == entry->count)
6780 * If our current entry == best then we can't be sure our best
6781 * is really the best, so we need to keep searching.
6783 if (best && best->count == entry->count) {
6789 /* Prev == entry, not good enough, have to keep searching */
6790 if (!prev->broken && prev->count == entry->count)
6794 best = (prev->count > entry->count) ? prev : entry;
6795 else if (best->count < entry->count)
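/*
 * Rewrite a file extent item whose disk bytenr/num_bytes disagree with the
 * agreed-upon @entry, adjusting the file extent offset so the data the item
 * refers to stays the same.
 */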
6803 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
6804 struct data_backref *dback, struct extent_entry *entry)
6806 struct btrfs_trans_handle *trans;
6807 struct btrfs_root *root;
6808 struct btrfs_file_extent_item *fi;
6809 struct extent_buffer *leaf;
6810 struct btrfs_key key;
6814 key.objectid = dback->root;
6815 key.type = BTRFS_ROOT_ITEM_KEY;
6816 key.offset = (u64)-1;
6817 root = btrfs_read_fs_root(info, &key);
6819 fprintf(stderr, "Couldn't find root for our ref\n");
6824 * The backref points to the original offset of the extent if it was
6825 * split, so we need to search down to the offset we have and then walk
6826 * forward until we find the backref we're looking for.
6828 key.objectid = dback->owner;
6829 key.type = BTRFS_EXTENT_DATA_KEY;
6830 key.offset = dback->offset;
6831 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6833 fprintf(stderr, "Error looking up ref %d\n", ret);
6838 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
6839 ret = btrfs_next_leaf(root, path);
6841 fprintf(stderr, "Couldn't find our ref, next\n");
6845 leaf = path->nodes[0];
6846 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
6847 if (key.objectid != dback->owner ||
6848 key.type != BTRFS_EXTENT_DATA_KEY) {
6849 fprintf(stderr, "Couldn't find our ref, search\n");
6852 fi = btrfs_item_ptr(leaf, path->slots[0],
6853 struct btrfs_file_extent_item);
6854 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6855 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
6857 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
6862 btrfs_release_path(path);
6864 trans = btrfs_start_transaction(root, 1);
6866 return PTR_ERR(trans);
6869 * Ok we have the key of the file extent we want to fix, now we can cow
6870 * down to the thing and fix it.
6872 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
6874 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
6875 key.objectid, key.type, key.offset, ret);
6879 fprintf(stderr, "Well that's odd, we just found this key "
6880 "[%Lu, %u, %Lu]\n", key.objectid, key.type,
6885 leaf = path->nodes[0];
6886 fi = btrfs_item_ptr(leaf, path->slots[0],
6887 struct btrfs_file_extent_item);
6889 if (btrfs_file_extent_compression(leaf, fi) &&
6890 dback->disk_bytenr != entry->bytenr) {
6891 fprintf(stderr, "Ref doesn't match the record start and is "
6892 "compressed, please take a btrfs-image of this file "
6893 "system and send it to a btrfs developer so they can "
6894 "complete this functionality for bytenr %Lu\n",
6895 dback->disk_bytenr);
6900 if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
6901 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6902 } else if (dback->disk_bytenr > entry->bytenr) {
6903 u64 off_diff, offset;
6905 off_diff = dback->disk_bytenr - entry->bytenr;
6906 offset = btrfs_file_extent_offset(leaf, fi);
6907 if (dback->disk_bytenr + offset +
6908 btrfs_file_extent_num_bytes(leaf, fi) >
6909 entry->bytenr + entry->bytes) {
6910 fprintf(stderr, "Ref is past the entry end, please "
6911 "take a btrfs-image of this file system and "
6912 "send it to a btrfs developer, ref %Lu\n",
6913 dback->disk_bytenr);
6918 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6919 btrfs_set_file_extent_offset(leaf, fi, offset);
6920 } else if (dback->disk_bytenr < entry->bytenr) {
6923 offset = btrfs_file_extent_offset(leaf, fi);
6924 if (dback->disk_bytenr + offset < entry->bytenr) {
6925 fprintf(stderr, "Ref is before the entry start, please"
6926 " take a btrfs-image of this file system and "
6927 "send it to a btrfs developer, ref %Lu\n",
6928 dback->disk_bytenr);
6933 offset += dback->disk_bytenr;
6934 offset -= entry->bytenr;
6935 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
6936 btrfs_set_file_extent_offset(leaf, fi, offset);
6939 btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
6942 * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
6943 * only do this if we aren't using compression, otherwise it's a
6946 if (!btrfs_file_extent_compression(leaf, fi))
6947 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
6949 printf("ram bytes may be wrong?\n");
6950 btrfs_mark_buffer_dirty(leaf);
6952 err = btrfs_commit_transaction(trans, root);
6953 btrfs_release_path(path);
6954 return ret ? ret : err;
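/*
 * Cross-check the data backrefs of @rec against each other and against the
 * extent item itself; when they disagree, pick the most popular
 * (bytenr, bytes) pair and repair the refs that don't match it.
 */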
6957 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
6958 struct extent_record *rec)
6960 struct extent_backref *back, *tmp;
6961 struct data_backref *dback;
6962 struct extent_entry *entry, *best = NULL;
6965 int broken_entries = 0;
6970 * Metadata is easy and the backrefs should always agree on bytenr and
6971 * size, if not we've got bigger issues.
6976 rbtree_postorder_for_each_entry_safe(back, tmp,
6977 &rec->backref_tree, node) {
6978 if (back->full_backref || !back->is_data)
6981 dback = to_data_backref(back);
6984 * We only pay attention to backrefs that we found a real
6987 if (dback->found_ref == 0)
6991 * For now we only catch when the bytes don't match, not the
6992 * bytenr. We can easily do this at the same time, but I want
6993 * to have a fs image to test on before we just add repair
6994 * functionality willy-nilly so we know we won't screw up the
6998 entry = find_entry(&entries, dback->disk_bytenr,
7001 entry = malloc(sizeof(struct extent_entry));
7006 memset(entry, 0, sizeof(*entry));
7007 entry->bytenr = dback->disk_bytenr;
7008 entry->bytes = dback->bytes;
7009 list_add_tail(&entry->list, &entries);
7014 * If we only have one entry we may think the entries agree when
7015 * in reality they don't so we have to do some extra checking.
7017 if (dback->disk_bytenr != rec->start ||
7018 dback->bytes != rec->nr || back->broken)
7029 /* Yay all the backrefs agree, carry on good sir */
7030 if (nr_entries <= 1 && !mismatch)
7033 fprintf(stderr, "attempting to repair backref discrepancy for bytenr "
7034 "%Lu\n", rec->start);
7037 * First we want to see if the backrefs can agree amongst themselves who
7038 * is right, so figure out which one of the entries has the highest
7041 best = find_most_right_entry(&entries);
7044 * Ok so we may have an even split between what the backrefs think, so
7045 * this is where we use the extent ref to see what it thinks.
7048 entry = find_entry(&entries, rec->start, rec->nr);
7049 if (!entry && (!broken_entries || !rec->found_rec)) {
7050 fprintf(stderr, "Backrefs don't agree with each other "
7051 "and extent record doesn't agree with anybody,"
7052 " so we can't fix bytenr %Lu bytes %Lu\n",
7053 rec->start, rec->nr);
7056 } else if (!entry) {
7058 * Ok our backrefs were broken, we'll assume this is the
7059 * correct value and add an entry for this range.
7061 entry = malloc(sizeof(struct extent_entry));
7066 memset(entry, 0, sizeof(*entry));
7067 entry->bytenr = rec->start;
7068 entry->bytes = rec->nr;
7069 list_add_tail(&entry->list, &entries);
7073 best = find_most_right_entry(&entries);
7075 fprintf(stderr, "Backrefs and extent record evenly "
7076 "split on who is right, this is going to "
7077 "require user input to fix bytenr %Lu bytes "
7078 "%Lu\n", rec->start, rec->nr);
7085 * I don't think this can happen currently as we'll abort() if we catch
7086 * this case higher up, but in case somebody removes that we still can't
7087 * deal with it properly here yet, so just bail out if that's the case.
7089 if (best->bytenr != rec->start) {
7090 fprintf(stderr, "Extent start and backref starts don't match, "
7091 "please use btrfs-image on this file system and send "
7092 "it to a btrfs developer so they can make fsck fix "
7093 "this particular case. bytenr is %Lu, bytes is %Lu\n",
7094 rec->start, rec->nr);
7100 * Ok great we all agreed on an extent record, let's go find the real
7101 * references and fix up the ones that don't match.
7103 rbtree_postorder_for_each_entry_safe(back, tmp,
7104 &rec->backref_tree, node) {
7105 if (back->full_backref || !back->is_data)
7108 dback = to_data_backref(back);
7111 * Still ignoring backrefs that don't have a real ref attached
7114 if (dback->found_ref == 0)
7117 if (dback->bytes == best->bytes &&
7118 dback->disk_bytenr == best->bytenr)
7121 ret = repair_ref(info, path, dback, best);
7127 * Ok we messed with the actual refs, which means we need to drop our
7128 * entire cache and go back and rescan. I know this is a huge pain and
7129 * adds a lot of extra work, but it's the only way to be safe. Once all
7130 * the backrefs agree we may not need to do anything to the extent
7135 while (!list_empty(&entries)) {
7136 entry = list_entry(entries.next, struct extent_entry, list);
7137 list_del_init(&entry->list);
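/*
 * Replace an extent record that was built only from backrefs with the
 * duplicate that came from a real extent item, merging any overlapping
 * records into it.
 */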
7143 static int process_duplicates(struct btrfs_root *root,
7144 struct cache_tree *extent_cache,
7145 struct extent_record *rec)
7147 struct extent_record *good, *tmp;
7148 struct cache_extent *cache;
7152 * If we found an extent record for this extent then return, or if we
7153 * have more than one duplicate we are likely going to need to delete
7156 if (rec->found_rec || rec->num_duplicates > 1)
7159 /* Shouldn't happen but just in case */
7160 BUG_ON(!rec->num_duplicates);
7163 * So this happens if we end up with a backref that doesn't match the
7164 * actual extent entry. So either the backref is bad or the extent
7165 * entry is bad. Either way we want to have the extent_record actually
7166 * reflect what we found in the extent_tree, so we need to take the
7167 * duplicate out and use that as the extent_record since the only way we
7168 * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
7170 remove_cache_extent(extent_cache, &rec->cache);
7172 good = to_extent_record(rec->dups.next);
7173 list_del_init(&good->list);
7174 INIT_LIST_HEAD(&good->backrefs);
7175 INIT_LIST_HEAD(&good->dups);
7176 good->cache.start = good->start;
7177 good->cache.size = good->nr;
7178 good->content_checked = 0;
7179 good->owner_ref_checked = 0;
7180 good->num_duplicates = 0;
7181 good->refs = rec->refs;
7182 list_splice_init(&rec->backrefs, &good->backrefs);
7184 cache = lookup_cache_extent(extent_cache, good->start,
7188 tmp = container_of(cache, struct extent_record, cache);
7191 * If we find another overlapping extent and it's found_rec is
7192 * set then it's a duplicate and we need to try and delete
7195 if (tmp->found_rec || tmp->num_duplicates > 0) {
7196 if (list_empty(&good->list))
7197 list_add_tail(&good->list,
7198 &duplicate_extents);
7199 good->num_duplicates += tmp->num_duplicates + 1;
7200 list_splice_init(&tmp->dups, &good->dups);
7201 list_del_init(&tmp->list);
7202 list_add_tail(&tmp->list, &good->dups);
7203 remove_cache_extent(extent_cache, &tmp->cache);
7208 * Ok we have another non-extent-item-backed extent rec, so let's
7209 * just add it to this extent and carry on like we did above.
7211 good->refs += tmp->refs;
7212 list_splice_init(&tmp->backrefs, &good->backrefs);
7213 remove_cache_extent(extent_cache, &tmp->cache);
7216 ret = insert_cache_extent(extent_cache, &good->cache);
7219 return good->num_duplicates ? 0 : 1;
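/*
 * Several extent items cover the same range: keep the record that spans all
 * of the duplicates and delete the other extent items from the extent tree.
 */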
7222 static int delete_duplicate_records(struct btrfs_root *root,
7223 struct extent_record *rec)
7225 struct btrfs_trans_handle *trans;
7226 LIST_HEAD(delete_list);
7227 struct btrfs_path *path;
7228 struct extent_record *tmp, *good, *n;
7231 struct btrfs_key key;
7233 path = btrfs_alloc_path();
7240 /* Find the record that covers all of the duplicates. */
7241 list_for_each_entry(tmp, &rec->dups, list) {
7242 if (good->start < tmp->start)
7244 if (good->nr > tmp->nr)
7247 if (tmp->start + tmp->nr < good->start + good->nr) {
7248 fprintf(stderr, "Ok we have overlapping extents that "
7249 "aren't completely covered by each other, this "
7250 "is going to require more careful thought. "
7251 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
7252 tmp->start, tmp->nr, good->start, good->nr);
7259 list_add_tail(&rec->list, &delete_list);
7261 list_for_each_entry_safe(tmp, n, &rec->dups, list) {
7264 list_move_tail(&tmp->list, &delete_list);
7267 root = root->fs_info->extent_root;
7268 trans = btrfs_start_transaction(root, 1);
7269 if (IS_ERR(trans)) {
7270 ret = PTR_ERR(trans);
7274 list_for_each_entry(tmp, &delete_list, list) {
7275 if (tmp->found_rec == 0)
7277 key.objectid = tmp->start;
7278 key.type = BTRFS_EXTENT_ITEM_KEY;
7279 key.offset = tmp->nr;
7281 /* Shouldn't happen but just in case */
7282 if (tmp->metadata) {
7283 fprintf(stderr, "Well this shouldn't happen, extent "
7284 "record overlaps but is metadata? "
7285 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
7289 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
7295 ret = btrfs_del_item(trans, root, path);
7298 btrfs_release_path(path);
7301 err = btrfs_commit_transaction(trans, root);
7305 while (!list_empty(&delete_list)) {
7306 tmp = to_extent_record(delete_list.next);
7307 list_del_init(&tmp->list);
7313 while (!list_empty(&rec->dups)) {
7314 tmp = to_extent_record(rec->dups.next);
7315 list_del_init(&tmp->list);
7319 btrfs_free_path(path);
7321 if (!ret && !nr_del)
7322 rec->num_duplicates = 0;
7324 return ret ? ret : nr_del;
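/*
 * For data backrefs that never matched a file extent item, look the item up
 * in its fs tree and fill in the disk bytenr/bytes it points at so that
 * verify_backrefs() has something to compare against.
 */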
7327 static int find_possible_backrefs(struct btrfs_fs_info *info,
7328 struct btrfs_path *path,
7329 struct cache_tree *extent_cache,
7330 struct extent_record *rec)
7332 struct btrfs_root *root;
7333 struct extent_backref *back, *tmp;
7334 struct data_backref *dback;
7335 struct cache_extent *cache;
7336 struct btrfs_file_extent_item *fi;
7337 struct btrfs_key key;
7341 rbtree_postorder_for_each_entry_safe(back, tmp,
7342 &rec->backref_tree, node) {
7343 /* Don't care about full backrefs (poor unloved backrefs) */
7344 if (back->full_backref || !back->is_data)
7347 dback = to_data_backref(back);
7349 /* We found this one, we don't need to do a lookup */
7350 if (dback->found_ref)
7353 key.objectid = dback->root;
7354 key.type = BTRFS_ROOT_ITEM_KEY;
7355 key.offset = (u64)-1;
7357 root = btrfs_read_fs_root(info, &key);
7359 /* No root, definitely a bad ref, skip */
7360 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
7362 /* Other err, exit */
7364 return PTR_ERR(root);
7366 key.objectid = dback->owner;
7367 key.type = BTRFS_EXTENT_DATA_KEY;
7368 key.offset = dback->offset;
7369 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7371 btrfs_release_path(path);
7374 /* Didn't find it, we can carry on */
7379 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
7380 struct btrfs_file_extent_item);
7381 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
7382 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
7383 btrfs_release_path(path);
7384 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7386 struct extent_record *tmp;
7387 tmp = container_of(cache, struct extent_record, cache);
7390 * If we found an extent record for the bytenr for this
7391 * particular backref then we can't add it to our
7392 * current extent record. We only want to add backrefs
7393 * that don't have a corresponding extent item in the
7394 * extent tree since they likely belong to this record
7395 * and we need to fix it if it doesn't match bytenrs.
7401 dback->found_ref += 1;
7402 dback->disk_bytenr = bytenr;
7403 dback->bytes = bytes;
7406 * Set this so the verify backref code knows not to trust the
7407 * values in this backref.
7416	 * Record orphan data refs into the corresponding root.
7418	 * Return 0 if the extent item contains a data ref and it was recorded.
7419	 * Return 1 if the extent item contains no useful data ref.
7420	 * In that case it may contain only shared data refs or metadata backrefs,
7421	 * or the file extent already exists (this should be handled by the extent bytenr
7423	 * Return <0 if something goes wrong.
7425 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
7426 struct extent_record *rec)
7428 struct btrfs_key key;
7429 struct btrfs_root *dest_root;
7430 struct extent_backref *back, *tmp;
7431 struct data_backref *dback;
7432 struct orphan_data_extent *orphan;
7433 struct btrfs_path *path;
7434 int recorded_data_ref = 0;
7439 path = btrfs_alloc_path();
7442 rbtree_postorder_for_each_entry_safe(back, tmp,
7443 &rec->backref_tree, node) {
7444 if (back->full_backref || !back->is_data ||
7445 !back->found_extent_tree)
7447 dback = to_data_backref(back);
7448 if (dback->found_ref)
7450 key.objectid = dback->root;
7451 key.type = BTRFS_ROOT_ITEM_KEY;
7452 key.offset = (u64)-1;
7454 dest_root = btrfs_read_fs_root(fs_info, &key);
7456	 /* For a non-existent root we just skip it */
7457 if (IS_ERR(dest_root) || !dest_root)
7460 key.objectid = dback->owner;
7461 key.type = BTRFS_EXTENT_DATA_KEY;
7462 key.offset = dback->offset;
7464 ret = btrfs_search_slot(NULL, dest_root, &key, path, 0, 0);
7466	 * For ret < 0, it's OK since the fs-tree may be corrupted;
7467	 * we still record it for the inode/file extent rebuild.
7468	 * For ret > 0, we record it only for the file extent rebuild.
7469	 * For ret == 0, the file extent exists but only the bytenr
7470	 * mismatches; let the original bytenr fix routine handle it,
7476 orphan = malloc(sizeof(*orphan));
7481 INIT_LIST_HEAD(&orphan->list);
7482 orphan->root = dback->root;
7483 orphan->objectid = dback->owner;
7484 orphan->offset = dback->offset;
7485 orphan->disk_bytenr = rec->cache.start;
7486 orphan->disk_len = rec->cache.size;
7487 list_add(&dest_root->orphan_data_extents, &orphan->list);
7488 recorded_data_ref = 1;
7491 btrfs_free_path(path);
7493 return !recorded_data_ref;
7499 * when an incorrect extent item is found, this will delete
7500 * all of the existing entries for it and recreate them
7501 * based on what the tree scan found.
7503 static int fixup_extent_refs(struct btrfs_fs_info *info,
7504 struct cache_tree *extent_cache,
7505 struct extent_record *rec)
7507 struct btrfs_trans_handle *trans = NULL;
7509 struct btrfs_path *path;
7510 struct cache_extent *cache;
7511 struct extent_backref *back, *tmp;
7515 if (rec->flag_block_full_backref)
7516 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7518 path = btrfs_alloc_path();
7522 if (rec->refs != rec->extent_item_refs && !rec->metadata) {
7524	 * Sometimes the backrefs themselves are so broken they don't
7525	 * get attached to any meaningful rec, so first go back and
7526	 * check any of our backrefs that we couldn't match; if we do
7527	 * find the backref, throw it into the list so that
7528	 * verify_backrefs can figure out what to do.
7530 ret = find_possible_backrefs(info, path, extent_cache, rec);
7535 /* step one, make sure all of the backrefs agree */
7536 ret = verify_backrefs(info, path, rec);
7540 trans = btrfs_start_transaction(info->extent_root, 1);
7541 if (IS_ERR(trans)) {
7542 ret = PTR_ERR(trans);
7546 /* step two, delete all the existing records */
7547 ret = delete_extent_records(trans, info->extent_root, path,
7548 rec->start, rec->max_size);
7553 /* was this block corrupt? If so, don't add references to it */
7554 cache = lookup_cache_extent(info->corrupt_blocks,
7555 rec->start, rec->max_size);
7561 /* step three, recreate all the refs we did find */
7562 rbtree_postorder_for_each_entry_safe(back, tmp,
7563 &rec->backref_tree, node) {
7565 * if we didn't find any references, don't create a
7568 if (!back->found_ref)
7571 rec->bad_full_backref = 0;
7572 ret = record_extent(trans, info, path, rec, back, allocated, flags);
7580 int err = btrfs_commit_transaction(trans, info->extent_root);
7585 btrfs_free_path(path);
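/*
 * Rewrite the flags of the extent/metadata item for @rec so that the
 * FULL_BACKREF flag matches what the tree scan observed
 * (rec->flag_block_full_backref), then commit the change.
 */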
7589 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
7590 struct extent_record *rec)
7592 struct btrfs_trans_handle *trans;
7593 struct btrfs_root *root = fs_info->extent_root;
7594 struct btrfs_path *path;
7595 struct btrfs_extent_item *ei;
7596 struct btrfs_key key;
7600 key.objectid = rec->start;
7601 if (rec->metadata) {
7602 key.type = BTRFS_METADATA_ITEM_KEY;
7603 key.offset = rec->info_level;
7605 key.type = BTRFS_EXTENT_ITEM_KEY;
7606 key.offset = rec->max_size;
7609 path = btrfs_alloc_path();
7613 trans = btrfs_start_transaction(root, 0);
7614 if (IS_ERR(trans)) {
7615 btrfs_free_path(path);
7616 return PTR_ERR(trans);
7619 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7621 btrfs_free_path(path);
7622 btrfs_commit_transaction(trans, root);
7625 fprintf(stderr, "Didn't find extent for %llu\n",
7626 (unsigned long long)rec->start);
7627 btrfs_free_path(path);
7628 btrfs_commit_transaction(trans, root);
7632 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
7633 struct btrfs_extent_item);
7634 flags = btrfs_extent_flags(path->nodes[0], ei);
7635 if (rec->flag_block_full_backref) {
7636 fprintf(stderr, "setting full backref on %llu\n",
7637 (unsigned long long)key.objectid);
7638 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
7640 fprintf(stderr, "clearing full backref on %llu\n",
7641 (unsigned long long)key.objectid);
7642 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
7644 btrfs_set_extent_flags(path->nodes[0], ei, flags);
7645 btrfs_mark_buffer_dirty(path->nodes[0]);
7646 btrfs_free_path(path);
7647 return btrfs_commit_transaction(trans, root);
7650 /* right now we only prune from the extent allocation tree */
7651 static int prune_one_block(struct btrfs_trans_handle *trans,
7652 struct btrfs_fs_info *info,
7653 struct btrfs_corrupt_block *corrupt)
7656 struct btrfs_path path;
7657 struct extent_buffer *eb;
7661 int level = corrupt->level + 1;
7663 btrfs_init_path(&path);
7665 /* we want to stop at the parent to our busted block */
7666 path.lowest_level = level;
7668 ret = btrfs_search_slot(trans, info->extent_root,
7669 &corrupt->key, &path, -1, 1);
7674 eb = path.nodes[level];
7681	 * hopefully the search gave us the block we want to prune,
7682	 * let's try that first
7684 slot = path.slots[level];
7685 found = btrfs_node_blockptr(eb, slot);
7686 if (found == corrupt->cache.start)
7689 nritems = btrfs_header_nritems(eb);
7691	 /* the search failed, let's scan this node and hope we find it */
7692 for (slot = 0; slot < nritems; slot++) {
7693 found = btrfs_node_blockptr(eb, slot);
7694 if (found == corrupt->cache.start)
7698	 * we couldn't find the bad block. TODO: search all the nodes for pointers
7701 if (eb == info->extent_root->node) {
7706 btrfs_release_path(&path);
7711 printk("deleting pointer to block %Lu\n", corrupt->cache.start);
7712 ret = btrfs_del_ptr(trans, info->extent_root, &path, level, slot);
7715 btrfs_release_path(&path);
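/*
 * For every block recorded in corrupt_blocks, delete the pointer to it from
 * its parent node (via prune_one_block()) and drop it from the cache, then
 * commit the transaction.
 */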
7719 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
7721 struct btrfs_trans_handle *trans = NULL;
7722 struct cache_extent *cache;
7723 struct btrfs_corrupt_block *corrupt;
7726 cache = search_cache_extent(info->corrupt_blocks, 0);
7730 trans = btrfs_start_transaction(info->extent_root, 1);
7732 return PTR_ERR(trans);
7734 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
7735 prune_one_block(trans, info, corrupt);
7736 remove_cache_extent(info->corrupt_blocks, cache);
7739 return btrfs_commit_transaction(trans, info->extent_root);
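/*
 * Clear the dirty ranges we accumulated in the free space cache and walk all
 * block groups so their cached state is dropped and rebuilt on next use.
 */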
7743 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
7745 struct btrfs_block_group_cache *cache;
7750 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
7751 &start, &end, EXTENT_DIRTY);
7754 clear_extent_dirty(&fs_info->free_space_cache, start, end,
7760 cache = btrfs_lookup_first_block_group(fs_info, start);
7765 start = cache->key.objectid + cache->key.offset;
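/*
 * Compare every extent record collected by the scan against the extent tree:
 * report duplicate items, ref count mismatches, backpointer/owner problems,
 * bad FULL_BACKREF flags and stripe/chunk-type issues, and, when repair is
 * enabled, try to fix them via fixup_extent_refs()/fixup_extent_flags().
 */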
7769 static int check_extent_refs(struct btrfs_root *root,
7770 struct cache_tree *extent_cache)
7772 struct extent_record *rec;
7773 struct cache_extent *cache;
7782 * if we're doing a repair, we have to make sure
7783 * we don't allocate from the problem extents.
7784 * In the worst case, this will be all the
7787 cache = search_cache_extent(extent_cache, 0);
7789 rec = container_of(cache, struct extent_record, cache);
7790 set_extent_dirty(root->fs_info->excluded_extents,
7792 rec->start + rec->max_size - 1,
7794 cache = next_cache_extent(cache);
7797 /* pin down all the corrupted blocks too */
7798 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
7800 set_extent_dirty(root->fs_info->excluded_extents,
7802 cache->start + cache->size - 1,
7804 cache = next_cache_extent(cache);
7806 prune_corrupt_blocks(root->fs_info);
7807 reset_cached_block_groups(root->fs_info);
7810 reset_cached_block_groups(root->fs_info);
7813	 * We need to delete any duplicate entries we find first, otherwise we
7814	 * could mess up the extent tree when we have backrefs that actually
7815	 * belong to a different extent item and not the weird duplicate one.
7817 while (repair && !list_empty(&duplicate_extents)) {
7818 rec = to_extent_record(duplicate_extents.next);
7819 list_del_init(&rec->list);
7821	 /* Sometimes we can find a backref before we find an actual
7822	 * extent, so we need to process it a little bit to see if there
7823	 * truly are multiple EXTENT_ITEM_KEYs for the same range, or
7824	 * if this is a backref screwup. If we need to delete stuff,
7825	 * process_duplicates() will return 0, otherwise it will return
7828 if (process_duplicates(root, extent_cache, rec))
7830 ret = delete_duplicate_records(root, rec);
7834 * delete_duplicate_records will return the number of entries
7835 * deleted, so if it's greater than 0 then we know we actually
7836 * did something and we need to remove.
7850 cache = search_cache_extent(extent_cache, 0);
7853 rec = container_of(cache, struct extent_record, cache);
7854 if (rec->num_duplicates) {
7855 fprintf(stderr, "extent item %llu has multiple extent "
7856 "items\n", (unsigned long long)rec->start);
7861 if (rec->refs != rec->extent_item_refs) {
7862 fprintf(stderr, "ref mismatch on [%llu %llu] ",
7863 (unsigned long long)rec->start,
7864 (unsigned long long)rec->nr);
7865 fprintf(stderr, "extent item %llu, found %llu\n",
7866 (unsigned long long)rec->extent_item_refs,
7867 (unsigned long long)rec->refs);
7868 ret = record_orphan_data_extents(root->fs_info, rec);
7875 * we can't use the extent to repair file
7876 * extent, let the fallback method handle it.
7878 if (!fixed && repair) {
7879 ret = fixup_extent_refs(
7890 if (all_backpointers_checked(rec, 1)) {
7891 fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
7892 (unsigned long long)rec->start,
7893 (unsigned long long)rec->nr);
7895 if (!fixed && !recorded && repair) {
7896 ret = fixup_extent_refs(root->fs_info,
7905 if (!rec->owner_ref_checked) {
7906 fprintf(stderr, "owner ref check failed [%llu %llu]\n",
7907 (unsigned long long)rec->start,
7908 (unsigned long long)rec->nr);
7909 if (!fixed && !recorded && repair) {
7910 ret = fixup_extent_refs(root->fs_info,
7919 if (rec->bad_full_backref) {
7920 fprintf(stderr, "bad full backref, on [%llu]\n",
7921 (unsigned long long)rec->start);
7923 ret = fixup_extent_flags(root->fs_info, rec);
7932	 * Although it's not an extent ref's problem, we reuse this
7933	 * routine for error reporting.
7934	 * No repair function yet.
7936 if (rec->crossing_stripes) {
7938 "bad metadata [%llu, %llu) crossing stripe boundary\n",
7939 rec->start, rec->start + rec->max_size);
7944 if (rec->wrong_chunk_type) {
7946 "bad extent [%llu, %llu), type mismatch with chunk\n",
7947 rec->start, rec->start + rec->max_size);
7952 remove_cache_extent(extent_cache, cache);
7953 free_all_extent_backrefs(rec);
7954 if (!init_extent_tree && repair && (!cur_err || fixed))
7955 clear_extent_dirty(root->fs_info->excluded_extents,
7957 rec->start + rec->max_size - 1,
7963 if (ret && ret != -EAGAIN) {
7964 fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
7967 struct btrfs_trans_handle *trans;
7969 root = root->fs_info->extent_root;
7970 trans = btrfs_start_transaction(root, 1);
7971 if (IS_ERR(trans)) {
7972 ret = PTR_ERR(trans);
7976 btrfs_fix_block_accounting(trans, root);
7977 ret = btrfs_commit_transaction(trans, root);
7982 fprintf(stderr, "repaired damaged extent references\n");
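/*
 * Per-device stripe length for a chunk of a given total length, e.g. a
 * 4-stripe RAID0 chunk of 4GiB uses 1GiB on each device, a 4-stripe RAID10
 * chunk of 2GiB uses 1GiB on each device (data is mirrored), and a 4-stripe
 * RAID5 chunk of 3GiB uses 1GiB on each device (one stripe worth of parity).
 */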
7988 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
7992 if (type & BTRFS_BLOCK_GROUP_RAID0) {
7993 stripe_size = length;
7994 stripe_size /= num_stripes;
7995 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
7996 stripe_size = length * 2;
7997 stripe_size /= num_stripes;
7998 } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
7999 stripe_size = length;
8000 stripe_size /= (num_stripes - 1);
8001 } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
8002 stripe_size = length;
8003 stripe_size /= (num_stripes - 2);
8005 stripe_size = length;
8011	 * Check the chunk against its block group / dev extent list refs:
8012	 * Return 0 if all refs seem valid.
8013	 * Return 1 if some refs seem valid but others (e.g. a missing block group)
8014	 * need a later pass over the extent tree to rebuild them.
8015	 * Return -1 if essential refs are missing and cannot be rebuilt.
8017 static int check_chunk_refs(struct chunk_record *chunk_rec,
8018 struct block_group_tree *block_group_cache,
8019 struct device_extent_tree *dev_extent_cache,
8022 struct cache_extent *block_group_item;
8023 struct block_group_record *block_group_rec;
8024 struct cache_extent *dev_extent_item;
8025 struct device_extent_record *dev_extent_rec;
8029 int metadump_v2 = 0;
8033 block_group_item = lookup_cache_extent(&block_group_cache->tree,
8036 if (block_group_item) {
8037 block_group_rec = container_of(block_group_item,
8038 struct block_group_record,
8040 if (chunk_rec->length != block_group_rec->offset ||
8041 chunk_rec->offset != block_group_rec->objectid ||
8043 chunk_rec->type_flags != block_group_rec->flags)) {
8046 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
8047 chunk_rec->objectid,
8052 chunk_rec->type_flags,
8053 block_group_rec->objectid,
8054 block_group_rec->type,
8055 block_group_rec->offset,
8056 block_group_rec->offset,
8057 block_group_rec->objectid,
8058 block_group_rec->flags);
8061 list_del_init(&block_group_rec->list);
8062 chunk_rec->bg_rec = block_group_rec;
8067 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
8068 chunk_rec->objectid,
8073 chunk_rec->type_flags);
8080 length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
8081 chunk_rec->num_stripes);
8082 for (i = 0; i < chunk_rec->num_stripes; ++i) {
8083 devid = chunk_rec->stripes[i].devid;
8084 offset = chunk_rec->stripes[i].offset;
8085 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
8086 devid, offset, length);
8087 if (dev_extent_item) {
8088 dev_extent_rec = container_of(dev_extent_item,
8089 struct device_extent_record,
8091 if (dev_extent_rec->objectid != devid ||
8092 dev_extent_rec->offset != offset ||
8093 dev_extent_rec->chunk_offset != chunk_rec->offset ||
8094 dev_extent_rec->length != length) {
8097 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
8098 chunk_rec->objectid,
8101 chunk_rec->stripes[i].devid,
8102 chunk_rec->stripes[i].offset,
8103 dev_extent_rec->objectid,
8104 dev_extent_rec->offset,
8105 dev_extent_rec->length);
8108 list_move(&dev_extent_rec->chunk_list,
8109 &chunk_rec->dextents);
8114 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
8115 chunk_rec->objectid,
8118 chunk_rec->stripes[i].devid,
8119 chunk_rec->stripes[i].offset);
8126 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
8127 int check_chunks(struct cache_tree *chunk_cache,
8128 struct block_group_tree *block_group_cache,
8129 struct device_extent_tree *dev_extent_cache,
8130 struct list_head *good, struct list_head *bad,
8131 struct list_head *rebuild, int silent)
8133 struct cache_extent *chunk_item;
8134 struct chunk_record *chunk_rec;
8135 struct block_group_record *bg_rec;
8136 struct device_extent_record *dext_rec;
8140 chunk_item = first_cache_extent(chunk_cache);
8141 while (chunk_item) {
8142 chunk_rec = container_of(chunk_item, struct chunk_record,
8144 err = check_chunk_refs(chunk_rec, block_group_cache,
8145 dev_extent_cache, silent);
8148 if (err == 0 && good)
8149 list_add_tail(&chunk_rec->list, good);
8150 if (err > 0 && rebuild)
8151 list_add_tail(&chunk_rec->list, rebuild);
8153 list_add_tail(&chunk_rec->list, bad);
8154 chunk_item = next_cache_extent(chunk_item);
8157 list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
8160 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
8168 list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
8172 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
8183 static int check_device_used(struct device_record *dev_rec,
8184 struct device_extent_tree *dext_cache)
8186 struct cache_extent *cache;
8187 struct device_extent_record *dev_extent_rec;
8190 cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
8192 dev_extent_rec = container_of(cache,
8193 struct device_extent_record,
8195 if (dev_extent_rec->objectid != dev_rec->devid)
8198 list_del_init(&dev_extent_rec->device_list);
8199 total_byte += dev_extent_rec->length;
8200 cache = next_cache_extent(cache);
8203 if (total_byte != dev_rec->byte_used) {
8205 "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
8206 total_byte, dev_rec->byte_used, dev_rec->objectid,
8207 dev_rec->type, dev_rec->offset);
8214 /* check btrfs_dev_item -> btrfs_dev_extent */
8215 static int check_devices(struct rb_root *dev_cache,
8216 struct device_extent_tree *dev_extent_cache)
8218 struct rb_node *dev_node;
8219 struct device_record *dev_rec;
8220 struct device_extent_record *dext_rec;
8224 dev_node = rb_first(dev_cache);
8226 dev_rec = container_of(dev_node, struct device_record, node);
8227 err = check_device_used(dev_rec, dev_extent_cache);
8231 dev_node = rb_next(dev_node);
8233 list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
8236 "Device extent[%llu, %llu, %llu] didn't find its device.\n",
8237 dext_rec->objectid, dext_rec->offset, dext_rec->length);
8244 static int add_root_item_to_list(struct list_head *head,
8245 u64 objectid, u64 bytenr, u64 last_snapshot,
8246 u8 level, u8 drop_level,
8247 int level_size, struct btrfs_key *drop_key)
8250 struct root_item_record *ri_rec;
8251 ri_rec = malloc(sizeof(*ri_rec));
8254 ri_rec->bytenr = bytenr;
8255 ri_rec->objectid = objectid;
8256 ri_rec->level = level;
8257 ri_rec->level_size = level_size;
8258 ri_rec->drop_level = drop_level;
8259 ri_rec->last_snapshot = last_snapshot;
8261 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
8262 list_add_tail(&ri_rec->list, head);
8267 static void free_root_item_list(struct list_head *list)
8269 struct root_item_record *ri_rec;
8271 while (!list_empty(list)) {
8272 ri_rec = list_first_entry(list, struct root_item_record,
8274 list_del_init(&ri_rec->list);
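/*
 * Process each root queued on @list: read its tree block, add it to the
 * pending set, and repeatedly call run_next_block() to scan blocks while
 * filling the extent/chunk/device caches.  When rebuilding the extent tree,
 * each root is processed fully before moving on (see the comment in the loop).
 */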
8279 static int deal_root_from_list(struct list_head *list,
8280 struct btrfs_root *root,
8281 struct block_info *bits,
8283 struct cache_tree *pending,
8284 struct cache_tree *seen,
8285 struct cache_tree *reada,
8286 struct cache_tree *nodes,
8287 struct cache_tree *extent_cache,
8288 struct cache_tree *chunk_cache,
8289 struct rb_root *dev_cache,
8290 struct block_group_tree *block_group_cache,
8291 struct device_extent_tree *dev_extent_cache)
8296 while (!list_empty(list)) {
8297 struct root_item_record *rec;
8298 struct extent_buffer *buf;
8299 rec = list_entry(list->next,
8300 struct root_item_record, list);
8302 buf = read_tree_block(root->fs_info->tree_root,
8303 rec->bytenr, rec->level_size, 0);
8304 if (!extent_buffer_uptodate(buf)) {
8305 free_extent_buffer(buf);
8309 add_root_to_pending(buf, extent_cache, pending,
8310 seen, nodes, rec->objectid);
8312	 * To rebuild the extent tree, we need to deal with snapshots
8313	 * one by one; otherwise we deal with nodes first, which
8314	 * maximizes readahead.
8317 ret = run_next_block(root, bits, bits_nr, &last,
8318 pending, seen, reada, nodes,
8319 extent_cache, chunk_cache,
8320 dev_cache, block_group_cache,
8321 dev_extent_cache, rec);
8325 free_extent_buffer(buf);
8326 list_del(&rec->list);
8332 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
8333 reada, nodes, extent_cache, chunk_cache,
8334 dev_cache, block_group_cache,
8335 dev_extent_cache, NULL);
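/*
 * Top-level pass: walk every tree root (normal and dropping), collect extent,
 * chunk, block group, device and dev extent records, then cross-check them
 * with check_chunks(), check_extent_refs() and check_devices().
 */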
8345 static int check_chunks_and_extents(struct btrfs_root *root)
8347 struct rb_root dev_cache;
8348 struct cache_tree chunk_cache;
8349 struct block_group_tree block_group_cache;
8350 struct device_extent_tree dev_extent_cache;
8351 struct cache_tree extent_cache;
8352 struct cache_tree seen;
8353 struct cache_tree pending;
8354 struct cache_tree reada;
8355 struct cache_tree nodes;
8356 struct extent_io_tree excluded_extents;
8357 struct cache_tree corrupt_blocks;
8358 struct btrfs_path path;
8359 struct btrfs_key key;
8360 struct btrfs_key found_key;
8362 struct block_info *bits;
8364 struct extent_buffer *leaf;
8366 struct btrfs_root_item ri;
8367 struct list_head dropping_trees;
8368 struct list_head normal_trees;
8369 struct btrfs_root *root1;
8374 dev_cache = RB_ROOT;
8375 cache_tree_init(&chunk_cache);
8376 block_group_tree_init(&block_group_cache);
8377 device_extent_tree_init(&dev_extent_cache);
8379 cache_tree_init(&extent_cache);
8380 cache_tree_init(&seen);
8381 cache_tree_init(&pending);
8382 cache_tree_init(&nodes);
8383 cache_tree_init(&reada);
8384 cache_tree_init(&corrupt_blocks);
8385 extent_io_tree_init(&excluded_extents);
8386 INIT_LIST_HEAD(&dropping_trees);
8387 INIT_LIST_HEAD(&normal_trees);
8390 root->fs_info->excluded_extents = &excluded_extents;
8391 root->fs_info->fsck_extent_cache = &extent_cache;
8392 root->fs_info->free_extent_hook = free_extent_hook;
8393 root->fs_info->corrupt_blocks = &corrupt_blocks;
8397 bits = malloc(bits_nr * sizeof(struct block_info));
8403 if (ctx.progress_enabled) {
8404 ctx.tp = TASK_EXTENTS;
8405 task_start(ctx.info);
8409 root1 = root->fs_info->tree_root;
8410 level = btrfs_header_level(root1->node);
8411 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8412 root1->node->start, 0, level, 0,
8413 root1->nodesize, NULL);
8416 root1 = root->fs_info->chunk_root;
8417 level = btrfs_header_level(root1->node);
8418 ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
8419 root1->node->start, 0, level, 0,
8420 root1->nodesize, NULL);
8423 btrfs_init_path(&path);
8426 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
8427 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
8432 leaf = path.nodes[0];
8433 slot = path.slots[0];
8434 if (slot >= btrfs_header_nritems(path.nodes[0])) {
8435 ret = btrfs_next_leaf(root, &path);
8438 leaf = path.nodes[0];
8439 slot = path.slots[0];
8441 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
8442 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
8443 unsigned long offset;
8446 offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8447 read_extent_buffer(leaf, &ri, offset, sizeof(ri));
8448 last_snapshot = btrfs_root_last_snapshot(&ri);
8449 if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
8450 level = btrfs_root_level(&ri);
8451 level_size = root->nodesize;
8452 ret = add_root_item_to_list(&normal_trees,
8454 btrfs_root_bytenr(&ri),
8455 last_snapshot, level,
8456 0, level_size, NULL);
8460 level = btrfs_root_level(&ri);
8461 level_size = root->nodesize;
8462 objectid = found_key.objectid;
8463 btrfs_disk_key_to_cpu(&found_key,
8465 ret = add_root_item_to_list(&dropping_trees,
8467 btrfs_root_bytenr(&ri),
8468 last_snapshot, level,
8470 level_size, &found_key);
8477 btrfs_release_path(&path);
8480	 * check_block can return -EAGAIN if it fixes something, so keep
8481	 * this in mind when dealing with return values from these functions;
8482	 * if we get -EAGAIN we want to fall through and restart the loop.
8484 ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
8485 &seen, &reada, &nodes, &extent_cache,
8486 &chunk_cache, &dev_cache, &block_group_cache,
8493 ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
8494 &pending, &seen, &reada, &nodes,
8495 &extent_cache, &chunk_cache, &dev_cache,
8496 &block_group_cache, &dev_extent_cache);
8503 ret = check_chunks(&chunk_cache, &block_group_cache,
8504 &dev_extent_cache, NULL, NULL, NULL, 0);
8511 ret = check_extent_refs(root, &extent_cache);
8518 ret = check_devices(&dev_cache, &dev_extent_cache);
8523 task_stop(ctx.info);
8525 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8526 extent_io_tree_cleanup(&excluded_extents);
8527 root->fs_info->fsck_extent_cache = NULL;
8528 root->fs_info->free_extent_hook = NULL;
8529 root->fs_info->corrupt_blocks = NULL;
8530 root->fs_info->excluded_extents = NULL;
8533 free_chunk_cache_tree(&chunk_cache);
8534 free_device_cache_tree(&dev_cache);
8535 free_block_group_tree(&block_group_cache);
8536 free_device_extent_tree(&dev_extent_cache);
8537 free_extent_cache_tree(&seen);
8538 free_extent_cache_tree(&pending);
8539 free_extent_cache_tree(&reada);
8540 free_extent_cache_tree(&nodes);
8543 free_corrupt_blocks_tree(root->fs_info->corrupt_blocks);
8544 free_extent_cache_tree(&seen);
8545 free_extent_cache_tree(&pending);
8546 free_extent_cache_tree(&reada);
8547 free_extent_cache_tree(&nodes);
8548 free_chunk_cache_tree(&chunk_cache);
8549 free_block_group_tree(&block_group_cache);
8550 free_device_cache_tree(&dev_cache);
8551 free_device_extent_tree(&dev_extent_cache);
8552 free_extent_record_cache(root->fs_info, &extent_cache);
8553 free_root_item_list(&normal_trees);
8554 free_root_item_list(&dropping_trees);
8555 extent_io_tree_cleanup(&excluded_extents);
8560 * Check backrefs of a tree block given by @bytenr or @eb.
8562 * @root: the root containing the @bytenr or @eb
8563 * @eb: tree block extent buffer, can be NULL
8564 * @bytenr: bytenr of the tree block to search
8565 * @level: tree level of the tree block
8566 * @owner: owner of the tree block
8568	 * Return >0 for any error found, and output an error message
8569	 * Return 0 if no error is found
8571 static int check_tree_block_ref(struct btrfs_root *root,
8572 struct extent_buffer *eb, u64 bytenr,
8573 int level, u64 owner)
8575 struct btrfs_key key;
8576 struct btrfs_root *extent_root = root->fs_info->extent_root;
8577 struct btrfs_path path;
8578 struct btrfs_extent_item *ei;
8579 struct btrfs_extent_inline_ref *iref;
8580 struct extent_buffer *leaf;
8586 u32 nodesize = root->nodesize;
8593 btrfs_init_path(&path);
8594 key.objectid = bytenr;
8595 if (btrfs_fs_incompat(root->fs_info,
8596 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA))
8597 key.type = BTRFS_METADATA_ITEM_KEY;
8599 key.type = BTRFS_EXTENT_ITEM_KEY;
8600 key.offset = (u64)-1;
8602 /* Search for the backref in extent tree */
8603 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8605 err |= BACKREF_MISSING;
8608 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
8610 err |= BACKREF_MISSING;
8614 leaf = path.nodes[0];
8615 slot = path.slots[0];
8616 btrfs_item_key_to_cpu(leaf, &key, slot);
8618 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8620 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8621 skinny_level = (int)key.offset;
8622 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8624 struct btrfs_tree_block_info *info;
8626 info = (struct btrfs_tree_block_info *)(ei + 1);
8627 skinny_level = btrfs_tree_block_level(leaf, info);
8628 iref = (struct btrfs_extent_inline_ref *)(info + 1);
8635 if (!(btrfs_extent_flags(leaf, ei) &
8636 BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8638 "extent[%llu %u] backref type mismatch, missing bit: %llx",
8639 key.objectid, nodesize,
8640 BTRFS_EXTENT_FLAG_TREE_BLOCK);
8641 err = BACKREF_MISMATCH;
8643 header_gen = btrfs_header_generation(eb);
8644 extent_gen = btrfs_extent_generation(leaf, ei);
8645 if (header_gen != extent_gen) {
8647 "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
8648 key.objectid, nodesize, header_gen,
8650 err = BACKREF_MISMATCH;
8652 if (level != skinny_level) {
8654 "extent[%llu %u] level mismatch, wanted: %u, have: %u",
8655 key.objectid, nodesize, level, skinny_level);
8656 err = BACKREF_MISMATCH;
8658 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
8660 "extent[%llu %u] is referred by other roots than %llu",
8661 key.objectid, nodesize, root->objectid);
8662 err = BACKREF_MISMATCH;
8667 * Iterate the extent/metadata item to find the exact backref
8669 item_size = btrfs_item_size_nr(leaf, slot);
8670 ptr = (unsigned long)iref;
8671 end = (unsigned long)ei + item_size;
8673 iref = (struct btrfs_extent_inline_ref *)ptr;
8674 type = btrfs_extent_inline_ref_type(leaf, iref);
8675 offset = btrfs_extent_inline_ref_offset(leaf, iref);
8677 if (type == BTRFS_TREE_BLOCK_REF_KEY &&
8678 (offset == root->objectid || offset == owner)) {
8680 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
8681 /* Check if the backref points to valid referencer */
8682 found_ref = !check_tree_block_ref(root, NULL, offset,
8688 ptr += btrfs_extent_inline_ref_size(type);
8692 * Inlined extent item doesn't have what we need, check
8693 * TREE_BLOCK_REF_KEY
8696 btrfs_release_path(&path);
8697 key.objectid = bytenr;
8698 key.type = BTRFS_TREE_BLOCK_REF_KEY;
8699 key.offset = root->objectid;
8701 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
8706 err |= BACKREF_MISSING;
8708 btrfs_release_path(&path);
8709 if (eb && (err & BACKREF_MISSING))
8710 error("extent[%llu %u] backref lost (owner: %llu, level: %u)",
8711 bytenr, nodesize, owner, level);
8716 * Check EXTENT_DATA item, mainly for its dbackref in extent tree
8718	 * Return >0 for any error found, and output an error message
8719	 * Return 0 if no error is found
8721 static int check_extent_data_item(struct btrfs_root *root,
8722 struct extent_buffer *eb, int slot)
8724 struct btrfs_file_extent_item *fi;
8725 struct btrfs_path path;
8726 struct btrfs_root *extent_root = root->fs_info->extent_root;
8727 struct btrfs_key fi_key;
8728 struct btrfs_key dbref_key;
8729 struct extent_buffer *leaf;
8730 struct btrfs_extent_item *ei;
8731 struct btrfs_extent_inline_ref *iref;
8732 struct btrfs_extent_data_ref *dref;
8734 u64 file_extent_gen;
8737 u64 extent_num_bytes;
8745 int found_dbackref = 0;
8749 btrfs_item_key_to_cpu(eb, &fi_key, slot);
8750 fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
8751 file_extent_gen = btrfs_file_extent_generation(eb, fi);
8753 /* Nothing to check for hole and inline data extents */
8754 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
8755 btrfs_file_extent_disk_bytenr(eb, fi) == 0)
8758 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
8759 disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
8760 extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
8762 /* Check unaligned disk_num_bytes and num_bytes */
8763 if (!IS_ALIGNED(disk_num_bytes, root->sectorsize)) {
8765 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
8766 fi_key.objectid, fi_key.offset, disk_num_bytes,
8768 err |= BYTES_UNALIGNED;
8770 data_bytes_allocated += disk_num_bytes;
8772 if (!IS_ALIGNED(extent_num_bytes, root->sectorsize)) {
8774 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
8775 fi_key.objectid, fi_key.offset, extent_num_bytes,
8777 err |= BYTES_UNALIGNED;
8779 data_bytes_referenced += extent_num_bytes;
8781 owner = btrfs_header_owner(eb);
8783 /* Check the extent item of the file extent in extent tree */
8784 btrfs_init_path(&path);
8785 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8786 dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
8787 dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
8789 ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
8791 err |= BACKREF_MISSING;
8795 leaf = path.nodes[0];
8796 slot = path.slots[0];
8797 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
8799 extent_flags = btrfs_extent_flags(leaf, ei);
8800 extent_gen = btrfs_extent_generation(leaf, ei);
8802 if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
8804 "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
8805 disk_bytenr, disk_num_bytes,
8806 BTRFS_EXTENT_FLAG_DATA);
8807 err |= BACKREF_MISMATCH;
8810 if (file_extent_gen < extent_gen) {
8812 "extent[%llu %llu] backref generation mismatch, wanted: <=%llu, have: %llu",
8813 disk_bytenr, disk_num_bytes, file_extent_gen,
8815 err |= BACKREF_MISMATCH;
8818 /* Check data backref inside that extent item */
8819 item_size = btrfs_item_size_nr(leaf, path.slots[0]);
8820 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
8821 ptr = (unsigned long)iref;
8822 end = (unsigned long)ei + item_size;
8824 iref = (struct btrfs_extent_inline_ref *)ptr;
8825 type = btrfs_extent_inline_ref_type(leaf, iref);
8826 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8828 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
8829 ref_root = btrfs_extent_data_ref_root(leaf, dref);
8830 if (ref_root == owner || ref_root == root->objectid)
8832 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
8833 found_dbackref = !check_tree_block_ref(root, NULL,
8834 btrfs_extent_inline_ref_offset(leaf, iref),
8840 ptr += btrfs_extent_inline_ref_size(type);
8843	 /* Didn't find an inlined data backref, try EXTENT_DATA_REF_KEY */
8844 if (!found_dbackref) {
8845 btrfs_release_path(&path);
8847 btrfs_init_path(&path);
8848 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
8849 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
8850 dbref_key.offset = hash_extent_data_ref(root->objectid,
8851 fi_key.objectid, fi_key.offset);
8853 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
8854 &dbref_key, &path, 0, 0);
8859 if (!found_dbackref)
8860 err |= BACKREF_MISSING;
8862 btrfs_release_path(&path);
8863 if (err & BACKREF_MISSING) {
8864 error("data extent[%llu %llu] backref lost",
8865 disk_bytenr, disk_num_bytes);
8871	 * Get the real tree block level, for cases like shared blocks
8872 * Return >= 0 as tree level
8873 * Return <0 for error
8875 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
8877 struct extent_buffer *eb;
8878 struct btrfs_path path;
8879 struct btrfs_key key;
8880 struct btrfs_extent_item *ei;
8883 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8888 /* Search extent tree for extent generation and level */
8889 key.objectid = bytenr;
8890 key.type = BTRFS_METADATA_ITEM_KEY;
8891 key.offset = (u64)-1;
8893 btrfs_init_path(&path);
8894 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
8897 ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
8905 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8906 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
8907 struct btrfs_extent_item);
8908 flags = btrfs_extent_flags(path.nodes[0], ei);
8909 if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
8914 /* Get transid for later read_tree_block() check */
8915 transid = btrfs_extent_generation(path.nodes[0], ei);
8917 /* Get backref level as one source */
8918 if (key.type == BTRFS_METADATA_ITEM_KEY) {
8919 backref_level = key.offset;
8921 struct btrfs_tree_block_info *info;
8923 info = (struct btrfs_tree_block_info *)(ei + 1);
8924 backref_level = btrfs_tree_block_level(path.nodes[0], info);
8926 btrfs_release_path(&path);
8928 /* Get level from tree block as an alternative source */
8929 eb = read_tree_block_fs_info(fs_info, bytenr, nodesize, transid);
8930 if (!extent_buffer_uptodate(eb)) {
8931 free_extent_buffer(eb);
8934 header_level = btrfs_header_level(eb);
8935 free_extent_buffer(eb);
8937 if (header_level != backref_level)
8939 return header_level;
8942 btrfs_release_path(&path);
8947	 * Check if a tree block backref is valid (points to a valid tree block).
8948	 * If level == -1, the level will be resolved.
8949	 * Return >0 for any error found, and print an error message
8951 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
8952 u64 bytenr, int level)
8954 struct btrfs_root *root;
8955 struct btrfs_key key;
8956 struct btrfs_path path;
8957 struct extent_buffer *eb;
8958 struct extent_buffer *node;
8959 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
8963 /* Query level for level == -1 special case */
8965 level = query_tree_block_level(fs_info, bytenr);
8967 err |= REFERENCER_MISSING;
8971 key.objectid = root_id;
8972 key.type = BTRFS_ROOT_ITEM_KEY;
8973 key.offset = (u64)-1;
8975 root = btrfs_read_fs_root(fs_info, &key);
8977 err |= REFERENCER_MISSING;
8981 /* Read out the tree block to get item/node key */
8982 eb = read_tree_block(root, bytenr, root->nodesize, 0);
8983 if (!extent_buffer_uptodate(eb)) {
8984 err |= REFERENCER_MISSING;
8985 free_extent_buffer(eb);
8989 /* Empty tree, no need to check key */
8990 if (!btrfs_header_nritems(eb) && !level) {
8991 free_extent_buffer(eb);
8996 btrfs_node_key_to_cpu(eb, &key, 0);
8998 btrfs_item_key_to_cpu(eb, &key, 0);
9000 free_extent_buffer(eb);
9002 btrfs_init_path(&path);
9003 /* Search with the first key, to ensure we can reach it */
9004 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9006 err |= REFERENCER_MISSING;
9010 node = path.nodes[level];
9011 if (btrfs_header_bytenr(node) != bytenr) {
9013 "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
9014 bytenr, nodesize, bytenr,
9015 btrfs_header_bytenr(node));
9016 err |= REFERENCER_MISMATCH;
9018 if (btrfs_header_level(node) != level) {
9020 "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
9021 bytenr, nodesize, level,
9022 btrfs_header_level(node));
9023 err |= REFERENCER_MISMATCH;
9027 btrfs_release_path(&path);
9029 if (err & REFERENCER_MISSING) {
9031 error("extent [%llu %d] lost referencer (owner: %llu)",
9032 bytenr, nodesize, root_id);
9035 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
9036 bytenr, nodesize, root_id, level);
9043 * Check referencer for shared block backref
9044 * If level == -1, this function will resolve the level.
9046 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
9047 u64 parent, u64 bytenr, int level)
9049 struct extent_buffer *eb;
9050 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9052 int found_parent = 0;
9055 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9056 if (!extent_buffer_uptodate(eb))
9060 level = query_tree_block_level(fs_info, bytenr);
9064 if (level + 1 != btrfs_header_level(eb))
9067 nr = btrfs_header_nritems(eb);
9068 for (i = 0; i < nr; i++) {
9069 if (bytenr == btrfs_node_blockptr(eb, i)) {
9075 free_extent_buffer(eb);
9076 if (!found_parent) {
9078 "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
9079 bytenr, nodesize, parent, level);
9080 return REFERENCER_MISSING;
9086 * Check referencer for normal (inlined) data ref
9087 * If len == 0, it will be resolved by searching in extent tree
9089 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
9090 u64 root_id, u64 objectid, u64 offset,
9091 u64 bytenr, u64 len, u32 count)
9093 struct btrfs_root *root;
9094 struct btrfs_root *extent_root = fs_info->extent_root;
9095 struct btrfs_key key;
9096 struct btrfs_path path;
9097 struct extent_buffer *leaf;
9098 struct btrfs_file_extent_item *fi;
9099 u32 found_count = 0;
9104 key.objectid = bytenr;
9105 key.type = BTRFS_EXTENT_ITEM_KEY;
9106 key.offset = (u64)-1;
9108 btrfs_init_path(&path);
9109 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
9112 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
9115 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9116 if (key.objectid != bytenr ||
9117 key.type != BTRFS_EXTENT_ITEM_KEY)
9120 btrfs_release_path(&path);
9122 key.objectid = root_id;
9123 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
9124 key.offset = (u64)-1;
9125 btrfs_init_path(&path);
9127 root = btrfs_read_fs_root(fs_info, &key);
9131 key.objectid = objectid;
9132 key.type = BTRFS_EXTENT_DATA_KEY;
9134	 * This can be nasty, as the data backref offset is
9135	 * file offset - file extent offset, which is smaller than or
9136	 * equal to the original backref offset. The only special case is
9137	 * overflow, so we need a special check and a further search.
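	 *
	 * E.g. a file extent at file offset 1MiB that starts 4KiB into its
	 * on-disk extent stores a backref offset of 1MiB - 4KiB; if the file
	 * extent offset were larger than the file offset, the u64 subtraction
	 * would wrap and set the top bit, which is why such an offset makes us
	 * start the search from key.offset 0 below.
	 */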
9139 key.offset = offset & (1ULL << 63) ? 0 : offset;
9141 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
9146 * Search afterwards to get correct one
9147 * NOTE: As we must do a comprehensive check on the data backref to
9148 * make sure the dref count also matches, we must iterate all file
9149 * extents for that inode.
9152 leaf = path.nodes[0];
9153 slot = path.slots[0];
9155 btrfs_item_key_to_cpu(leaf, &key, slot);
9156 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
9158 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
9160	 * Besides the normal disk bytenr and disk num bytes, we still
9161	 * need to do an extra check on the dbackref offset, as
9162	 * dbackref offset = file_offset - file_extent_offset
9164 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
9165 btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
9166 (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
9170 ret = btrfs_next_item(root, &path);
9175 btrfs_release_path(&path);
9176 if (found_count != count) {
9178 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
9179 bytenr, len, root_id, objectid, offset, count, found_count);
9180 return REFERENCER_MISSING;
9186 * Check if the referencer of a shared data backref exists
9188 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
9189 u64 parent, u64 bytenr)
9191 struct extent_buffer *eb;
9192 struct btrfs_key key;
9193 struct btrfs_file_extent_item *fi;
9194 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9196 int found_parent = 0;
9199 eb = read_tree_block_fs_info(fs_info, parent, nodesize, 0);
9200 if (!extent_buffer_uptodate(eb))
9203 nr = btrfs_header_nritems(eb);
9204 for (i = 0; i < nr; i++) {
9205 btrfs_item_key_to_cpu(eb, &key, i);
9206 if (key.type != BTRFS_EXTENT_DATA_KEY)
9209 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
9210 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
9213 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
9220 free_extent_buffer(eb);
9221 if (!found_parent) {
9222 error("shared extent %llu referencer lost (parent: %llu)",
9224 return REFERENCER_MISSING;
9230	 * This function checks a given extent item, including its backrefs and
9231	 * the item itself (e.g. crossing a stripe boundary, wrong type).
9233	 * Since we don't use extent_record anymore, introduce new error bits
9235 static int check_extent_item(struct btrfs_fs_info *fs_info,
9236 struct extent_buffer *eb, int slot)
9238 struct btrfs_extent_item *ei;
9239 struct btrfs_extent_inline_ref *iref;
9240 struct btrfs_extent_data_ref *dref;
9244 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9245 u32 item_size = btrfs_item_size_nr(eb, slot);
9250 struct btrfs_key key;
9254 btrfs_item_key_to_cpu(eb, &key, slot);
9255 if (key.type == BTRFS_EXTENT_ITEM_KEY)
9256 bytes_used += key.offset;
9258 bytes_used += nodesize;
9260 if (item_size < sizeof(*ei)) {
9262	 * COMPAT_EXTENT_TREE_V0 case, but that is an ancient format
9263	 * from before the on-disk format was settled.
9264	 * No need to care about it anymore
9266 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
9270 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
9271 flags = btrfs_extent_flags(eb, ei);
9273 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
9275 if (metadata && check_crossing_stripes(key.objectid, eb->len)) {
9276 error("bad metadata [%llu, %llu) crossing stripe boundary",
9277 key.objectid, key.objectid + nodesize);
9278 err |= CROSSING_STRIPE_BOUNDARY;
9281 ptr = (unsigned long)(ei + 1);
9283 if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
9284 /* Old EXTENT_ITEM metadata */
9285 struct btrfs_tree_block_info *info;
9287 info = (struct btrfs_tree_block_info *)ptr;
9288 level = btrfs_tree_block_level(eb, info);
9289 ptr += sizeof(struct btrfs_tree_block_info);
9291 /* New METADATA_ITEM */
9294 end = (unsigned long)ei + item_size;
9297 err |= ITEM_SIZE_MISMATCH;
9301 /* Now check every backref in this extent item */
9303 iref = (struct btrfs_extent_inline_ref *)ptr;
9304 type = btrfs_extent_inline_ref_type(eb, iref);
9305 offset = btrfs_extent_inline_ref_offset(eb, iref);
9307 case BTRFS_TREE_BLOCK_REF_KEY:
9308 ret = check_tree_block_backref(fs_info, offset, key.objectid,
9312 case BTRFS_SHARED_BLOCK_REF_KEY:
9313 ret = check_shared_block_backref(fs_info, offset, key.objectid,
9317 case BTRFS_EXTENT_DATA_REF_KEY:
9318 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9319 ret = check_extent_data_backref(fs_info,
9320 btrfs_extent_data_ref_root(eb, dref),
9321 btrfs_extent_data_ref_objectid(eb, dref),
9322 btrfs_extent_data_ref_offset(eb, dref),
9323 key.objectid, key.offset,
9324 btrfs_extent_data_ref_count(eb, dref));
9327 case BTRFS_SHARED_DATA_REF_KEY:
9328 ret = check_shared_data_backref(fs_info, offset, key.objectid);
9332 error("extent[%llu %d %llu] has unknown ref type: %d",
9333 key.objectid, key.type, key.offset, type);
9334 err |= UNKNOWN_TYPE;
9338 ptr += btrfs_extent_inline_ref_size(type);
9347 * Check if a dev extent item is referred correctly by its chunk
9349 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
9350 struct extent_buffer *eb, int slot)
9352 struct btrfs_root *chunk_root = fs_info->chunk_root;
9353 struct btrfs_dev_extent *ptr;
9354 struct btrfs_path path;
9355 struct btrfs_key chunk_key;
9356 struct btrfs_key devext_key;
9357 struct btrfs_chunk *chunk;
9358 struct extent_buffer *l;
9362 int found_chunk = 0;
9365 btrfs_item_key_to_cpu(eb, &devext_key, slot);
9366 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
9367 length = btrfs_dev_extent_length(eb, ptr);
9369 chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
9370 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9371 chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
9373 btrfs_init_path(&path);
9374 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9379 chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
9380 if (btrfs_chunk_length(l, chunk) != length)
9383 num_stripes = btrfs_chunk_num_stripes(l, chunk);
9384 for (i = 0; i < num_stripes; i++) {
9385 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
9386 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
9388 if (devid == devext_key.objectid &&
9389 offset == devext_key.offset) {
9395 btrfs_release_path(&path);
9398 "device extent[%llu, %llu, %llu] did not find the related chunk",
9399 devext_key.objectid, devext_key.offset, length);
9400 return REFERENCER_MISSING;
9406	 * Check that the used space recorded in the dev item matches its dev extents
9408 static int check_dev_item(struct btrfs_fs_info *fs_info,
9409 struct extent_buffer *eb, int slot)
9411 struct btrfs_root *dev_root = fs_info->dev_root;
9412 struct btrfs_dev_item *dev_item;
9413 struct btrfs_path path;
9414 struct btrfs_key key;
9415 struct btrfs_dev_extent *ptr;
9421 dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
9422 dev_id = btrfs_device_id(eb, dev_item);
9423 used = btrfs_device_bytes_used(eb, dev_item);
9425 key.objectid = dev_id;
9426 key.type = BTRFS_DEV_EXTENT_KEY;
9429 btrfs_init_path(&path);
9430 ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
9432 btrfs_item_key_to_cpu(eb, &key, slot);
9433 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
9434 key.objectid, key.type, key.offset);
9435 btrfs_release_path(&path);
9436 return REFERENCER_MISSING;
9439 /* Iterate dev_extents to calculate the used space of a device */
9441 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
9443 if (key.objectid > dev_id)
9445 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
9448 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
9449 struct btrfs_dev_extent);
9450 total += btrfs_dev_extent_length(path.nodes[0], ptr);
9452 ret = btrfs_next_item(dev_root, &path);
9456 btrfs_release_path(&path);
9458 if (used != total) {
9459 btrfs_item_key_to_cpu(eb, &key, slot);
9461 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
9462 total, used, BTRFS_ROOT_TREE_OBJECTID,
9463 BTRFS_DEV_EXTENT_KEY, dev_id);
9464 return ACCOUNTING_MISMATCH;
9470	 * Check a block group item against its referencer (chunk) and its used space
9471	 * against extent/metadata items
9473 static int check_block_group_item(struct btrfs_fs_info *fs_info,
9474 struct extent_buffer *eb, int slot)
9476 struct btrfs_root *extent_root = fs_info->extent_root;
9477 struct btrfs_root *chunk_root = fs_info->chunk_root;
9478 struct btrfs_block_group_item *bi;
9479 struct btrfs_block_group_item bg_item;
9480 struct btrfs_path path;
9481 struct btrfs_key bg_key;
9482 struct btrfs_key chunk_key;
9483 struct btrfs_key extent_key;
9484 struct btrfs_chunk *chunk;
9485 struct extent_buffer *leaf;
9486 struct btrfs_extent_item *ei;
9487 u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9495 btrfs_item_key_to_cpu(eb, &bg_key, slot);
9496 bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
9497 read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
9498 used = btrfs_block_group_used(&bg_item);
9499 bg_flags = btrfs_block_group_flags(&bg_item);
9501 chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
9502 chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
9503 chunk_key.offset = bg_key.objectid;
9505 btrfs_init_path(&path);
9506 /* Search for the referencer chunk */
9507 ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
9510 "block group[%llu %llu] did not find the related chunk item",
9511 bg_key.objectid, bg_key.offset);
9512 err |= REFERENCER_MISSING;
9514 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
9515 struct btrfs_chunk);
9516 if (btrfs_chunk_length(path.nodes[0], chunk) !=
9519 "block group[%llu %llu] related chunk item length does not match",
9520 bg_key.objectid, bg_key.offset);
9521 err |= REFERENCER_MISMATCH;
9524 btrfs_release_path(&path);
9526 /* Search from the block group bytenr */
9527 extent_key.objectid = bg_key.objectid;
9528 extent_key.type = 0;
9529 extent_key.offset = 0;
9531 btrfs_init_path(&path);
9532 ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
9536 /* Iterate extent tree to account used space */
9538 leaf = path.nodes[0];
9539 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
9540 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
9543 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
9544 extent_key.type != BTRFS_EXTENT_ITEM_KEY)
9546 if (extent_key.objectid < bg_key.objectid)
9549 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
9552 total += extent_key.offset;
9554 ei = btrfs_item_ptr(leaf, path.slots[0],
9555 struct btrfs_extent_item);
9556 flags = btrfs_extent_flags(leaf, ei);
9557 if (flags & BTRFS_EXTENT_FLAG_DATA) {
9558 if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
9560 "bad extent[%llu, %llu) type mismatch with chunk",
9561 extent_key.objectid,
9562 extent_key.objectid + extent_key.offset);
9563 err |= CHUNK_TYPE_MISMATCH;
9565 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
9566 if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
9567 BTRFS_BLOCK_GROUP_METADATA))) {
9569 "bad extent[%llu, %llu) type mismatch with chunk",
9570 extent_key.objectid,
9571 extent_key.objectid + nodesize);
9572 err |= CHUNK_TYPE_MISMATCH;
9576 ret = btrfs_next_item(extent_root, &path);
9582 btrfs_release_path(&path);
9584 if (total != used) {
9586 "block group[%llu %llu] used %llu but extent items used %llu",
9587 bg_key.objectid, bg_key.offset, used, total);
9588 err |= ACCOUNTING_MISMATCH;
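/*
 * Replace @root's node with a freshly allocated (or, when overwriting, the
 * same) empty block, stamp a new header on it, update the root item if the
 * bytenr stayed the same, and add the root to the dirty list.
 */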
9593 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
9594 struct btrfs_root *root, int overwrite)
9596 struct extent_buffer *c;
9597 struct extent_buffer *old = root->node;
9600 struct btrfs_disk_key disk_key = {0,0,0};
9606 extent_buffer_get(c);
9609 c = btrfs_alloc_free_block(trans, root,
9611 root->root_key.objectid,
9612 &disk_key, level, 0, 0);
9615 extent_buffer_get(c);
9619 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
9620 btrfs_set_header_level(c, level);
9621 btrfs_set_header_bytenr(c, c->start);
9622 btrfs_set_header_generation(c, trans->transid);
9623 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
9624 btrfs_set_header_owner(c, root->root_key.objectid);
9626 write_extent_buffer(c, root->fs_info->fsid,
9627 btrfs_header_fsid(), BTRFS_FSID_SIZE);
9629 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
9630 btrfs_header_chunk_tree_uuid(c),
9633 btrfs_mark_buffer_dirty(c);
9635	 * this can happen in the following cases:
9637	 * 1. we overwrite the previous root.
9639	 * 2. we reinit the reloc data root; because we skipped pinning
9640	 * down the reloc data tree earlier, we can allocate the
9641	 * same block bytenr here.
9643 if (old->start == c->start) {
9644 btrfs_set_root_generation(&root->root_item,
9646 root->root_item.level = btrfs_header_level(root->node);
9647 ret = btrfs_update_root(trans, root->fs_info->tree_root,
9648 &root->root_key, &root->root_item);
9650 free_extent_buffer(c);
9654 free_extent_buffer(old);
9656 add_root_to_dirty_list(root);
9660 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
9661 struct extent_buffer *eb, int tree_root)
9663 struct extent_buffer *tmp;
9664 struct btrfs_root_item *ri;
9665 struct btrfs_key key;
9668 int level = btrfs_header_level(eb);
9674	 * If we have pinned this block before, don't pin it again.
9675	 * This not only avoids an endless loop on a broken filesystem
9676	 * but also gives us some speedup.
9678 if (test_range_bit(&fs_info->pinned_extents, eb->start,
9679 eb->start + eb->len - 1, EXTENT_DIRTY, 0))
9682 btrfs_pin_extent(fs_info, eb->start, eb->len);
9684 nodesize = btrfs_super_nodesize(fs_info->super_copy);
9685 nritems = btrfs_header_nritems(eb);
9686 for (i = 0; i < nritems; i++) {
9688 btrfs_item_key_to_cpu(eb, &key, i);
9689 if (key.type != BTRFS_ROOT_ITEM_KEY)
9691 /* Skip the extent root and reloc roots */
9692 if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
9693 key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
9694 key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
9696 ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
9697 bytenr = btrfs_disk_root_bytenr(eb, ri);
9700 * If at any point we start needing the real root we
9701 * will have to build a stump root for the root we are
9702 * in, but for now this doesn't actually use the root so
9703 * just pass in extent_root.
9705 tmp = read_tree_block(fs_info->extent_root, bytenr,
9707 if (!extent_buffer_uptodate(tmp)) {
9708 fprintf(stderr, "Error reading root block\n");
9711 ret = pin_down_tree_blocks(fs_info, tmp, 0);
9712 free_extent_buffer(tmp);
9716 bytenr = btrfs_node_blockptr(eb, i);
9718 /* If we aren't the tree root don't read the block */
9719 if (level == 1 && !tree_root) {
9720 btrfs_pin_extent(fs_info, bytenr, nodesize);
9724 tmp = read_tree_block(fs_info->extent_root, bytenr,
9726 if (!extent_buffer_uptodate(tmp)) {
9727 fprintf(stderr, "Error reading tree block\n");
9730 ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
9731 free_extent_buffer(tmp);
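/*
 * Pin every metadata block reachable from the chunk root and the tree root so
 * that a rebuilt extent tree will not hand those blocks out as free space.
 */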
9740 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
9744 ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
9748 return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
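/*
 * Rebuild the in-memory block groups from the chunk items in the chunk tree:
 * reset the avail_*_alloc_bits and mark each chunk's range dirty in the free
 * space cache, so that allocations work again before the on-disk block group
 * items are recreated.
 */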
9751 static int reset_block_groups(struct btrfs_fs_info *fs_info)
9753 struct btrfs_block_group_cache *cache;
9754 struct btrfs_path *path;
9755 struct extent_buffer *leaf;
9756 struct btrfs_chunk *chunk;
9757 struct btrfs_key key;
9761 path = btrfs_alloc_path();
9766 key.type = BTRFS_CHUNK_ITEM_KEY;
9769 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
9771 btrfs_free_path(path);
9776 * We do this in case the block groups were corrupted and carry alloc
9777 * bits that aren't actually set on the chunks. This always happens with
9778 * restored images and can also occur on real filesystems.
9780 fs_info->avail_data_alloc_bits = 0;
9781 fs_info->avail_metadata_alloc_bits = 0;
9782 fs_info->avail_system_alloc_bits = 0;
9784 /* First we need to create the in-memory block groups */
9786 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9787 ret = btrfs_next_leaf(fs_info->chunk_root, path);
9789 btrfs_free_path(path);
9797 leaf = path->nodes[0];
9798 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9799 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
9804 chunk = btrfs_item_ptr(leaf, path->slots[0],
9805 struct btrfs_chunk);
9806 btrfs_add_block_group(fs_info, 0,
9807 btrfs_chunk_type(leaf, chunk),
9808 key.objectid, key.offset,
9809 btrfs_chunk_length(leaf, chunk));
9810 set_extent_dirty(&fs_info->free_space_cache, key.offset,
9811 key.offset + btrfs_chunk_length(leaf, chunk),
9817 cache = btrfs_lookup_first_block_group(fs_info, start);
9821 start = cache->key.objectid + cache->key.offset;
9824 btrfs_free_path(path);
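/*
 * Remove any pending balance state: delete the balance item, delete all tree
 * reloc root items from the root tree, and reinitialize the data reloc tree
 * with an empty root directory.
 */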
9828 static int reset_balance(struct btrfs_trans_handle *trans,
9829 struct btrfs_fs_info *fs_info)
9831 struct btrfs_root *root = fs_info->tree_root;
9832 struct btrfs_path *path;
9833 struct extent_buffer *leaf;
9834 struct btrfs_key key;
9835 int del_slot, del_nr = 0;
9839 path = btrfs_alloc_path();
9843 key.objectid = BTRFS_BALANCE_OBJECTID;
9844 key.type = BTRFS_BALANCE_ITEM_KEY;
9847 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
9852 goto reinit_data_reloc;
9857 ret = btrfs_del_item(trans, root, path);
9860 btrfs_release_path(path);
9862 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
9863 key.type = BTRFS_ROOT_ITEM_KEY;
9866 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
9870 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9875 ret = btrfs_del_items(trans, root, path,
9882 btrfs_release_path(path);
9885 ret = btrfs_search_slot(trans, root, &key, path,
9892 leaf = path->nodes[0];
9893 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9894 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
9896 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
9901 del_slot = path->slots[0];
9910 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
9914 btrfs_release_path(path);
9917 key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
9918 key.type = BTRFS_ROOT_ITEM_KEY;
9919 key.offset = (u64)-1;
9920 root = btrfs_read_fs_root(fs_info, &key);
9922 fprintf(stderr, "Error reading data reloc tree\n");
9923 ret = PTR_ERR(root);
9926 record_root_in_trans(trans, root);
9927 ret = btrfs_fsck_reinit_root(trans, root, 0);
9930 ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
9932 btrfs_free_path(path);
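/*
 * Rebuild the extent tree from scratch: pin all metadata currently in use,
 * reset the in-memory block groups from the chunk tree, reinitialize the
 * extent root, re-insert the block group items and clear any pending balance.
 * The extent items themselves are added back later by the normal fsck pass.
 */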
9936 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
9937 struct btrfs_fs_info *fs_info)
9943 * The only reason we don't support mixed block groups here is that we
9944 * currently just walk the trees we find and pin down their blocks; we never
9945 * look inside the leaves. Supporting mixed groups would require checking
9946 * the leaves of every fs root and pinning down the bytes of any file
9947 * extents found there. Not hard, but there is no need for it yet.
9949 if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) {
9950 fprintf(stderr, "We don't support re-initing the extent tree "
9951 "for mixed block groups yet. Please notify a btrfs "
9952 "developer that you need this so they can add the "
9953 "functionality.\n");
9958 * First we need to walk all of the trees except the extent tree and pin
9959 * down the bytes that are in use, so we don't overwrite any existing metadata.
9962 ret = pin_metadata_blocks(fs_info);
9964 fprintf(stderr, "error pinning down used bytes\n");
9969 * Need to drop all the block groups since we're going to recreate all of them.
9972 btrfs_free_block_groups(fs_info);
9973 ret = reset_block_groups(fs_info);
9975 fprintf(stderr, "error resetting the block groups\n");
9979 /* Ok we can allocate now, reinit the extent root */
9980 ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
9982 fprintf(stderr, "extent root initialization failed\n");
9984 * When the transaction code is updated we should end the
9985 * transaction, but for now progs only knows about commit so
9986 * just return an error.
9992 * Now we have all the in-memory block groups set up so we can make
9993 * allocations properly, and the metadata we care about is safe since we
9994 * pinned all of it above.
9997 struct btrfs_block_group_cache *cache;
9999 cache = btrfs_lookup_first_block_group(fs_info, start);
10002 start = cache->key.objectid + cache->key.offset;
10003 ret = btrfs_insert_item(trans, fs_info->extent_root,
10004 &cache->key, &cache->item,
10005 sizeof(cache->item));
10007 fprintf(stderr, "Error adding block group\n");
10010 btrfs_extent_post_op(trans, fs_info->extent_root);
10013 ret = reset_balance(trans, fs_info);
10015 fprintf(stderr, "error resetting the pending balance\n");
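/*
 * Force a COW of @eb: look up its owner root and search down to the buffer's
 * level with a write-enabled path, which rewrites the block in the current
 * transaction.
 */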
10020 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
10022 struct btrfs_path *path;
10023 struct btrfs_trans_handle *trans;
10024 struct btrfs_key key;
10027 printf("Recowing metadata block %llu\n", eb->start);
10028 key.objectid = btrfs_header_owner(eb);
10029 key.type = BTRFS_ROOT_ITEM_KEY;
10030 key.offset = (u64)-1;
10032 root = btrfs_read_fs_root(root->fs_info, &key);
10033 if (IS_ERR(root)) {
10034 fprintf(stderr, "Couldn't find owner root %llu\n",
10036 return PTR_ERR(root);
10039 path = btrfs_alloc_path();
10043 trans = btrfs_start_transaction(root, 1);
10044 if (IS_ERR(trans)) {
10045 btrfs_free_path(path);
10046 return PTR_ERR(trans);
10049 path->lowest_level = btrfs_header_level(eb);
10050 if (path->lowest_level)
10051 btrfs_node_key_to_cpu(eb, &key, 0);
10053 btrfs_item_key_to_cpu(eb, &key, 0);
10055 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
10056 btrfs_commit_transaction(trans, root);
10057 btrfs_free_path(path);
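/*
 * Delete a single corrupted item that was recorded on the delete_items list:
 * look up the root it lives in, find the key and remove the item inside a
 * small transaction.
 */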
10061 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
10063 struct btrfs_path *path;
10064 struct btrfs_trans_handle *trans;
10065 struct btrfs_key key;
10068 printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
10069 bad->key.type, bad->key.offset);
10070 key.objectid = bad->root_id;
10071 key.type = BTRFS_ROOT_ITEM_KEY;
10072 key.offset = (u64)-1;
10074 root = btrfs_read_fs_root(root->fs_info, &key);
10075 if (IS_ERR(root)) {
10076 fprintf(stderr, "Couldn't find owner root %llu\n",
10078 return PTR_ERR(root);
10081 path = btrfs_alloc_path();
10085 trans = btrfs_start_transaction(root, 1);
10086 if (IS_ERR(trans)) {
10087 btrfs_free_path(path);
10088 return PTR_ERR(trans);
10091 ret = btrfs_search_slot(trans, root, &bad->key, path, -1, 1);
10097 ret = btrfs_del_item(trans, root, path);
10099 btrfs_commit_transaction(trans, root);
10100 btrfs_free_path(path);
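/*
 * Clear the log tree pointer in the superblock (bytenr and level set to 0)
 * inside a transaction, so a stale log is never replayed on the next mount.
 */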
10104 static int zero_log_tree(struct btrfs_root *root)
10106 struct btrfs_trans_handle *trans;
10109 trans = btrfs_start_transaction(root, 1);
10110 if (IS_ERR(trans)) {
10111 ret = PTR_ERR(trans);
10114 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
10115 btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
10116 ret = btrfs_commit_transaction(trans, root);
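/*
 * Compute checksums for @len bytes of data starting at logical address
 * @start, one sectorsize block at a time, and insert them into the csum tree
 * via btrfs_csum_file_block().
 */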
10120 static int populate_csum(struct btrfs_trans_handle *trans,
10121 struct btrfs_root *csum_root, char *buf, u64 start,
10128 while (offset < len) {
10129 sectorsize = csum_root->sectorsize;
10130 ret = read_extent_data(csum_root, buf, start + offset,
10134 ret = btrfs_csum_file_block(trans, csum_root, start + len,
10135 start + offset, buf, sectorsize);
10138 offset += sectorsize;
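/*
 * Walk one fs/subvolume tree and recompute csums for every regular
 * (BTRFS_FILE_EXTENT_REG) file extent item found in it.
 */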
10143 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
10144 struct btrfs_root *csum_root,
10145 struct btrfs_root *cur_root)
10147 struct btrfs_path *path;
10148 struct btrfs_key key;
10149 struct extent_buffer *node;
10150 struct btrfs_file_extent_item *fi;
10157 path = btrfs_alloc_path();
10160 buf = malloc(cur_root->fs_info->csum_root->sectorsize);
10170 ret = btrfs_search_slot(NULL, cur_root, &key, path, 0, 0);
10173 /* Iterate over all regular file extents and fill in their csums */
10175 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
10177 if (key.type != BTRFS_EXTENT_DATA_KEY)
10179 node = path->nodes[0];
10180 slot = path->slots[0];
10181 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
10182 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
10184 start = btrfs_file_extent_disk_bytenr(node, fi);
10185 len = btrfs_file_extent_disk_num_bytes(node, fi);
10187 ret = populate_csum(trans, csum_root, buf, start, len);
10188 if (ret == -EEXIST)
10194 * TODO: if the next leaf is corrupted, jump to the nearest next valid leaf.
10197 ret = btrfs_next_item(cur_root, path);
10207 btrfs_free_path(path);
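/*
 * Rebuild the csum tree by walking every fs/subvolume root item in the tree
 * of tree roots and filling in csums from each tree's file extents.  Used
 * when the extent tree cannot be trusted (e.g. after --init-extent-tree).
 */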
10212 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
10213 struct btrfs_root *csum_root)
10215 struct btrfs_fs_info *fs_info = csum_root->fs_info;
10216 struct btrfs_path *path;
10217 struct btrfs_root *tree_root = fs_info->tree_root;
10218 struct btrfs_root *cur_root;
10219 struct extent_buffer *node;
10220 struct btrfs_key key;
10224 path = btrfs_alloc_path();
10228 key.objectid = BTRFS_FS_TREE_OBJECTID;
10230 key.type = BTRFS_ROOT_ITEM_KEY;
10232 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
10241 node = path->nodes[0];
10242 slot = path->slots[0];
10243 btrfs_item_key_to_cpu(node, &key, slot);
10244 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
10246 if (key.type != BTRFS_ROOT_ITEM_KEY)
10248 if (!is_fstree(key.objectid))
10250 key.offset = (u64)-1;
10252 cur_root = btrfs_read_fs_root(fs_info, &key);
10253 if (IS_ERR(cur_root) || !cur_root) {
10254 fprintf(stderr, "Failed to read fs/subvol tree: %lld\n",
10258 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
10263 ret = btrfs_next_item(tree_root, path);
10273 btrfs_free_path(path);
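/*
 * Rebuild the csum tree by scanning the extent tree and recomputing csums
 * for every extent item flagged as data.
 */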
10277 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
10278 struct btrfs_root *csum_root)
10280 struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
10281 struct btrfs_path *path;
10282 struct btrfs_extent_item *ei;
10283 struct extent_buffer *leaf;
10285 struct btrfs_key key;
10288 path = btrfs_alloc_path();
10293 key.type = BTRFS_EXTENT_ITEM_KEY;
10296 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
10298 btrfs_free_path(path);
10302 buf = malloc(csum_root->sectorsize);
10304 btrfs_free_path(path);
10309 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
10310 ret = btrfs_next_leaf(extent_root, path);
10318 leaf = path->nodes[0];
10320 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
10321 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
10326 ei = btrfs_item_ptr(leaf, path->slots[0],
10327 struct btrfs_extent_item);
10328 if (!(btrfs_extent_flags(leaf, ei) &
10329 BTRFS_EXTENT_FLAG_DATA)) {
10334 ret = populate_csum(trans, csum_root, buf, key.objectid,
10341 btrfs_free_path(path);
10347 * Recalculate the checksums and store them in the csum tree.
10349 * Re-initializing the extent tree wipes out all the extent info, so in that
10350 * case we can't rely on the extent tree and must use the fs trees instead.
10351 * If search_fs_tree is set, the fs/subvol trees are used to rebuild the csum tree.
10353 static int fill_csum_tree(struct btrfs_trans_handle *trans,
10354 struct btrfs_root *csum_root,
10355 int search_fs_tree)
10357 if (search_fs_tree)
10358 return fill_csum_tree_from_fs(trans, csum_root);
10360 return fill_csum_tree_from_extent(trans, csum_root);
10363 static void free_roots_info_cache(void)
10365 if (!roots_info_cache)
10368 while (!cache_tree_empty(roots_info_cache)) {
10369 struct cache_extent *entry;
10370 struct root_item_info *rii;
10372 entry = first_cache_extent(roots_info_cache);
10375 remove_cache_extent(roots_info_cache, entry);
10376 rii = container_of(entry, struct root_item_info, cache_extent);
10380 free(roots_info_cache);
10381 roots_info_cache = NULL;
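/*
 * Scan the extent tree and, for every tree block that carries a
 * TREE_BLOCK_REF back reference, record in roots_info_cache the highest-level
 * node seen for that root together with its bytenr and generation.
 * maybe_repair_root_item() later compares this against the root items.
 */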
10384 static int build_roots_info_cache(struct btrfs_fs_info *info)
10387 struct btrfs_key key;
10388 struct extent_buffer *leaf;
10389 struct btrfs_path *path;
10391 if (!roots_info_cache) {
10392 roots_info_cache = malloc(sizeof(*roots_info_cache));
10393 if (!roots_info_cache)
10395 cache_tree_init(roots_info_cache);
10398 path = btrfs_alloc_path();
10403 key.type = BTRFS_EXTENT_ITEM_KEY;
10406 ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
10409 leaf = path->nodes[0];
10412 struct btrfs_key found_key;
10413 struct btrfs_extent_item *ei;
10414 struct btrfs_extent_inline_ref *iref;
10415 int slot = path->slots[0];
10420 struct cache_extent *entry;
10421 struct root_item_info *rii;
10423 if (slot >= btrfs_header_nritems(leaf)) {
10424 ret = btrfs_next_leaf(info->extent_root, path);
10431 leaf = path->nodes[0];
10432 slot = path->slots[0];
10435 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10437 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
10438 found_key.type != BTRFS_METADATA_ITEM_KEY)
10441 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
10442 flags = btrfs_extent_flags(leaf, ei);
10444 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
10445 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
10448 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
10449 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
10450 level = found_key.offset;
10452 struct btrfs_tree_block_info *binfo;
10454 binfo = (struct btrfs_tree_block_info *)(ei + 1);
10455 iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
10456 level = btrfs_tree_block_level(leaf, binfo);
10460 * For a root extent, the inline ref must be of the following type and
10461 * must be the first (and only) iref in the item.
10463 type = btrfs_extent_inline_ref_type(leaf, iref);
10464 if (type != BTRFS_TREE_BLOCK_REF_KEY)
10467 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
10468 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10470 rii = malloc(sizeof(struct root_item_info));
10475 rii->cache_extent.start = root_id;
10476 rii->cache_extent.size = 1;
10477 rii->level = (u8)-1;
10478 entry = &rii->cache_extent;
10479 ret = insert_cache_extent(roots_info_cache, entry);
10482 rii = container_of(entry, struct root_item_info,
10486 ASSERT(rii->cache_extent.start == root_id);
10487 ASSERT(rii->cache_extent.size == 1);
10489 if (level > rii->level || rii->level == (u8)-1) {
10490 rii->level = level;
10491 rii->bytenr = found_key.objectid;
10492 rii->gen = btrfs_extent_generation(leaf, ei);
10493 rii->node_count = 1;
10494 } else if (level == rii->level) {
10502 btrfs_free_path(path);
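/*
 * Compare the root item at @path against the information collected by
 * build_roots_info_cache() and, unless @read_only_mode is set, rewrite the
 * item's bytenr, level and generation if they do not match the actual root
 * node found in the extent tree.
 */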
10507 static int maybe_repair_root_item(struct btrfs_fs_info *info,
10508 struct btrfs_path *path,
10509 const struct btrfs_key *root_key,
10510 const int read_only_mode)
10512 const u64 root_id = root_key->objectid;
10513 struct cache_extent *entry;
10514 struct root_item_info *rii;
10515 struct btrfs_root_item ri;
10516 unsigned long offset;
10518 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
10521 "Error: could not find extent items for root %llu\n",
10522 root_key->objectid);
10526 rii = container_of(entry, struct root_item_info, cache_extent);
10527 ASSERT(rii->cache_extent.start == root_id);
10528 ASSERT(rii->cache_extent.size == 1);
10530 if (rii->node_count != 1) {
10532 "Error: could not find btree root extent for root %llu\n",
10537 offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
10538 read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
10540 if (btrfs_root_bytenr(&ri) != rii->bytenr ||
10541 btrfs_root_level(&ri) != rii->level ||
10542 btrfs_root_generation(&ri) != rii->gen) {
10545 * If we're in repair mode but our caller told us to not update
10546 * the root item, i.e. just check if it needs to be updated, don't
10547 * print this message, since the caller will call us again shortly
10548 * for the same root item without read only mode (the caller will
10549 * open a transaction first).
10551 if (!(read_only_mode && repair))
10553 "%sroot item for root %llu,"
10554 " current bytenr %llu, current gen %llu, current level %u,"
10555 " new bytenr %llu, new gen %llu, new level %u\n",
10556 (read_only_mode ? "" : "fixing "),
10558 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
10559 btrfs_root_level(&ri),
10560 rii->bytenr, rii->gen, rii->level);
10562 if (btrfs_root_generation(&ri) > rii->gen) {
10564 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
10565 root_id, btrfs_root_generation(&ri), rii->gen);
10569 if (!read_only_mode) {
10570 btrfs_set_root_bytenr(&ri, rii->bytenr);
10571 btrfs_set_root_level(&ri, rii->level);
10572 btrfs_set_root_generation(&ri, rii->gen);
10573 write_extent_buffer(path->nodes[0], &ri,
10574 offset, sizeof(ri));
10584 * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2)
10585 * caused read-only snapshots to be corrupted if they were created at a moment
10586 * when the source subvolume/snapshot had orphan items. The issue was that the
10587 * on-disk root items became incorrect, referring to the pre-orphan-cleanup root
10588 * node instead of the post-orphan-cleanup root node.
10589 * This function and its callees detect and fix those cases. Even
10590 * though the regression affected read-only snapshots, this function applies to
10591 * any snapshot/subvolume root.
10592 * It must be run before any other repair code - otherwise other repair code
10593 * may, for example, delete or modify backrefs in the extent tree, which
10594 * would leave the fs inconsistent after the root items are repaired.
10596 static int repair_root_items(struct btrfs_fs_info *info)
10598 struct btrfs_path *path = NULL;
10599 struct btrfs_key key;
10600 struct extent_buffer *leaf;
10601 struct btrfs_trans_handle *trans = NULL;
10604 int need_trans = 0;
10606 ret = build_roots_info_cache(info);
10610 path = btrfs_alloc_path();
10616 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
10617 key.type = BTRFS_ROOT_ITEM_KEY;
10622 * Avoid opening and committing transactions if a leaf doesn't have
10623 * any root items that need to be fixed, so that we avoid rotating
10624 * backup roots unnecessarily.
10627 trans = btrfs_start_transaction(info->tree_root, 1);
10628 if (IS_ERR(trans)) {
10629 ret = PTR_ERR(trans);
10634 ret = btrfs_search_slot(trans, info->tree_root, &key, path,
10638 leaf = path->nodes[0];
10641 struct btrfs_key found_key;
10643 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
10644 int no_more_keys = find_next_key(path, &key);
10646 btrfs_release_path(path);
10648 ret = btrfs_commit_transaction(trans,
10660 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
10662 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
10664 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
10667 ret = maybe_repair_root_item(info, path, &found_key,
10672 if (!trans && repair) {
10675 btrfs_release_path(path);
10685 free_roots_info_cache();
10686 btrfs_free_path(path);
10688 btrfs_commit_transaction(trans, info->tree_root);
10695 const char * const cmd_check_usage[] = {
10696 "btrfs check [options] <device>",
10697 "Check structural integrity of a filesystem (unmounted).",
10698 "Check structural integrity of an unmounted filesystem. Verify internal",
10699 "trees' consistency and item connectivity. In the repair mode try to",
10700 "fix the problems found.",
10701 "WARNING: the repair mode is considered dangerous",
10703 "-s|--super <superblock> use this superblock copy",
10704 "-b|--backup use the first valid backup root copy",
10705 "--repair try to repair the filesystem",
10706 "--readonly run in read-only mode (default)",
10707 "--init-csum-tree create a new CRC tree",
10708 "--init-extent-tree create a new extent tree",
10709 "--check-data-csum verify checksums of data blocks",
10710 "-Q|--qgroup-report print a report on qgroup consistency",
10711 "-E|--subvol-extents <subvolid>",
10712 " print subvolume extents and sharing state",
10713 "-r|--tree-root <bytenr> use the given bytenr for the tree root",
10714 "--chunk-root <bytenr> use the given bytenr for the chunk tree root",
10715 "-p|--progress indicate progress",
10719 int cmd_check(int argc, char **argv)
10721 struct cache_tree root_cache;
10722 struct btrfs_root *root;
10723 struct btrfs_fs_info *info;
10726 u64 tree_root_bytenr = 0;
10727 u64 chunk_root_bytenr = 0;
10728 char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
10731 int init_csum_tree = 0;
10733 int qgroup_report = 0;
10734 int qgroups_repaired = 0;
10735 enum btrfs_open_ctree_flags ctree_flags = OPEN_CTREE_EXCLUSIVE;
10739 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
10740 GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
10741 GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE };
10742 static const struct option long_options[] = {
10743 { "super", required_argument, NULL, 's' },
10744 { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
10745 { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
10746 { "init-csum-tree", no_argument, NULL,
10747 GETOPT_VAL_INIT_CSUM },
10748 { "init-extent-tree", no_argument, NULL,
10749 GETOPT_VAL_INIT_EXTENT },
10750 { "check-data-csum", no_argument, NULL,
10751 GETOPT_VAL_CHECK_CSUM },
10752 { "backup", no_argument, NULL, 'b' },
10753 { "subvol-extents", required_argument, NULL, 'E' },
10754 { "qgroup-report", no_argument, NULL, 'Q' },
10755 { "tree-root", required_argument, NULL, 'r' },
10756 { "chunk-root", required_argument, NULL,
10757 GETOPT_VAL_CHUNK_TREE },
10758 { "progress", no_argument, NULL, 'p' },
10759 { NULL, 0, NULL, 0}
10762 c = getopt_long(argc, argv, "as:br:p", long_options, NULL);
10766 case 'a': /* ignored */ break;
10768 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
10771 num = arg_strtou64(optarg);
10772 if (num >= BTRFS_SUPER_MIRROR_MAX) {
10774 "ERROR: super mirror should be less than: %d\n",
10775 BTRFS_SUPER_MIRROR_MAX);
10778 bytenr = btrfs_sb_offset(((int)num));
10779 printf("using SB copy %llu, bytenr %llu\n", num,
10780 (unsigned long long)bytenr);
10786 subvolid = arg_strtou64(optarg);
10789 tree_root_bytenr = arg_strtou64(optarg);
10791 case GETOPT_VAL_CHUNK_TREE:
10792 chunk_root_bytenr = arg_strtou64(optarg);
10795 ctx.progress_enabled = true;
10799 usage(cmd_check_usage);
10800 case GETOPT_VAL_REPAIR:
10801 printf("enabling repair mode\n");
10803 ctree_flags |= OPEN_CTREE_WRITES;
10805 case GETOPT_VAL_READONLY:
10808 case GETOPT_VAL_INIT_CSUM:
10809 printf("Creating a new CRC tree\n");
10810 init_csum_tree = 1;
10812 ctree_flags |= OPEN_CTREE_WRITES;
10814 case GETOPT_VAL_INIT_EXTENT:
10815 init_extent_tree = 1;
10816 ctree_flags |= (OPEN_CTREE_WRITES |
10817 OPEN_CTREE_NO_BLOCK_GROUPS);
10820 case GETOPT_VAL_CHECK_CSUM:
10821 check_data_csum = 1;
10826 if (check_argc_exact(argc - optind, 1))
10827 usage(cmd_check_usage);
10829 if (ctx.progress_enabled) {
10830 ctx.tp = TASK_NOTHING;
10831 ctx.info = task_init(print_status_check, print_status_return, &ctx);
10834 /* This check is the only reason for --readonly to exist */
10835 if (readonly && repair) {
10836 fprintf(stderr, "Repair options are not compatible with --readonly\n");
10841 cache_tree_init(&root_cache);
10843 if ((ret = check_mounted(argv[optind])) < 0) {
10844 fprintf(stderr, "Could not check mount status: %s\n", strerror(-ret));
10847 fprintf(stderr, "%s is currently mounted. Aborting.\n", argv[optind]);
10852 /* only allow partial opening under repair mode */
10854 ctree_flags |= OPEN_CTREE_PARTIAL;
10856 info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
10857 chunk_root_bytenr, ctree_flags);
10859 fprintf(stderr, "Couldn't open file system\n");
10864 global_info = info;
10865 root = info->fs_root;
10868 * Repair mode will force us to commit a transaction, which would make the
10869 * log tree fail to load at the next mount, so it has to be cleared first.
10871 if (repair && btrfs_super_log_root(info->super_copy)) {
10872 ret = ask_user("repair mode will force the log tree to be cleared out, are you sure?");
10877 ret = zero_log_tree(root);
10879 fprintf(stderr, "failed to zero log tree\n");
10884 uuid_unparse(info->super_copy->fsid, uuidbuf);
10885 if (qgroup_report) {
10886 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
10888 ret = qgroup_verify_all(info);
10894 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
10895 subvolid, argv[optind], uuidbuf);
10896 ret = print_extent_state(info, subvolid);
10899 printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
10901 if (!extent_buffer_uptodate(info->tree_root->node) ||
10902 !extent_buffer_uptodate(info->dev_root->node) ||
10903 !extent_buffer_uptodate(info->chunk_root->node)) {
10904 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10909 if (init_extent_tree || init_csum_tree) {
10910 struct btrfs_trans_handle *trans;
10912 trans = btrfs_start_transaction(info->extent_root, 0);
10913 if (IS_ERR(trans)) {
10914 fprintf(stderr, "Error starting transaction\n");
10915 ret = PTR_ERR(trans);
10919 if (init_extent_tree) {
10920 printf("Creating a new extent tree\n");
10921 ret = reinit_extent_tree(trans, info);
10926 if (init_csum_tree) {
10927 fprintf(stderr, "Reinit crc root\n");
10928 ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
10930 fprintf(stderr, "crc root initialization failed\n");
10935 ret = fill_csum_tree(trans, info->csum_root,
10938 fprintf(stderr, "crc refilling failed\n");
10943 * Ok now we commit and run the normal fsck, which will add
10944 * extent entries for all of the items it finds.
10946 ret = btrfs_commit_transaction(trans, info->extent_root);
10950 if (!extent_buffer_uptodate(info->extent_root->node)) {
10951 fprintf(stderr, "Critical roots corrupted, unable to fsck the FS\n");
10955 if (!extent_buffer_uptodate(info->csum_root->node)) {
10956 fprintf(stderr, "Checksum root corrupted, rerun with --init-csum-tree option\n");
10961 if (!ctx.progress_enabled)
10962 fprintf(stderr, "checking extents\n");
10963 ret = check_chunks_and_extents(root);
10965 fprintf(stderr, "Errors found in extent allocation tree or chunk allocation\n");
10967 ret = repair_root_items(info);
10971 fprintf(stderr, "Fixed %d roots.\n", ret);
10973 } else if (ret > 0) {
10975 "Found %d roots with an outdated root item.\n",
10978 "Please run a filesystem check with the option --repair to fix them.\n");
10983 if (!ctx.progress_enabled) {
10984 if (btrfs_fs_compat_ro(info, BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE))
10985 fprintf(stderr, "checking free space tree\n");
10987 fprintf(stderr, "checking free space cache\n");
10989 ret = check_space_cache(root);
10994 * We used to have to have hole extents in between the real file
10995 * extents, so if the NO_HOLES flag is not set we need to make sure there
10996 * are no gaps in an inode's file extents; if it is set, such gaps can
10997 * simply be ignored.
10999 no_holes = btrfs_fs_incompat(root->fs_info,
11000 BTRFS_FEATURE_INCOMPAT_NO_HOLES);
11001 if (!ctx.progress_enabled)
11002 fprintf(stderr, "checking fs roots\n");
11003 ret = check_fs_roots(root, &root_cache);
11007 fprintf(stderr, "checking csums\n");
11008 ret = check_csums(root);
11012 fprintf(stderr, "checking root refs\n");
11013 ret = check_root_refs(root, &root_cache);
11017 while (repair && !list_empty(&root->fs_info->recow_ebs)) {
11018 struct extent_buffer *eb;
11020 eb = list_first_entry(&root->fs_info->recow_ebs,
11021 struct extent_buffer, recow);
11022 list_del_init(&eb->recow);
11023 ret = recow_extent_buffer(root, eb);
11028 while (!list_empty(&delete_items)) {
11029 struct bad_item *bad;
11031 bad = list_first_entry(&delete_items, struct bad_item, list);
11032 list_del_init(&bad->list);
11034 ret = delete_bad_item(root, bad);
11038 if (info->quota_enabled) {
11040 fprintf(stderr, "checking quota groups\n");
11041 err = qgroup_verify_all(info);
11045 err = repair_qgroups(info, &qgroups_repaired);
11050 if (!list_empty(&root->fs_info->recow_ebs)) {
11051 fprintf(stderr, "Transid errors in file system\n");
11055 /* Don't override original ret */
11056 if (!ret && qgroups_repaired)
11057 ret = qgroups_repaired;
11059 if (found_old_backref) { /*
11060 * There was a disk format change while the mixed
11061 * backref code was in the testing tree; the old
11062 * format only existed for about one week.
11064 printf("\n * Found old mixed backref format. "
11065 "The old format is not supported! *"
11066 "\n * Please mount the FS in readonly mode, "
11067 "back up the data and re-format the FS. *\n\n");
11070 printf("found %llu bytes used err is %d\n",
11071 (unsigned long long)bytes_used, ret);
11072 printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
11073 printf("total tree bytes: %llu\n",
11074 (unsigned long long)total_btree_bytes);
11075 printf("total fs tree bytes: %llu\n",
11076 (unsigned long long)total_fs_tree_bytes);
11077 printf("total extent tree bytes: %llu\n",
11078 (unsigned long long)total_extent_tree_bytes);
11079 printf("btree space waste bytes: %llu\n",
11080 (unsigned long long)btree_space_waste);
11081 printf("file data blocks allocated: %llu\n referenced %llu\n",
11082 (unsigned long long)data_bytes_allocated,
11083 (unsigned long long)data_bytes_referenced);
11085 free_qgroup_counts();
11086 free_root_recs_tree(&root_cache);
11090 if (ctx.progress_enabled)
11091 task_deinit(ctx.info);